bufr.pm:bufrread.pl

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
bufr.pm:bufrread.pl [2013-09-13 11:08:46]
pals
bufr.pm:bufrread.pl [2023-02-05 10:14:41] (current)
pals
Line 1: Line 1:
 <code perl> <code perl>
-#!/usr/bin/perl -w+#!/usr/bin/perl
  
-# (C) Copyright 2010, met.no+# (C) Copyright 2010-2023 MET Norway
 # #
 # This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
Line 22: Line 22:
  
 use strict; use strict;
 +use warnings;
 use Getopt::Long; use Getopt::Long;
 use Pod::Usage qw(pod2usage); use Pod::Usage qw(pod2usage);
 use Geo::BUFR; use Geo::BUFR;
 +
 +# This is actually default in BUFR.pm, but provided here to make it
 +# easier for users to change to 'ECCODES' if preferred
 +use constant DEFAULT_TABLE_FORMAT => 'BUFRDC';
  
 # Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set # Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set
-use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables'; +use constant DEFAULT_TABLE_PATH_BUFRDC => '/usr/local/lib/bufrtables'; 
-# Ought to be your most up-to-date table +use constant DEFAULT_TABLE_PATH_ECCODES => '/usr/local/share/eccodes/definitions/bufr/tables'; 
-use constant DEFAULT_CTABLE => 'C0000000000000019000';+# Ought to be your most up-to-date code table(s) 
 +use constant DEFAULT_CTABLE_BUFRDC => 'C0000000000000037000'; 
 +use constant DEFAULT_CTABLE_ECCODES => '0/wmo/37';
  
 # Parse command line options # Parse command line options
Line 35: Line 42:
 GetOptions( GetOptions(
            \%option,            \%option,
-           'all_operators',# Show all operator descriptors when printing section 4+           'ahl=s',        # Decode BUFR messages with AHL matching <ahl_regexp> only 
 +           'all_operators',# Show replication descriptors and all operator descriptors 
 +                           # when printing section 4
            'bitmap',       # Display bit-mapped values on same line            'bitmap',       # Display bit-mapped values on same line
            'codetables',   # Use code and flag tables to resolve values            'codetables',   # Use code and flag tables to resolve values
Line 41: Line 50:
            'filter=s',     # Decode observations meeting criteria in <filter file> only            'filter=s',     # Decode observations meeting criteria in <filter file> only
            'help',         # Print help information and exit            'help',         # Print help information and exit
 +           'nodata',       # Do not print (nor decode) section 4 (data section)
            'noqc',         # Do not decode quality control            'noqc',         # Do not decode quality control
            'on_error_stop', # Stop processing if an error occurs            'on_error_stop', # Stop processing if an error occurs
Line 47: Line 57:
            'param=s',      # Decode parameters with descriptors in <descriptor file> only            'param=s',      # Decode parameters with descriptors in <descriptor file> only
            'strict_checking=i', # Enable/disable strict checking of BUFR format            'strict_checking=i', # Enable/disable strict checking of BUFR format
 +           'tableformat=s',  # Set BUFR table format
            'tablepath=s',  # Set BUFR table path            'tablepath=s',  # Set BUFR table path
-           'verbose=i',    # Set verbose level to n, 0<=n<=(default 0)+           'verbose=i',    # Set verbose level to n, 0<=n<=(default 0)
            'width=i',      # Set width of values field (default is 15 characters)            'width=i',      # Set width of values field (default is 15 characters)
        ) or pod2usage(-verbose => 0);        ) or pod2usage(-verbose => 0);
Line 60: Line 71:
 # Set verbosity level # Set verbosity level
 Geo::BUFR->set_verbose($option{verbose}) if $option{verbose}; Geo::BUFR->set_verbose($option{verbose}) if $option{verbose};
 +
 +# Set whether section 4 should be decoded for the BUFR module
 +Geo::BUFR->set_nodata() if ($option{nodata});
  
 # Set whether quality information should be decoded for the BUFR module # Set whether quality information should be decoded for the BUFR module
Line 67: Line 81:
  
 Geo::BUFR->set_show_all_operators($option{all_operators}) if defined $option{all_operators}; Geo::BUFR->set_show_all_operators($option{all_operators}) if defined $option{all_operators};
 +
 +# Set BUFR table format
 +my $tableformat = (defined $option{tableformat}) ? uc $option{tableformat} : DEFAULT_TABLE_FORMAT;
 +Geo::BUFR->set_tableformat($tableformat);
  
 # Set BUFR table path # Set BUFR table path
Line 76: Line 94:
     Geo::BUFR->set_tablepath($ENV{BUFR_TABLES});     Geo::BUFR->set_tablepath($ENV{BUFR_TABLES});
 } else { } else {
-    # If all else fails, use the libbufr bufrtables +    # If all else fails, use the default tablepath in BUFRDC/ECCODES 
-    Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH);+    if ($tableformat eq 'BUFRDC') { 
 +        Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH_BUFRDC)
 +    } elsif ($tableformat eq 'ECCODES'
 +        Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH_ECCODES); 
 +    } 
 +
 + 
 +my $ahl_regexp; 
 +if ($option{ahl}) { 
 +    eval { $ahl_regexp = qr/$option{ahl}/ }; 
 +    die "Argument to --ahl is not a valid Perl regular expression: $@" if $@;
 } }
  
Line 95: Line 123:
  
 # Arrays over filter criteria, used if option --filter is set # Arrays over filter criteria, used if option --filter is set
-my @fid;      # Filter descriptors, .e.g. $fid[1] = [ 001001, 001002 ]+my @fid;      # Filter descriptors, e.g. $fid[1] = [ 001001, 001002 ]
 my @fiv;      # Filter values, e.g. $fiv[1] = [ [ 3, 895 ], [ 6, 252 ] ] my @fiv;      # Filter values, e.g. $fiv[1] = [ [ 3, 895 ], [ 6, 252 ] ]
 my @num_desc; # Number of filter descriptors for each criterion, e.g. $num_desc[1] = 2 my @num_desc; # Number of filter descriptors for each criterion, e.g. $num_desc[1] = 2
Line 114: Line 142:
 foreach my $inputfname ( @ARGV ) { foreach my $inputfname ( @ARGV ) {
     my $bufr = Geo::BUFR->new();     my $bufr = Geo::BUFR->new();
 +    $bufr->set_filter_cb(\&filter_on_ahl,$ahl_regexp) if $option{ahl};
  
     # Open BUFR file     # Open BUFR file
Line 124: Line 153:
  
  
-# Extract data from BUFR file. Print WMO ahl for first message in +# Extract data from BUFR file. Print AHL for first message in each GTS 
-each WMO bulletin, print message number for each new message, print +# bulletin, print message number for each new message, print subset 
-subset number for each subset.+# number for each subset.
 sub decode { sub decode {
     my $bufr = shift;          # BUFR object     my $bufr = shift;          # BUFR object
Line 139: Line 168:
         # Read next observation. If an error is encountered during         # Read next observation. If an error is encountered during
         # decoding, skip this observation while printing the error         # decoding, skip this observation while printing the error
-        # message to STDERR, also displaying ahl of bulletin if found.+        # message to STDERR, also displaying ahl of bulletin if found 
 +        # (but skip error message if the message should be skipped on 
 +        # --ahl anyway).
         my ($data, $descriptors);         my ($data, $descriptors);
         eval {         eval {
Line 145: Line 176:
         };         };
         if ($@) {         if ($@) {
 +            $current_ahl = $bufr->get_current_ahl() || '';
 +            next READLOOP if $option{ahl} && $current_ahl !~ $ahl_regexp;
 +
             warn $@;             warn $@;
             # Try to extract message number and ahl of the bulletin             # Try to extract message number and ahl of the bulletin
             # where the error occurred             # where the error occurred
-     $current_message_number = $bufr->get_current_message_number(); +            $current_message_number = $bufr->get_current_message_number(); 
-     if ($current_message_number) { +            if (defined $current_message_number) { 
- my $error_msg = "In message $current_message_number"; +                my $error_msg = "In message $current_message_number"; 
- $current_ahl = $bufr->get_current_ahl(); +                $error_msg .= " contained in bulletin with ahl $current_ahl\n" 
- $error_msg .= " contained in bulletin with ahl $current_ahl\n" +                    if $current_ahl; 
-     if $current_ahl; +                warn $error_msg if $error_msg; 
- warn $error_msg if $error_msg; +            }
-     }+
             exit(1) if $option{on_error_stop};             exit(1) if $option{on_error_stop};
             next READLOOP;             next READLOOP;
         }         }
  
-        if ($option{codetables}) {+        next if $option{ahl} && $bufr->is_filtered(); 
 + 
 +        if ($option{codetables} && !$option{nodata}) {
             # Load C table, trying first to use same table version as             # Load C table, trying first to use same table version as
             # the B and D tables loaded in next_observation, or if             # the B and D tables loaded in next_observation, or if
Line 166: Line 201:
             # instead.             # instead.
             my $table_version = $bufr->get_table_version();             my $table_version = $bufr->get_table_version();
-            $bufr->load_Ctable("C$table_version", DEFAULT_CTABLE);+            my $tableformat = Geo::BUFR->get_tableformat(); 
 +            if ($tableformat eq 'BUFRDC') { 
 +                $bufr->load_Ctable("C$table_version", DEFAULT_CTABLE_BUFRDC); 
 +            } elsif ($tableformat eq 'ECCODES'
 +                $bufr->load_Ctable("$table_version", DEFAULT_CTABLE_ECCODES); 
 +            }
         }         }
  
         my $current_subset_number = $bufr->get_current_subset_number();         my $current_subset_number = $bufr->get_current_subset_number();
-        my $nsubsets = $bufr->get_number_of_subsets();+        # If next_observation() did find a BUFR message, subset number 
 +        # should have been set to at least 1 (even in a 0 subset message) 
 +        last READLOOP if $current_subset_number == 0;
  
-        if ($current_subset_number == 1) {+        if ($current_subset_number == 1 || $option{nodata}) {
             $current_message_number = $bufr->get_current_message_number();             $current_message_number = $bufr->get_current_message_number();
             $current_ahl = $bufr->get_current_ahl() || '';             $current_ahl = $bufr->get_current_ahl() || '';
Line 193: Line 235:
                 $section013_dumped = 1;                 $section013_dumped = 1;
             }             }
 +            next READLOOP if $option{nodata};
         } else { # subset number > 1         } else { # subset number > 1
             next READLOOP if ($option{filter}             next READLOOP if ($option{filter}
Line 216: Line 259:
         }         }
  
-        # If this is last message and there is a BUFR formatting +        printf $OUT "\nSubset %d\n", $current_subset_number;
-        # error, we might end up here with current subset number 0 +
-        last READLOOP if $current_subset_number == 0;+
  
- printf $OUT "\nSubset %d\n", $current_subset_number; +        # If an error is encountered during dumping of section 4, skip 
- +        # this subset while printing the error message to STDERR, also 
- # If an error is encountered during dumping of section 4, skip +        # displaying ahl of bulletin if found. 
- # this subset while printing the error message to STDERR, also +        my $dump; 
- # displaying ahl of bulletin if found. +        eval { 
- my $dump; +            $dump = ( $option{bitmap} ) 
- eval { +                ? $bufr->dumpsection4_with_bitmaps($data, $descriptors, 
-     $dump = ( $option{bitmap} ) +                                                   $current_subset_number, $width) 
- ? $bufr->dumpsection4_with_bitmaps($data, $descriptors, +                : $bufr->dumpsection4($data, $descriptors, $width); 
-    $current_subset_number, $width) +        }; 
- : $bufr->dumpsection4($data, $descriptors, $width); +        if ($@) { 
- }; +            warn $@; 
- if ($@) { +            my $error_msg = "In message $current_message_number" 
-     warn $@; +                . " and subset $current_subset_number"; 
-     my $error_msg = "In message $current_message_number" +            $error_msg .= " contained in bulletin with ahl $current_ahl\n" 
- . " and subset $current_subset_number"; +                if $current_ahl; 
-     $error_msg .= " contained in bulletin with ahl $current_ahl\n" +            warn $error_msg; 
- if $current_ahl; +            exit(1) if $option{on_error_stop}; 
-     warn $error_msg; +            next READLOOP; 
-     exit(1) if $option{on_error_stop}; +        } else { 
-     next READLOOP; +            print $OUT $dump; 
- } else { +        }
-     print $OUT $dump; +
- }+
     }     }
 } }
Line 283: Line 322:
 # Filter routines # Filter routines
  
-# Read in content of $filter_file into variables @fid, @fiv,+sub filter_on_ahl { 
 +    my $obj = shift; 
 +    my $ahl_regexp = shift; 
 +    my $ahl = $obj->get_current_ahl() || ''; 
 +    return $ahl =~ $ahl_regexp ? 0 : 1; 
 +
 + 
 +# Read in contents of $filter_file into variables @fid, @fiv,
 # @num_desc, @num_val and $num_criteria, which are defined above. # @num_desc, @num_val and $num_criteria, which are defined above.
 +# Note that index 0 of the arrays is not used.
 sub read_filter_file { sub read_filter_file {
     my $filter_file = shift;     my $filter_file = shift;
Line 310: Line 357:
         } else {         } else {
             my @values = split;             my @values = split;
 +            # Check that value line contains correct number of values
 +            die "Number of values doesn't match number of descriptors"
 +                . " for line $. in filter file '$filter_file'"
 +                if scalar @values != scalar @{$fid[$num_criteria]};
             # Remove leading 0's in numerical values (to prepare for string comparison)             # Remove leading 0's in numerical values (to prepare for string comparison)
             for $_ (@values) { s/^0+(\d+)$/$1/ };             for $_ (@values) { s/^0+(\d+)$/$1/ };
Line 328: Line 379:
     my ($data, $descriptors) = @_;     my ($data, $descriptors) = @_;
  
-    my $num_ordinary_criteria = @fid - $num_required_criteria;+    my $num_ordinary_criteria = $#fid - $num_required_criteria;
     my $num_success_req_criteria = 0; # Number of required criteria successfully fulfilled     my $num_success_req_criteria = 0; # Number of required criteria successfully fulfilled
     my $num_success_ord_criteria = 0; # Number of ordinary criteria successfully fulfilled     my $num_success_ord_criteria = 0; # Number of ordinary criteria successfully fulfilled
Line 386: Line 437:
                                                  or $num_success_ord_criteria > 0)) {                                                  or $num_success_ord_criteria > 0)) {
                                             return 0; # Don't filter this observation                                             return 0; # Don't filter this observation
 +                                        } else {
 +                                            next DESC;
                                         }                                         }
                                     } else {                                     } else {
Line 414: Line 467:
  
 =pod =pod
 +
 +=encoding utf8
  
 =head1 SYNOPSIS =head1 SYNOPSIS
  
   bufrread.pl <bufr file(s)>   bufrread.pl <bufr file(s)>
 +      [--ahl <ahl_regexp>]
 +      [--all_operators]
 +      [--bitmap]
       [--codetables]       [--codetables]
       [--data_only]       [--data_only]
-      [--param <descriptor file>] 
       [--filter <filter file>]       [--filter <filter file>]
-      [--bitmap]+      [--help] 
 +      [--nodata]
       [--noqc]       [--noqc]
-      [--outfile <filename>]+      [--on_error_stop]
       [--optional_section]       [--optional_section]
-      [--width n]+      [--outfile <filename>]
 +      [--param <descriptor file>]
       [--strict_checking n]       [--strict_checking n]
-      [--on_error_stop] +      [--tableformat <BUFRDC|ECCODES>]
-      [--all_operators]+
       [--tablepath <path to BUFR tables>]       [--tablepath <path to BUFR tables>]
       [--verbose n]       [--verbose n]
-      [--help]+      [--width n]
  
 =head1 DESCRIPTION =head1 DESCRIPTION
  
 Extract BUFR messages from BUFR file(s) and print the decoded content Extract BUFR messages from BUFR file(s) and print the decoded content
-to screen. Will include WMO ahl if the BUFR message is part of a WMO +to screen, including AHL (Abbreviated Header Line) if present.
-bulletin.+
  
 Execute without arguments for Usage, with option C<--help> for some Execute without arguments for Usage, with option C<--help> for some
-additional info. See also L</https://wiki.met.no/bufr.pm/start> for+additional info. See also L<https://wiki.met.no/bufr.pm/start> for
 examples of use. examples of use.
  
Line 447: Line 504:
 =head1 OPTIONS =head1 OPTIONS
  
 +   --ahl <ahl_regexp>
 +                   Decode BUFR messages with AHL matching <ahl_regexp> only
 +   --all_operators Show replication descriptors and all operator descriptors
 +                   when printing section 4
 +   --bitmap        Display bit-mapped values on same line
    --codetables    Use code and flag tables to resolve values when unit    --codetables    Use code and flag tables to resolve values when unit
                    is [CODE TABLE] or [FLAG TABLE]                    is [CODE TABLE] or [FLAG TABLE]
    --data_only     Print section 4 (data section) only    --data_only     Print section 4 (data section) only
-   --param <descriptor file> 
-                   Display parameters with descriptors in <descriptor file> only 
    --filter <filter file>    --filter <filter file>
                    Decode observations meeting criteria in <filter file> only                    Decode observations meeting criteria in <filter file> only
-   --bitmap        Display bit-mapped values on same line+   --help          Display Usage and explain the options used. For even 
 +                   more info you might prefer to consult perldoc bufrread.pl 
 +   --nodata        Do not print (nor decode) section 4 (data section)
    --noqc          Do not decode quality control    --noqc          Do not decode quality control
                    (or any descriptors following 222000)                    (or any descriptors following 222000)
 +   --on_error_stop Stop processing as soon as an error occurs during decoding
    --outfile <filename>    --outfile <filename>
                    Will print to <filename> instead of STDOUT                    Will print to <filename> instead of STDOUT
    --optional_section    --optional_section
                    Display a hex dump of optional section if present                    Display a hex dump of optional section if present
-   --width n       Set width of field used for data values to n characters +   --param <descriptor file> 
-                   (default is 15)+                   Display parameters with descriptors in <descriptor file> only
    --strict_checking n n=0 (default) Disable strict checking of BUFR format    --strict_checking n n=0 (default) Disable strict checking of BUFR format
                        n=1 Issue warning if (recoverable) error in                        n=1 Issue warning if (recoverable) error in
Line 468: Line 531:
                        n=2 Croak if (recoverable) error in BUFR format.                        n=2 Croak if (recoverable) error in BUFR format.
                            Nothing more in this message/subset will be decoded.                            Nothing more in this message/subset will be decoded.
-   --on_error_stop Stop processing as soon as an error occurs during decoding +   --tableformat   Currently supported are BUFRDC and ECCODES (default is BUFRDC)
-   --all_operators Show all operator descriptors when printing section 4+
    --tablepath <path to BUFR tables>    --tablepath <path to BUFR tables>
                    Set path to BUFR tables (overrides ENV{BUFR_TABLES})                    Set path to BUFR tables (overrides ENV{BUFR_TABLES})
-   --verbose n     Set verbose level to n, 0<=n<=(default 0). n=1 will+   --verbose n     Set verbose level to n, 0<=n<=(default 0). n=1 will
                    show the tables loaded.                    show the tables loaded.
-   --help          Display Usage and explain the options used. For even +   --width n       Set width of field used for data values to n characters 
-                   more info you might prefer to consult perldoc bufrread.pl+                   (default is 15)
  
 Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>. Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>.
  
 To avoid having to use the C<--tablepath> option, you are advised to To avoid having to use the C<--tablepath> option, you are advised to
-set the invironment variable BUFR_TABLES to the directory where your+set the environment variable BUFR_TABLES to the directory where your
 BUFR tables are located (unless the default path provided by BUFR tables are located (unless the default path provided by
-bufrread.pl works for you).+bufrread.pl works for you). For tableformat ECCODES, see 
 +L<http://search.cpan.org/dist/Geo-BUFR/lib/Geo/BUFR.pm#BUFR-TABLE-FILES> 
 +for more info on how to set C<--tablepath> (or BUFR_TABLES). 
 + 
 +For option C<--ahl> the <ahl_regexp> should be a Perl regular 
 +expression. E.g. C<--ahl "ISS... ENMI"> will decode only BUFR SHIP 
 +(ISS) from CCCC=ENMI. This is the only case where a little knowledge 
 +of Perl might possibly be required when using the utility programs 
 +included in Geo::BUFR.
  
-Each line in <descriptor file> should start with a BUFR descriptor (6 +For option C<--param> each line in <descriptor file> should start with 
-digits).  Rest of line will be ignored. bufrread.pl will display values +a BUFR descriptor (6 digits).  Rest of line will be ignored. 
-for these descriptors only.+bufrread.pl will display values for these descriptors only.
  
 Using C<--filter> will decode only those observations that meet one of Using C<--filter> will decode only those observations that meet one of
-the criteria in <filter file> (and all of those criteria marked +the criteria in <filter file> marked D: and all of those criteria 
-D!). Comments (starting with #) are ignored. An example of a filter +marked D!:. Comments (starting with #) are ignored. An example of a 
-file is+filter file is
  
   D: 001001   D: 001001
Line 506: Line 576:
 which decodes all observations with block number 01, two other which decodes all observations with block number 01, two other
 specific WMO stations and one specific ship, all of which having hour specific WMO stations and one specific ship, all of which having hour
-(004004) equal to 6 or 7.  If there is no value line after a+(004004) equal to 6 or 7. If there is no value line after a
 descriptor line, it is enough that the observation contains the descriptor line, it is enough that the observation contains the
 descriptor(s), whatever the values are. So to extract all ship descriptor(s), whatever the values are. So to extract all ship
-messages from a BUFR SYNOP file, the filter file should contain this +messages from a BUFR file, the filter file should contain this single 
-single line only:+line only:
  
   D: 001011   D: 001011
  
 If an error occurs during decoding (typically because the required If an error occurs during decoding (typically because the required
-BUFR table is missing or message is corrupt) the message is skipped, +BUFR table is missing or message is corrupt), the BUFR message is 
-and the number of errors is reported at end of output. You can change +skipped with an error message printed to STDERR, and processing then 
-this default behaviour, however, by setting C<--on_error_stop>.+continues with the next BUFR message. You can change this default 
 +behaviour, however, by setting C<--on_error_stop>.
  
 =head1 CAVEAT =head1 CAVEAT
  
-Option --bitmap may not work properly for complicated BUFR messages.+Option C<--bitmap> may not work properly for complicated BUFR messages.
 Namely, when the first bit-map is encountered, no more data values (or Namely, when the first bit-map is encountered, no more data values (or
 their descriptors) will be displayed unless they refer to the their descriptors) will be displayed unless they refer to the
Line 534: Line 605:
 =head1 COPYRIGHT =head1 COPYRIGHT
  
-Copyright (C) 2010 met.no+Copyright (C) 2010-2023 MET Norway
  
 =cut =cut
 </code> </code>
  • bufr.pm/bufrread.pl.1379070526.txt.gz
  • Last modified: 2022-05-31 09:23:11
  • (external edit)