Differences

This shows you the differences between two versions of the page.

--- bufr.pm:bufrread.pl [2010-09-21 08:08:50]
pals old revision restored
+++ bufr.pm:bufrread.pl [2023-02-05 10:14:41] (current)
pals
@@ Line 1: / Line 1: @@
-<code>
+<code perl>
-#!/usr/bin/perl -w
+#!/usr/bin/perl
-# (C) Copyright 2010, met.no
+# (C) Copyright 2010-2023 MET Norway
 #
 # This program is free software; you can redistribute it and/or modify
@@ Line 22: / Line 22: @@
 use strict;
+use warnings;
 use Getopt::Long;
 use Pod::Usage qw(pod2usage);
 use Geo::BUFR;
+# This is actually default in BUFR.pm, but provided here to make it
+# easier for users to change to 'ECCODES' if preferred
+use constant DEFAULT_TABLE_FORMAT => 'BUFRDC';
 # Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set
-use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables';
+use constant DEFAULT_TABLE_PATH_BUFRDC => '/usr/local/lib/bufrtables';
-# Ought to be your most up-to-date C table
+use constant DEFAULT_TABLE_PATH_ECCODES => '/usr/local/share/eccodes/definitions/bufr/tables';
-use constant DEFAULT_CTABLE => 'C0000000000000014000';
+# Ought to be your most up-to-date code table(s)
+use constant DEFAULT_CTABLE_BUFRDC => 'C0000000000000037000';
+use constant DEFAULT_CTABLE_ECCODES => '0/wmo/37';
 # Parse command line options
@@ Line 35: / Line 42: @@
 GetOptions(
            \%option,
-           'all_operators',# Show all operator descriptors when printing section 4
+           'ahl=s',        # Decode BUFR messages with AHL matching <ahl_regexp> only
-           'bitmap',       # Display bit mapped values on same line
+           'all_operators',# Show replication descriptors and all operator descriptors
+                           # when printing section 4
+           'bitmap',       # Display bit-mapped values on same line
            'codetables',   # Use code and flag tables to resolve values
            'data_only',    # Print section 4 (data section) only
            'filter=s',     # Decode observations meeting criteria in <filter file> only
            'help',         # Print help information and exit
+           'nodata',       # Do not print (nor decode) section 4 (data section)
            'noqc',         # Do not decode quality control
+           'on_error_stop', # Stop processing if an error occurs
            'optional_section',  # Display a hex dump of optional section if present
            'outfile=s',    # Print to file instead of STDOUT
            'param=s',      # Decode parameters with descriptors in <descriptor file> only
            'strict_checking=i', # Enable/disable strict checking of BUFR format
+           'tableformat=s',  # Set BUFR table format
            'tablepath=s',  # Set BUFR table path
-           'verbose=i',    # Set verbose level to n, 0<=n<=3 (default 0)
+           'verbose=i',    # Set verbose level to n, 0<=n<=6 (default 0)
            'width=i',      # Set width of values field (default is 15 characters)
        ) or pod2usage(-verbose => 0);
@@ Line 57: / Line 69: @@
 pod2usage(-verbose => 0) unless @ARGV;
-# Set verbosity level for the BUFR module. Must be set also for each
+# Set verbosity level
-# BUFR object generated
+Geo::BUFR->set_verbose($option{verbose}) if $option{verbose};
-my $verbose = $option{verbose} ? $option{verbose} : 0;
-Geo::BUFR->set_verbose($verbose);
+# Set whether section 4 should be decoded for the BUFR module
+Geo::BUFR->set_nodata() if ($option{nodata});
 # Set whether quality information should be decoded for the BUFR module
@@ Line 68: / Line 81: @@
 Geo::BUFR->set_show_all_operators($option{all_operators}) if defined $option{all_operators};
+# Set BUFR table format
+my $tableformat = (defined $option{tableformat}) ? uc $option{tableformat} : DEFAULT_TABLE_FORMAT;
+Geo::BUFR->set_tableformat($tableformat);
 # Set BUFR table path
@@ Line 77: / Line 94: @@
     Geo::BUFR->set_tablepath($ENV{BUFR_TABLES});
 } else {
-    # If all else fails, use the libemos bufrtables
+    # If all else fails, use the default tablepath in BUFRDC/ECCODES
-    Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH);
+    if ($tableformat eq 'BUFRDC') {
+        Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH_BUFRDC);
+    } elsif ($tableformat eq 'ECCODES')  {
+        Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH_ECCODES);
+    }
+}
+my $ahl_regexp;
+if ($option{ahl}) {
+    eval { $ahl_regexp = qr/$option{ahl}/ };
+    die "Argument to --ahl is not a valid Perl regular expression: $@" if $@;
 }
@@ Line 96: / Line 123: @@
 # Arrays over filter criteria, used if option --filter is set
-my @fid;      # Filter descriptors, .e.g. $fid[1] = [ 001001, 001002 ]
+my @fid;      # Filter descriptors, e.g. $fid[1] = [ 001001, 001002 ]
 my @fiv;      # Filter values, e.g. $fiv[1] = [ [ 3, 895 ], [ 6, 252 ] ]
 my @num_desc; # Number of filter descriptors for each criterion, e.g. $num_desc[1] = 2
@@ Line 115: / Line 142: @@
 foreach my $inputfname ( @ARGV ) {
     my $bufr = Geo::BUFR->new();
+    $bufr->set_filter_cb(\&filter_on_ahl,$ahl_regexp) if $option{ahl};
-    # This sets object verbose level equal to class verbose level
-    $bufr->set_verbose($verbose);
     # Open BUFR file
@@ Line 128: / Line 153: @@
-# Extract data from BUFR file. Print WMO ahl for first message in
+# Extract data from BUFR file. Print AHL for first message in each GTS
-# each WMO bulletin, print message number for each new message, print
+# bulletin, print message number for each new message, print subset
-# subset number for each subset.
+# number for each subset.
 sub decode {
     my $bufr = shift;          # BUFR object
@@ Line 143: / Line 168: @@
         # Read next observation. If an error is encountered during
         # decoding, skip this observation while printing the error
-        # message to STDERR, also displaying ahl of bulletin if found.
+        # message to STDERR, also displaying ahl of bulletin if found
+        # (but skip error message if the message should be skipped on
+        # --ahl anyway).
         my ($data, $descriptors);
         eval {
@@ Line 149: / Line 176: @@
         };
         if ($@) {
+            $current_ahl = $bufr->get_current_ahl() || '';
+            next READLOOP if $option{ahl} && $current_ahl !~ $ahl_regexp;
             warn $@;
             # Try to extract message number and ahl of the bulletin
             # where the error occurred
-            eval {
+            $current_message_number = $bufr->get_current_message_number();
-                $current_message_number = $bufr->get_current_message_number();
+            if (defined $current_message_number) {
-                $current_ahl = $bufr->get_current_ahl() || '';
+                my $error_msg = "In message $current_message_number";
-                my $error_msg;
-                $error_msg = "In message $current_message_number"
-                    if $current_message_number;
                 $error_msg .= " contained in bulletin with ahl $current_ahl\n"
                     if $current_ahl;
                 warn $error_msg if $error_msg;
-            };
+            }
+            exit(1) if $option{on_error_stop};
             next READLOOP;
         }
-        if ($option{codetables}) {
+        next if $option{ahl} && $bufr->is_filtered();
+        if ($option{codetables} && !$option{nodata}) {
             # Load C table, trying first to use same table version as
             # the B and D tables loaded in next_observation, or if
@@ Line 171: / Line 201: @@
             # instead.
             my $table_version = $bufr->get_table_version();
-            $bufr->load_Ctable("C$table_version", DEFAULT_CTABLE);
+            my $tableformat = Geo::BUFR->get_tableformat();
+            if ($tableformat eq 'BUFRDC') {
+                $bufr->load_Ctable("C$table_version", DEFAULT_CTABLE_BUFRDC);
+            } elsif ($tableformat eq 'ECCODES')  {
+                $bufr->load_Ctable("$table_version", DEFAULT_CTABLE_ECCODES);
+            }
         }
         my $current_subset_number = $bufr->get_current_subset_number();
-        my $nsubsets = $bufr->get_number_of_subsets();
+        # If next_observation() did find a BUFR message, subset number
+        # should have been set to at least 1 (even in a 0 subset message)
+        last READLOOP if $current_subset_number == 0;
-        if ($current_subset_number == 1) {
+        if ($current_subset_number == 1 || $option{nodata}) {
             $current_message_number = $bufr->get_current_message_number();
             $current_ahl = $bufr->get_current_ahl() || '';
@@ Line 198: / Line 235: @@
                 $section013_dumped = 1;
             }
+            next READLOOP if $option{nodata};
         } else { # subset number > 1
             next READLOOP if ($option{filter}
@@ Line 221: / Line 259: @@
         }
-        # If this is last message and there is a BUFR formatting
+        printf $OUT "\nSubset %d\n", $current_subset_number;
-        # error, we might end up here with current subset number 0
-        if ($current_subset_number > 0) {
+        # If an error is encountered during dumping of section 4, skip
-            printf $OUT "\nSubset %d\n", $current_subset_number;
+        # this subset while printing the error message to STDERR, also
-            if ($option{bitmap}) {
+        # displaying ahl of bulletin if found.
-                print $OUT $bufr->dumpsection4_with_bitmaps($data, $descriptors, $width);
+        my $dump;
-            } else {
+        eval {
-                print $OUT $bufr->dumpsection4($data, $descriptors, $width);
+            $dump = ( $option{bitmap} )
-            }
+                ? $bufr->dumpsection4_with_bitmaps($data, $descriptors,
+                                                   $current_subset_number, $width)
+                : $bufr->dumpsection4($data, $descriptors, $width);
+        };
+        if ($@) {
+            warn $@;
+            my $error_msg = "In message $current_message_number"
+                . " and subset $current_subset_number";
+            $error_msg .= " contained in bulletin with ahl $current_ahl\n"
+                if $current_ahl;
+            warn $error_msg;
+            exit(1) if $option{on_error_stop};
+            next READLOOP;
+        } else {
+            print $OUT $dump;
         }
     }
@@ Line 270: / Line 322: @@
 # Filter routines
-# Read in content of $filter_file into variables @fid, @fiv,
+sub filter_on_ahl {
+    my $obj = shift;
+    my $ahl_regexp = shift;
+    my $ahl = $obj->get_current_ahl() || '';
+    return $ahl =~ $ahl_regexp ? 0 : 1;
+}
+# Read in contents of $filter_file into variables @fid, @fiv,
 # @num_desc, @num_val and $num_criteria, which are defined above.
+# Note that index 0 of the arrays is not used.
 sub read_filter_file {
     my $filter_file = shift;
@@ Line 297: / Line 357: @@
         } else {
             my @values = split;
+            # Check that value line contains correct number of values
+            die "Number of values doesn't match number of descriptors"
+                . " for line $. in filter file '$filter_file'"
+                if scalar @values != scalar @{$fid[$num_criteria]};
+            # Remove leading 0's in numerical values (to prepare for string comparison)
+            for $_ (@values) { s/^0+(\d+)$/$1/ };
             $fiv[$num_criteria]->[++$num_val[$num_criteria]] = \@values;
         }
@@ Line 313: / Line 379: @@
     my ($data, $descriptors) = @_;
-    my $num_ordinary_criteria = @fid - $num_required_criteria;
+    my $num_ordinary_criteria = $#fid - $num_required_criteria;
     my $num_success_req_criteria = 0; # Number of required criteria successfully fulfilled
     my $num_success_ord_criteria = 0; # Number of ordinary criteria successfully fulfilled
@@ Line 358: / Line 424: @@
                     for (my $j = 0; $j < @{$descriptors}; $j++) {
                         if ($descriptors->[$j] == $filter_desc) {
+                            next DESC if !defined $data->[$j];
                             (my $val = $data->[$j]) =~ s/^\s*(.*?)\s*$/$1/;
                             if ($val eq $fiv[$filter_criterion]->[$line]->[$idesc]) {
@@ Line 370: / Line 437: @@
                                                  or $num_success_ord_criteria > 0)) {
                                             return 0; # Don't filter this observation
+                                        } else {
+                                            next DESC;
                                         }
                                     } else {
@@ Line 398: / Line 467: @@
 =pod
+=encoding utf8
 =head1 SYNOPSIS
   bufrread.pl <bufr file(s)>
+      [--ahl <ahl_regexp>]
+      [--all_operators]
+      [--bitmap]
       [--codetables]
       [--data_only]
-      [--param <descriptor file>]
       [--filter <filter file>]
-      [--bitmap]
+      [--help]
+      [--nodata]
       [--noqc]
-      [--outfile <filename>]
+      [--on_error_stop]
       [--optional_section]
-      [--width n]
+      [--outfile <filename>]
+      [--param <descriptor file>]
       [--strict_checking n]
-      [--all_operators]
+      [--tableformat <BUFRDC|ECCODES>]
       [--tablepath <path to BUFR tables>]
       [--verbose n]
-      [--help]
+      [--width n]
 =head1 DESCRIPTION
 Extract BUFR messages from BUFR file(s) and print the decoded content
-to screen. Will include WMO ahl if the BUFR message is part of a WMO
+to screen, including AHL (Abbreviated Header Line) if present.
-bulletin.
 Execute without arguments for Usage, with option C<--help> for some
-additional info. See also L</https://wiki.met.no/bufr.pm/start> for
+additional info. See also L<https://wiki.met.no/bufr.pm/start> for
 examples of use.
@@ Line 430: / Line 504: @@
 =head1 OPTIONS
+   --ahl <ahl_regexp>
+                   Decode BUFR messages with AHL matching <ahl_regexp> only
+   --all_operators Show replication descriptors and all operator descriptors
+                   when printing section 4
+   --bitmap        Display bit-mapped values on same line
    --codetables    Use code and flag tables to resolve values when unit
                    is [CODE TABLE] or [FLAG TABLE]
    --data_only     Print section 4 (data section) only
-   --param <descriptor file>
-                   Display parameters with descriptors in <descriptor file> only
    --filter <filter file>
                    Decode observations meeting criteria in <filter file> only
-   --bitmap        Display bit mapped values on same line
+   --help          Display Usage and explain the options used. For even
+                   more info you might prefer to consult perldoc bufrread.pl
+   --nodata        Do not print (nor decode) section 4 (data section)
    --noqc          Do not decode quality control
                    (or any descriptors following 222000)
+   --on_error_stop Stop processing as soon as an error occurs during decoding
    --outfile <filename>
                    Will print to <filename> instead of STDOUT
    --optional_section
                    Display a hex dump of optional section if present
-   --width n       Set width of field used for data values to n characters
+   --param <descriptor file>
-                   (default is 15)
+                   Display parameters with descriptors in <descriptor file> only
    --strict_checking n n=0 (default) Disable strict checking of BUFR format
                        n=1 Issue warning if (recoverable) error in
                            BUFR format
                        n=2 Croak if (recoverable) error in BUFR format.
-                           Nothing more in this message will be decoded.
+                           Nothing more in this message/subset will be decoded.
-   --all_operators Show all operator descriptors when printing section 4
+   --tableformat   Currently supported are BUFRDC and ECCODES (default is BUFRDC)
    --tablepath <path to BUFR tables>
                    Set path to BUFR tables (overrides ENV{BUFR_TABLES})
-   --verbose n     Set verbose level to n, 0<=n<=5 (default 0). n=1 will
+   --verbose n     Set verbose level to n, 0<=n<=6 (default 0). n=1 will
                    show the tables loaded.
-   --help          Display Usage and explain the options used. For even
+   --width n       Set width of field used for data values to n characters
-                   more info you might prefer to consult perldoc bufrread.pl
+                   (default is 15)
 Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>.
 To avoid having to use the C<--tablepath> option, you are advised to
-set the invironment variable BUFR_TABLES to the directory where your
+set the environment variable BUFR_TABLES to the directory where your
 BUFR tables are located (unless the default path provided by
-bufralter.pl works for you).
+bufrread.pl works for you). For tableformat ECCODES, see
+L<http://search.cpan.org/dist/Geo-BUFR/lib/Geo/BUFR.pm#BUFR-TABLE-FILES>
+for more info on how to set C<--tablepath> (or BUFR_TABLES).
-Each line in <descriptor file> should start with a BUFR descriptor (6
+For option C<--ahl> the <ahl_regexp> should be a Perl regular
-digits).  Rest of line will be ignored. bufrread.pl will display values
+expression. E.g. C<--ahl "ISS... ENMI"> will decode only BUFR SHIP
-for these descriptors only.
+(ISS) from CCCC=ENMI. This is the only case where a little knowledge
+of Perl might possibly be required when using the utility programs
+included in Geo::BUFR.
+For option C<--param> each line in <descriptor file> should start with
+a BUFR descriptor (6 digits).  Rest of line will be ignored.
+bufrread.pl will display values for these descriptors only.
 Using C<--filter> will decode only those observations that meet one of
-the criteria in <filter file> (and all of those criteria marked
+the criteria in <filter file> marked D: and all of those criteria
-D!). Comments (starting with #) are ignored. An example of a filter
+marked D!:. Comments (starting with #) are ignored. An example of a
-file is
+filter file is
   D: 001001
@@ Line 488: / Line 576: @@
 which decodes all observations with block number 01, two other
 specific WMO stations and one specific ship, all of which have hour
-(004004) equal to 6 or 7.  If there is no value line after a
+(004004) equal to 6 or 7. If there is no value line after a
 descriptor line, it is enough that the observation contains the
 descriptor(s), whatever the values are. So to extract all ship
-messages from a BUFR SYNOP file, the filter file should contain this
+messages from a BUFR file, the filter file should contain this single
-single line only:
+line only:
   D: 001011
 If an error occurs during decoding (typically because the required
-BUFR table is missing or message is corrupt) the message is skipped,
+BUFR table is missing or message is corrupt), the BUFR message is
-and the number of errors is reported at end of output.
+skipped with an error message printed to STDERR, and processing then
+continues with the next BUFR message. You can change this default
+behaviour, however, by setting C<--on_error_stop>.
+=head1 CAVEAT
+Option C<--bitmap> may not work properly for complicated BUFR messages.
+Namely, when the first bit-map is encountered, no more data values (or
+their descriptors) will be displayed unless they refer to the
+preceding data values by a bit-map. And output is not to be trusted
+if a bit-map refers to another bit-map or the bit-mapped values are
+combined with 204YYY (add associated field operator).
 =head1 AUTHOR
@@ Line 506: / Line 605: @@
 =head1 COPYRIGHT
-Copyright (C) 2010 met.no
+Copyright (C) 2010-2023 MET Norway
 =cut
 </code>