#!/usr/bin/perl # (C) Copyright 2010-2023 MET Norway # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # pod included at end of file use strict; use warnings; use Getopt::Long; use Pod::Usage qw(pod2usage); use Geo::BUFR; # This is actually default in BUFR.pm, but provided here to make it # easier for users to change to 'ECCODES' if preferred use constant DEFAULT_TABLE_FORMAT => 'BUFRDC'; # Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set use constant DEFAULT_TABLE_PATH_BUFRDC => '/usr/local/lib/bufrtables'; use constant DEFAULT_TABLE_PATH_ECCODES => '/usr/local/share/eccodes/definitions/bufr/tables'; # Ought to be your most up-to-date code table(s) use constant DEFAULT_CTABLE_BUFRDC => 'C0000000000000037000'; use constant DEFAULT_CTABLE_ECCODES => '0/wmo/37'; # Parse command line options my %option = (); GetOptions( \%option, 'ahl=s', # Decode BUFR messages with AHL matching only 'all_operators',# Show replication descriptors and all operator descriptors # when printing section 4 'bitmap', # Display bit-mapped values on same line 'codetables', # Use code and flag tables to resolve values 'data_only', # Print section 4 (data section) only 'filter=s', # Decode observations meeting criteria in only 'help', # Print help information and exit 'nodata', # Do not print (nor decode) section 4 (data section) 'noqc', # Do not decode quality control 'on_error_stop', # Stop processing if an error occurs 'optional_section', # Display a hex dump of optional section if present 'outfile=s', # Print to file instead of STDOUT 'param=s', # Decode parameters with descriptors in only 'strict_checking=i', # Enable/disable strict checking of BUFR format 'tableformat=s', # Set BUFR table format 'tablepath=s', # Set BUFR table path 'verbose=i', # Set verbose level to n, 0<=n<=6 (default 0) 'width=i', # Set width of values field (default is 15 characters) ) or pod2usage(-verbose => 0); # User asked for help pod2usage(-verbose => 1) if $option{help}; # Make sure there is at least one input file pod2usage(-verbose => 0) unless @ARGV; # Set verbosity level Geo::BUFR->set_verbose($option{verbose}) if $option{verbose}; # Set whether section 4 should be decoded for the BUFR module Geo::BUFR->set_nodata() if ($option{nodata}); # Set whether quality information should be decoded for the BUFR module Geo::BUFR->set_noqc() if ($option{noqc}); Geo::BUFR->set_strict_checking($option{strict_checking}) if defined $option{strict_checking}; Geo::BUFR->set_show_all_operators($option{all_operators}) if defined $option{all_operators}; # Set BUFR table format my $tableformat = (defined $option{tableformat}) ? uc $option{tableformat} : DEFAULT_TABLE_FORMAT; Geo::BUFR->set_tableformat($tableformat); # Set BUFR table path if ($option{tablepath}) { # Command line option --tablepath overrides all Geo::BUFR->set_tablepath($option{tablepath}); } elsif ($ENV{BUFR_TABLES}) { # If no --tablepath option, use the BUFR_TABLES environment variable Geo::BUFR->set_tablepath($ENV{BUFR_TABLES}); } else { # If all else fails, use the default tablepath in BUFRDC/ECCODES if ($tableformat eq 'BUFRDC') { Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH_BUFRDC); } elsif ($tableformat eq 'ECCODES') { Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH_ECCODES); } } my $ahl_regexp; if ($option{ahl}) { eval { $ahl_regexp = qr/$option{ahl}/ }; die "Argument to --ahl is not a valid Perl regular expression: $@" if $@; } # Where to direct output (including verbose output, but not output to STDERR) my $OUT; if ($option{outfile}) { open($OUT, '>', $option{outfile}) or die "Cannot open $option{outfile} for writing: $!"; } else { $OUT = *STDOUT; } my @requested_desc; if ($option{param}) { @requested_desc = read_descriptor_file($option{param}); } # Arrays over filter criteria, used if option --filter is set my @fid; # Filter descriptors, e.g. $fid[1] = [ 001001, 001002 ] my @fiv; # Filter values, e.g. $fiv[1] = [ [ 3, 895 ], [ 6 252 ] ] my @num_desc; # Number of filter descriptors for each criterion, e.g. $num_desc[1] = 2 my @num_val; # Number of filter value lines for each criterion, e.g. $num_val[1] = 2 my @required; # 1 for required criteria (D!: in filter file), 0 for others my $num_criteria = 0; my $num_required_criteria = 0; if ($option{filter}) { read_filter_file($option{filter}); } my $width = $option{width} ? $option{width} : 15; # Used to display section 2 if --optional_section is set my $sec2_code_ref = sub {return ' Hex dump:'.' 'x26 . unpack('H*',substr(shift,4))}; # Loop for processing of BUFR input files foreach my $inputfname ( @ARGV ) { my $bufr = Geo::BUFR->new(); $bufr->set_filter_cb(\&filter_on_ahl,$ahl_regexp) if $option{ahl}; # Open BUFR file $bufr->fopen($inputfname); # Process input file decode($bufr); $bufr->fclose(); } # Extract data from BUFR file. Print AHL for first message in each GTS # bulletin, print message number for each new message, print subset # number for each subset. sub decode { my $bufr = shift; # BUFR object my ($message_header, $current_message_number, $current_ahl); my $section013_dumped = 0; # Used to keep track of whether sections # 0-3 have been printed when --filter # option has been used READLOOP: while (not $bufr->eof()) { # Read next observation. If an error is encountered during # decoding, skip this observation while printing the error # message to STDERR, also displaying ahl of bulletin if found # (but skip error message if the message should be skipped on # --ahl anyway). my ($data, $descriptors); eval { ($data, $descriptors) = $bufr->next_observation(); }; if ($@) { $current_ahl = $bufr->get_current_ahl() || ''; next READLOOP if $option{ahl} && $current_ahl !~ $ahl_regexp; warn $@; # Try to extract message number and ahl of the bulletin # where the error occurred $current_message_number = $bufr->get_current_message_number(); if (defined $current_message_number) { my $error_msg = "In message $current_message_number"; $error_msg .= " contained in bulletin with ahl $current_ahl\n" if $current_ahl; warn $error_msg if $error_msg; } exit(1) if $option{on_error_stop}; next READLOOP; } next if $option{ahl} && $bufr->is_filtered(); if ($option{codetables} && !$option{nodata}) { # Load C table, trying first to use same table version as # the B and D tables loaded in next_observation, or if # this C table file does not exist, loads DEFAULT_CTABLE # instead. my $table_version = $bufr->get_table_version(); my $tableformat = Geo::BUFR->get_tableformat(); if ($tableformat eq 'BUFRDC') { $bufr->load_Ctable("C$table_version", DEFAULT_CTABLE_BUFRDC); } elsif ($tableformat eq 'ECCODES') { $bufr->load_Ctable("$table_version", DEFAULT_CTABLE_ECCODES); } } my $current_subset_number = $bufr->get_current_subset_number(); # If next_observation() did find a BUFR message, subset number # should have been set to at least 1 (even in a 0 subset message) last READLOOP if $current_subset_number == 0; if ($current_subset_number == 1 || $option{nodata}) { $current_message_number = $bufr->get_current_message_number(); $current_ahl = $bufr->get_current_ahl() || ''; $message_header = sprintf "\nMessage %d", $current_message_number; $message_header .= (defined $current_ahl) ? " $current_ahl\n" : "\n"; $section013_dumped = 0; next READLOOP if ($option{filter} && filter_observation($bufr, $data, $descriptors)); print $OUT $message_header; if (not $option{data_only}) { print $OUT $bufr->dumpsection0(); print $OUT $bufr->dumpsection1(); print $OUT $bufr->dumpsection2($sec2_code_ref) if $option{optional_section}; print $OUT $bufr->dumpsection3(); $section013_dumped = 1; } next READLOOP if $option{nodata}; } else { # subset number > 1 next READLOOP if ($option{filter} && filter_observation($bufr, $data, $descriptors)); # If subset 1 was filtered away, section 0-3 might not # have been printed yet if ($option{filter} and not $option{data_only} and not $section013_dumped) { print $OUT $bufr->dumpsection0(); print $OUT $bufr->dumpsection1(); print $OUT $bufr->dumpsection2($sec2_code_ref) if $option{optional_section}; print $OUT $bufr->dumpsection3(); $section013_dumped = 1; } } if ($option{param}) { # Reduce data and descriptors to those requested only ($data, $descriptors) = param($data, $descriptors, @requested_desc); } printf $OUT "\nSubset %d\n", $current_subset_number; # If an error is encountered during dumping of section 4, skip # this subset while printing the error message to STDERR, also # displaying ahl of bulletin if found. my $dump; eval { $dump = ( $option{bitmap} ) ? $bufr->dumpsection4_with_bitmaps($data, $descriptors, $current_subset_number, $width) : $bufr->dumpsection4($data, $descriptors, $width); }; if ($@) { warn $@; my $error_msg = "In message $current_message_number" . " and subset $current_subset_number"; $error_msg .= " contained in bulletin with ahl $current_ahl\n" if $current_ahl; warn $error_msg; exit(1) if $option{on_error_stop}; next READLOOP; } else { print $OUT $dump; } } } sub read_descriptor_file { my $descriptor_file = shift; open my $fh, '<', $descriptor_file or die "Cannot open $descriptor_file: $!"; my @requested_desc; while (<$fh>) { next unless /^\s*(\d{6})/; push @requested_desc, $1; } close $fh or die "Cannot close $descriptor_file: $!"; return @requested_desc; } # Reduce the data to those corresponding to the requested descriptors # only. sub param { my ($data, $descriptors, @requested_desc) = @_; my (@req_data, @req_desc); my $i = 0; foreach my $id ( @{$descriptors} ) { if (grep { $id == $_ } @requested_desc) { push @req_data, $data->[$i]; push @req_desc, $id; } $i++; } return (\@req_data, \@req_desc); } ################################################################################### # Filter routines sub filter_on_ahl { my $obj = shift; my $ahl_regexp = shift; my $ahl = $obj->get_current_ahl() || ''; return $ahl =~ $ahl_regexp ? 0 : 1; } # Read in contents of $filter_file into variables @fid, @fiv, # @num_desc, @num_val and $num_criteria, which are defined above. # Note that index 0 of the arrays is not used. sub read_filter_file { my $filter_file = shift; open my $fh, '<', $filter_file or die "Cannot open $filter_file: $!"; while (<$fh>) { # Remove comments and skip blank lines s/#.*//; next if /^\s*$/; if (s/^\s*D(!)?://) { my @desc = split; # Check that all descriptors are numbers foreach my $desc (@desc) { die "'$desc' cannot be a descriptor in line $. in filter file '$filter_file'" if $desc !~/^\d+$/; } # Save the criterium $num_desc[++$num_criteria] = @desc; $num_val[$num_criteria] = 0; $fid[$num_criteria] = \@desc; $required[$num_criteria] = $1 ? 1 : 0; $num_required_criteria++ if $1; } else { my @values = split; # Check that value line contains correct number of values die "Number of values doesn't match number of descriptors" . " for line $. in filter file '$filter_file'" if scalar @values != scalar @{$fid[$num_criteria]}; # Remove leading 0's in numerical values (to prepare for string comparison) for $_ (@values) { s/^0+(\d+)$/$1/ }; $fiv[$num_criteria]->[++$num_val[$num_criteria]] = \@values; } } close $fh or die "Cannot close $filter_file: $!"; return; } # Return true (observations should be filtered) if the observation # does not meet all of the D! criteria (if exists) and does not meet # any one of the other criteria (if exists) in filter file. sub filter_observation { my $bufr = shift; die "Error in filter_observation: argument not a BUFR object" unless ref($bufr) eq 'Geo::BUFR'; my ($data, $descriptors) = @_; my $num_ordinary_criteria = $#fid - $num_required_criteria; my $num_success_req_criteria = 0; # Number of required criteria successfully fulfilled my $num_success_ord_criteria = 0; # Number of ordinary criteria successfully fulfilled # loop through all different criteria: CRITERIA: foreach my $filter_criterion (1 .. $num_criteria) { if ($num_val[$filter_criterion] == 0) { # Enough to check that the descriptor(s) are present in observation my $nmatch = 0; # loop through all descriptors in criterion: foreach my $idesc (0 .. $num_desc[$filter_criterion] - 1) { my $filter_desc = $fid[$filter_criterion]->[$idesc]; for (my $j = 0; $j < @{$descriptors}; $j++) { if ($descriptors->[$j] == $filter_desc) { $nmatch++; # Matched! if ($nmatch == $num_desc[$filter_criterion]) { # All descriptors for this line in this criterion matched. # Do we need to check more criteria? if ($required[$filter_criterion]) { $num_success_req_criteria++; if ($num_success_req_criteria == $num_required_criteria and ($num_ordinary_criteria == 0 or $num_success_ord_criteria > 0)) { return 0; # Don't filter this observation } } else { $num_success_ord_criteria++; if ($num_success_req_criteria == $num_required_criteria) { return 0; # Don't filter this observation } } } } } } } else { # loop through all filter values lines (for given) criterion: LINE: foreach my $line (1 .. $num_val[$filter_criterion]) { my $nmatch = 0; # loop through all descriptors in criterion: DESC: foreach my $idesc (0 .. $num_desc[$filter_criterion] - 1) { my $filter_desc = $fid[$filter_criterion]->[$idesc]; # loop through all data in subset: for (my $j = 0; $j < @{$descriptors}; $j++) { if ($descriptors->[$j] == $filter_desc) { next DESC if !defined $data->[$j]; (my $val = $data->[$j]) =~ s/^\s*(.*?)\s*$/$1/; if ($val eq $fiv[$filter_criterion]->[$line]->[$idesc]) { $nmatch++; # Matched! if ($nmatch == $num_desc[$filter_criterion]) { # All descriptors for this line in this criterion matched. # Do we need to check more criteria? if ($required[$filter_criterion]) { $num_success_req_criteria++; if ($num_success_req_criteria == $num_required_criteria and ($num_ordinary_criteria == 0 or $num_success_ord_criteria > 0)) { return 0; # Don't filter this observation } else { next DESC; } } else { $num_success_ord_criteria++; if ($num_success_req_criteria == $num_required_criteria) { return 0; # Don't filter this observation } } } else { next DESC; } } else { # Found the descriptor, but wrong value next LINE; } } } } # End of filter descriptor loop } # End of value line loop } } # End of criteria loop # One required criterion not fulfilled, or if there are no # required criteria: none of the non-required criteria fulfilled # (so the observation should be filtered away) return 1; } =pod =encoding utf8 =head1 SYNOPSIS bufrread.pl [--ahl ] [--all_operators] [--bitmap] [--codetables] [--data_only] [--filter ] [--help] [--nodata] [--noqc] [--on_error_stop] [--optional_section] [--outfile ] [--param ] [--strict_checking n] [--tableformat ] [--tablepath ] [--verbose n] [--width n] =head1 DESCRIPTION Extract BUFR messages from BUFR file(s) and print the decoded content to screen, including AHL (Abbreviated Header Line) if present. Execute without arguments for Usage, with option C<--help> for some additional info. See also L for examples of use. =head1 OPTIONS --ahl Decode BUFR messages with AHL matching only --all_operators Show replication descriptors and all operator descriptors when printing section 4 --bitmap Display bit-mapped values on same line --codetables Use code and flag tables to resolve values when unit is [CODE TABLE] or [FLAG TABLE] --data_only Print section 4 (data section) only --filter Decode observations meeting criteria in only --help Display Usage and explain the options used. For even more info you might prefer to consult perldoc bufrread.pl --nodata Do not print (nor decode) section 4 (data section) --noqc Do not decode quality control (or any descriptors following 222000) --on_error_stop Stop processing as soon as an error occurs during decoding --outfile Will print to instead of STDOUT --optional_section Display a hex dump of optional section if present --param Display parameters with descriptors in only --strict_checking n n=0 (default) Disable strict checking of BUFR format n=1 Issue warning if (recoverable) error in BUFR format n=2 Croak if (recoverable) error in BUFR format. Nothing more in this message/subset will be decoded. --tableformat Currently supported are BUFRDC and ECCODES (default is BUFRDC) --tablepath Set path to BUFR tables (overrides ENV{BUFR_TABLES}) --verbose n Set verbose level to n, 0<=n<=6 (default 0). n=1 will show the tables loaded. --width n Set width of field used for data values to n characters (default is 15) Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>. To avoid having to use the C<--tablepath> option, you are adviced to set the environment variable BUFR_TABLES to the directory where your BUFR tables are located (unless the default path provided by bufrread.pl works for you). For tableformat ECCODES, se L for more info on how to set C<--tablepath> (or BUFR_TABLES). For option C<--ahl> the should be a Perl regular expression. E.g. C<--ahl "ISS... ENMI"> will decode only BUFR SHIP (ISS) from CCCC=ENMI. This is the only case where a little knowledge of Perl might possibly be required when using the utility programs included in Geo::BUFR. For option C<--param> each line in should start with a BUFR descriptor (6 digits). Rest of line will be ignored. bufrread.pl will display values for these descriptors only. Using C<--filter> will decode only those observations that meet one of the criteria in marked D: and all of those criteria marked D!:. Comments (starting with #) are ignored. An example of a filter file is D: 001001 1 D: 001001 001002 3 895 6 252 D: 001011 LF5U # Ekofisk D!: 004004 6 7 which decodes all observations with block number 01, two other specific WMO stations and one specific ship, all of which having hour (004004) equal to 6 or 7. If there is no value line after a descriptor line, it is enough that the observation contains the descriptor(s), whatever the values are. So to extract all ship messages from a BUFR file, the filter file should contain this single line only: D: 001011 If an error occurs during decoding (typically because the required BUFR table is missing or message is corrupt), the BUFR message is skipped with an error message printed to STDERR, and processing then continues with the next BUFR message. You can change this default behaviour, however, by setting C<--on_error_stop>. =head1 CAVEAT Option C<--bitmap> may not work properly for complicated BUFR messages. Namely, when the first bit-map is encountered, no more data values (or their descriptors) will be displayed unless they refer to the preceding data values by a bit-map. And output is not to be trusted if a bit-map refers to another bit-map or the bit-mapped values are combined with 204YYY (add associated field operator). =head1 AUTHOR Pål Sannes Epal.sannes@met.noE =head1 COPYRIGHT Copyright (C) 2010-2023 MET Norway =cut