This is an old revision of the document!
#!/usr/bin/perl -w # (C) Copyright 2010, met.no # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # Usage: bufrread <bufr file(s)> [options] # Extract Bufr messages from bufr file(s) and decode them. # See subroutine usage_verbose for explanation of the options. # Author: P.Sannes met.no 2010 use strict; use Getopt::Long; # metno module use BUFR; my $DEFAULT_TABLE_PATH = '/usr/local/lib/emos/bufrtables'; my $DEFAULT_CTABLE = 'C0000000000088013001'; # Parse command line options my %option = (); GetOptions( \%option, 'all_operators',# Show all operator descriptors when printing section 4 'bitmap', # Display bit mapped values on same line 'codetables', # Use code and flag tables to resolve values 'data_only', # Print section 4 (data section) only 'filter=s', # Decode observations meeting criteria in <filter file> only 'help', # Print help information and exit 'noqc', # Do not decode quality control 'optional_section', # Display a hex dump of optional section if present 'outfile=s', # Print to file instead of STDOUT 'param=s', # Decode parameters with descriptors in <descriptor file> only 'strict_checking=i', # Enable/disable strict checking of BUFR format 'tablepath=s', # Set BUFR table path 'verbose=i', # Set verbose level to n, 0<=n<=3 (default 0) 'width=i', # Set width of values field (default is 15 characters) ) or die "Wrong option(s), execute $0 without arguments for Usage\n"; # User asked for help usage_verbose() if $option{help}; # Make sure there is at least one input file usage() unless @ARGV; my $verbose = $option{verbose} ? $option{verbose} : 0; # Set verbosity level for the BUFR module. Must be set also for each # BUFR object generated BUFR->set_verbose($verbose); # Set whether quality information should be decoded for the BUFR module BUFR->set_noqc() if ($option{noqc}); BUFR->set_strict_checking($option{strict_checking}) if defined $option{strict_checking}; BUFR->set_show_all_operators($option{all_operators}) if defined $option{all_operators}; # Set BUFR table path if ($option{tablepath}) { # Command line option --tablepath overrides all BUFR->set_tablepath($option{tablepath}); } elsif ($ENV{BUFR_TABLES}) { # If no --tablepath option, use the BUFR_TABLES environment variable BUFR->set_tablepath($ENV{BUFR_TABLES}); } else { # If all else fails, use the libemos bufrtables BUFR->set_tablepath($DEFAULT_TABLE_PATH); } # Where to direct output (including verbose output, but not output to STDERR) my $OUT; if ($option{outfile}) { open($OUT, '>',$option{outfile}) or die "Cannot open $option{outfile} for writing: $!"; } else { $OUT = *STDOUT; } my @requested_desc; if ($option{param}) { @requested_desc = read_descriptor_file($option{param}); } # Arrays over filter criteria, used if option --filter is set my @fid; # Filter descriptors, .e.g. $fid[1] = [ 001001, 001002 ] my @fiv; # Filter values, e.g. $fiv[1] = [ [ 3, 895 ], [ 6 252 ] ] my @num_desc; # Number of filter descriptors for each criterion, e.g. $num_desc[1] = 2 my @num_val; # Number of filter value lines for each criterion, e.g. $num_val[1] = 2 my @required; # 1 for required criteria (D!: in filter file), 0 for others my $num_criteria = 0; my $num_required_criteria = 0; if ($option{filter}) { read_filter_file($option{filter}); } my $width = $option{width} ? $option{width} : 15; # Used to display section 2 if --optional_section is set my $sec2_code_ref = sub {return ' Hex dump:'.' 'x26 . unpack('H*',substr(shift,4))}; # Loop for processing of BUFR input files foreach my $inputfname ( @ARGV ) { my $bufr = BUFR->new(); # This sets object verbose level equal to class verbose level $bufr->set_verbose($verbose); if ($option{codetables}) { # Load C table. So far, I don't see the need for user to # choose another C table than the default table $bufr->load_Ctable($DEFAULT_CTABLE) or die "Unable to load table $DEFAULT_CTABLE"; } # Open BUFR file $bufr->fopen($inputfname); # Process input file decode($bufr); $bufr->fclose(); } # Extract data from BUFR file. # Print WMO ahl for first message in each WMO bulletin, print # message number for each new message, print subset number # if message contains more than one subset sub decode { my $bufr = shift; # BUFR object my ($message_header, $current_message_number, $current_ahl); my $section013_dumped = 0; # Used to keep track of whether sections # 0-3 have been printed when --filter # option has been used READLOOP: while (not $bufr->feof()) { # Read next observation. If an error is encountered during # decoding, skip this observation while printing the error # message to STDERR, also displaying ahl of bulletin if found. my ($data, $descriptors); eval { ($data, $descriptors) = $bufr->next_observation(); }; if ($@) { warn $@; # Try to extract message number and ahl of the bulletin # where the error occurred eval { $current_message_number = $bufr->get_current_message_number(); $current_ahl = $bufr->get_current_ahl() || ''; my $error_msg; $error_msg = "In message $current_message_number" if $current_message_number; $error_msg .= " contained in bulletin with ahl $current_ahl\n" if $current_ahl; warn $error_msg if $error_msg; }; next READLOOP; } if ($option{param}) { # Reduce data and descritors to those requested only ($data, $descriptors) = param($data, $descriptors, @requested_desc); } my $current_subset_number = $bufr->get_current_subset_number(); my $nsubsets = $bufr->get_number_of_subsets(); if ($current_subset_number == 1) { $current_message_number = $bufr->get_current_message_number(); $current_ahl = $bufr->get_current_ahl() || ''; $message_header = sprintf "\nMessage %d", $current_message_number; $message_header .= (defined $current_ahl) ? " $current_ahl\n" : "\n"; $section013_dumped = 0; next READLOOP if $option{filter} and filter_observation($bufr, $data, $descriptors); print $OUT $message_header; if (not $option{data_only}) { print $OUT $bufr->dumpsection0(); print $OUT $bufr->dumpsection1(); print $OUT $bufr->dumpsection2($sec2_code_ref) if $option{optional_section}; print $OUT $bufr->dumpsection3(); $section013_dumped = 1; } } else { # subset number > 1 next READLOOP if $option{filter} and filter_observation($bufr, $data, $descriptors); # If subset 1 was filtered away, section 0-3 might not # have been printed yet if ($option{filter} and not $option{data_only} and not $section013_dumped) { print $OUT $bufr->dumpsection0(); print $OUT $bufr->dumpsection1(); print $OUT $bufr->dumpsection2($sec2_code_ref) if $option{optional_section}; print $OUT $bufr->dumpsection3(); $section013_dumped = 1; } } if ($nsubsets > 1) { printf $OUT "\nSubset %d\n", $current_subset_number; } if ($option{bitmap}) { print $OUT $bufr->dumpsection4_with_bitmaps($data, $descriptors, $width); } else { print $OUT $bufr->dumpsection4($data, $descriptors, $width); } } } sub read_descriptor_file { my $descriptor_file = shift; open my $fh, '<', $descriptor_file or die "Cannot open $descriptor_file: $!"; my @requested_desc; while (<$fh>) { next unless /^\s*(\d{6})/; push @requested_desc, $1; } return @requested_desc; } # Reduce the data to those corresponding to the requested descriptors # only sub param { my ($data, $descriptors, @requested_desc) = @_; my (@req_data, @req_desc); my $i = 0; foreach my $id ( @{$descriptors} ) { if (grep { $id == $_ } @requested_desc) { push @req_data, $data->[$i]; push @req_desc, $id; } $i++; } return (\@req_data, \@req_desc); } sub usage { print <<"EOF"; Usage: $0 <bufr file(s)> [--codetables] [--data_only] [--param <descriptor file>] [--filter <filter file>] [--bitmap] [--noqc] [--outfile <filename>] [--optional_section] [--width n] [--strict_checking n] [--all_operators] [--tablepath <path to BUFR tables>] [--verbose n] [--help] Try 'bufrread.pl --help' for more information. EOF exit 0; } sub usage_verbose { print <<"EOF"; Usage: $0 <bufr file(s)> [options] Will print section 0-4 in BUFR messages in <bufr file(s)>. Options (may be abbreviated, e.g. --h or -h for --help) are: --codetables Use code and flag tables to resolve values where unit is [CODE TABLE] or [FLAG TABLE] --data_only Print section 4 (data section) only --param <descriptor file> Decode parameters with descriptors in <descriptor file> only --filter <filter file> Decode observations meeting criteria in <filter file> only --bitmap Display bit mapped values on same line --noqc Do not decode quality control (or any descriptors following 222000) --outfile <filename> Will print to <filename> instead of STDOUT --optional_section Display a hex dump of optional section if present --width n Set width of field used for data values to n characters (default is 15) --strict_checking n n=0 (default) Disable strict checking of BUFR format n=1 Issue warning if (recoverable) error in BUFR format n=2 Croak if (recoverable) error in BUFR format. Nothing more in this message will be decoded. --all_operators Show all operator descriptors when printing section 4 --tablepath <path to BUFR tables> Set path to BUFR tables (overrides ENV{BUFR_TABLES}) --verbose n Set verbose level to n, 0<=n<=3 (default 0) --help Print this Usage You should probably set export BUFR_TABLES=$DEFAULT_TABLE_PATH or use the --tablepath option. Each line in <descriptor file> should start with a BUFR descriptor (6 digits). Rest of line will be ignored. bufrread will extract values for these descriptors only. If used together with --filter, <descriptor file> must contain all descriptors in <filter file>. Using --filter will decode only those observations that meet one of the criteria in <filter file> (and all of those criteria marked D!). Comments (starting with #) are ignored. An example of a filter file is # All stations in WMO block 01 D: 001001 1 D: 001001 001002 3 895 6 252 D: 001011 LF5U # Ekofisk D!: 004004 6 7 which decodes all observations with block number 01, two other specific wmo stations and one specific ship, all of which having hour (004004) equal to 6 or 7. If there is no value line after a descriptor line, it is enough that the observation contains the descriptor(s), whatever the values are. So to extract all ship messages from a BUFR synop file, the filter file should contain this single line only: D: 001011 If an error occurs during decoding (typically because the required BUFR table is missing or message is corrupt) the message is skipped, and the number of errors is reported at end of output. EOF exit 0; } ################################################################################### # Filter routines # Read in content of $filter_file into variables @fid, @fiv, # @num_desc, @num_val and $num_criteria, which are defined above. sub read_filter_file { my $filter_file = shift; open my $fh, '<', $filter_file or die "Cannot open $filter_file: $!"; while (<$fh>) { # Remove comments s/#.*//; next if /^$/; if (s/^\s*D(!)?://) { my @desc = split; $num_desc[++$num_criteria] = @desc; $num_val[$num_criteria] = 0; $fid[$num_criteria] = \@desc; $required[$num_criteria] = $1 ? 1 : 0; $num_required_criteria++ if $1; } else { my @values = split; $fiv[$num_criteria]->[++$num_val[$num_criteria]] = \@values; } } return; } # Return true (observations should be filtered) if the observation # does not meet all of the D! criteria (if exists) and does not meet # any one of the other criteria (if exists) in filter file sub filter_observation { my $bufr = shift; die "Error in filter_observation: argument not a BUFR object" unless ref($bufr) eq 'BUFR'; my ($data, $descriptors) = @_; my $num_ordinary_criteria = @fid - $num_required_criteria; my $num_success_req_criteria = 0; # Number of required criteria successfully fulfilled my $num_success_ord_criteria = 0; # Number of ordinary criteria successfully fulfilled # loop through all different criteria: CRITERIA: foreach my $filter_criterion (1 .. $num_criteria) { if ($num_val[$filter_criterion] == 0) { # Enough to check that the descriptor(s) are present in observation my $nmatch = 0; # loop through all descriptors in criterion: foreach my $idesc (0 .. $num_desc[$filter_criterion] - 1) { my $filter_desc = $fid[$filter_criterion]->[$idesc]; for (my $j = 0; $j < @{$descriptors}; $j++) { if ($descriptors->[$j] == $filter_desc) { $nmatch++; # Matched! if ($nmatch == $num_desc[$filter_criterion]) { # All descriptors for this line in this criterion matched. # Do we need to check more criteria? if ($required[$filter_criterion]) { $num_success_req_criteria++; if ($num_success_req_criteria == $num_required_criteria and ($num_ordinary_criteria == 0 or $num_success_ord_criteria > 0)) { return 0; # Don't filter this observation } } else { $num_success_ord_criteria++; if ($num_success_req_criteria == $num_required_criteria) { return 0; # Don't filter this observation } } } } } } } else { # loop through all filter values lines (for given) criterion: LINE: foreach my $line (1 .. $num_val[$filter_criterion]) { my $nmatch = 0; # loop through all descriptors in criterion: DESC: foreach my $idesc (0 .. $num_desc[$filter_criterion] - 1) { my $filter_desc = $fid[$filter_criterion]->[$idesc]; # loop through all data in subset: for (my $j = 0; $j < @{$descriptors}; $j++) { if ($descriptors->[$j] == $filter_desc) { (my $val = $data->[$j]) =~ s/^\s*(.*?)\s*$/$1/; if ($val eq $fiv[$filter_criterion]->[$line]->[$idesc]) { $nmatch++; # Matched! if ($nmatch == $num_desc[$filter_criterion]) { # All descriptors for this line in this criterion matched. # Do we need to check more criteria? if ($required[$filter_criterion]) { $num_success_req_criteria++; if ($num_success_req_criteria == $num_required_criteria and ($num_ordinary_criteria == 0 or $num_success_ord_criteria > 0)) { return 0; # Don't filter this observation } } else { $num_success_ord_criteria++; if ($num_success_req_criteria == $num_required_criteria) { return 0; # Don't filter this observation } } } else { next DESC; } } else { # Found the descriptor, but wrong value next LINE; } } } } # End of filter descriptor loop } # End of value line loop } } # End of criteria loop # One required criterion not fulfilled, or if there are no # required criteria: none of the non-required criteria fulfilled # (so the observation should be filtered away) return 1; }