This is an old revision of the document!
#!/usr/bin/perl -w # (C) Copyright 2010, met.no # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # pod included at end of file use strict; use Getopt::Long; use Pod::Usage qw(pod2usage); use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables'; my $BUFRDUMP = 'bufrdump'; # You should add path if bufrdump is # installed in a non-standard place # Parse command line options my %option = (); GetOptions( \%option, 'help', 'tablepath=s', # Set BUFR table path 'filter=s', # Decode observations meeting criteria in <filter file> only 'param=s', # Decode/print specified parameters only 'csv', # Use CSV format for printing 'sort', # Sort on stationid (wmonr/call_sign/buoy_id/aircraft) 'sort_on=s', # Sort on specified parameter 'station=s', # Filter on list of stations 'transform=s', # Do the transformations in <transformation file> 'lon1=i', 'lat1=i', 'lon2=i', 'lat2=i', 'obstype=s', # Force observation type ) or pod2usage(-verbose => 0); # User asked for help pod2usage(-verbose => 1) if $option{help}; # Make sure there is at least one input file pod2usage(-verbose => 0) unless @ARGV; # --csv can only be used together with --param pod2usage(-verbose => 0) if $option{csv} && !$option{param}; # Prevent ECMWF software from printing table info $ENV{PRINT_TABLE_NAMES} = 'false'; # Set BUFR table path environment variable used by bufrdump if ($option{tablepath}) { # Command line option --tablepath overrides all $ENV{BUFR_TABLES} = $option{tablepath}; } elsif (!$ENV{BUFR_TABLES}) { $ENV{BUFR_TABLES} = DEFAULT_TABLE_PATH; } # ECMWF software requires trailing '/' in bufrpath $ENV{BUFR_TABLES} .= '/' if substr($ENV{BUFR_TABLES},-1) ne '/'; my $obstype = $option{obstype} ? "--obstype $option{obstype}" : ''; my $filter = $option{filter} ? "--filter $option{filter}" : ''; my $lon1 = $option{lon1} ? "--lon1 $option{lon1}" : ''; my $lat1 = $option{lat1} ? "--lat1 $option{lat1}" : ''; my $lon2 = $option{lon2} ? "--lon2 $option{lon2}" : ''; my $lat2 = $option{lat2} ? "--lat2 $option{lat2}" : ''; # Any filter criteria provided? my $criteria_ref = read_filter_file($option{filter}); # Any specific stations requested? my ($req_id, $req_stn_ref) = get_requested_stations($option{station}); # Any specific parameters specified? my ($params_ref, $forced_params_ref) = get_params($option{param}); my $csv = $option{csv} ? 1 : 0; # First line in CSV should be the parameters print join(',', @$params_ref) . "\n" if $csv; # Any transformations of units specified? my $transform_file = $option{transform} ? $option{transform} : 0; my $transform_ref = read_transformation_file($transform_file); # Any sorting requested? my $sort = $option{sort} ? 1 : 0; my $sort_on = $option{sort_on} ? $option{sort_on} : ''; # What kind of sorting is required (if any)? ($sort_on, my $by) = get_sort_method($sort_on, $sort); # Loop for processing of BUFR input files my %data_of = (); foreach my $inputfname (@ARGV) { # Dump the content of the BUFR file using the Fortran program $BUFRDUMP my $dump = `$BUFRDUMP $obstype $filter $lon1 $lat1 $lon2 $lat2 $inputfname`; die if $?; # Reason for bufrdump failing should have been printed to STDERR # Then process the output from the dump my @lines = split /\n/, $dump; # Add an empty line to simplify processing push @lines, ''; my $stnid = ''; # Skip first (blank) line shift @lines; my @lines_to_print; my %msg; # Hash with parameter name as key, parameter value as value LINE:while (defined(my $line = shift @lines)) { # Each new message starts with a blank line if ($line !~ /^\s*$/) { # Skip error messages from libbufr, which should start with space(s) next if $line =~ /^\s+/; # Build up the message to be (possibly) printed my ($param, $value) = ($line =~ /^(.+)=\s*(.*?)\s*$/); if ($transform_file && $transform_ref->{$param}) { # Replace value with transformed value my $transform = $transform_ref->{$param}; $transform =~ s/\$x/$value/g; $value = eval $transform; die "Couldn't parse following transform:\n" . $transform . "\nReason:\n$@" if $@; $line =~ s/=.*/=$value/; } $msg{$param} = $value; push @lines_to_print, $line; } if ($line =~ /^\s*$/ or @lines == 0) { # A full message has been completed. Should it be printed? if ($filter && filter_obs(\%msg, $criteria_ref)) { # Skip this message } elsif ($req_id && filter_station(\%msg, $req_id, $req_stn_ref)) { # Skip this station } else { # Print the message (or if --sort or --sort_on: save the message) my $txt = ''; if ($params_ref) { # Print the params in @$params_ref if exists in # message, in same order as in @$params_ref foreach my $name (@$params_ref) { if (exists $msg{$name}) { $txt .= $csv ? $msg{$name} . ',' : "$name=$msg{$name}\n"; } elsif ($forced_params_ref->{$name}) { $txt .= $csv ? '-32767,' : "$name=-32767\n"; } elsif ($csv) { $txt .= ','; } } } else { foreach my $line2 (@lines_to_print) { $line2 =~ s/=\s+/=/; $txt .= $line2 . "\n"; } } chop $txt if $csv; # removes last ',' if ($txt) { if ($sort) { # Sort wmonr before call sign before buoy_id before aircraft if ($msg{wmonr}) { $stnid = '00_' . $msg{wmonr}; } elsif ($msg{call_sign}) { $stnid = '10_' . $msg{call_sign}; } elsif ($msg{buoy_id}) { $stnid = '10_' . $msg{buoy_id}; } elsif ($msg{aircraft}) { $stnid = '10_' . $msg{aircraft}; } else { # Skip observation if no station identification found next LINE; } if ($sort_on) { my $val = exists $msg{$sort_on} ? $msg{$sort_on} : ''; my $key = $stnid . '|' . $val; $data_of{$key} = exists $data_of{$key} ? $data_of{$key} . "$txt \n" : "$txt \n"; } else { $data_of{$stnid} = exists $data_of{$stnid} ? $data_of{$stnid} . "$txt \n" : "$txt \n"; } } elsif ($sort_on) { my $val = exists $msg{$sort_on} ? $msg{$sort_on} : ''; $data_of{$val} = exists $data_of{$val} ? $data_of{$val} . "$txt \n" : "$txt \n"; } else { # No sorting. We can print the line immediately print $txt .= "\n" if $txt; } } $txt = ''; } # Finished message @lines_to_print = (); %msg = (); } } } # If sorting requested, we cannot print before now if ($sort && $sort_on) { for (sort $by keys %data_of) { print $data_of{$_}; } } elsif ($sort) { for (sort keys %data_of) { print $data_of{$_}; } } elsif ($sort_on) { # Print observations with missing value for the sort parameter lastly my $data_of_missing_value = $data_of{''} || ''; delete $data_of{''}; for (sort $by keys %data_of) { print $data_of{$_}; } print $data_of_missing_value if $data_of_missing_value; } # Read the parameters into @params, those preceded by an exclamation # mark also into %forced_params, and return references to these two. sub get_params { my $params = shift; return if ! $params; my @params; my %forced_params; if ($params =~ /,/) { # Argument to --params is a comma separated list my @p = split /,/, $params; foreach my $name (@p) { if ($name =~ /^!/) { $name = substr $name, 1; $forced_params{$name} = 1; } push @params, $name; } } else { # Argument to --params is a file open my $PARAM, '<', $params or die "Cannot open file $params: $!"; while (my $name = <$PARAM>) { # Skip blank lines and comment lines $name =~ s/^\s+//; $name =~ s/\s+$//; next if !$name || $name =~ /^#/; if ($name =~ /^!/) { $name = substr $name, 1; $forced_params{$name} = 1; } push @params, $name; } close $PARAM or die "Cannot close $params: $!"; } return \@params, \%forced_params; } sub read_transformation_file { my $transform_file = shift; return if !$transform_file; open my $TRANSFORM, '<', $transform_file or die "Cannot open $transform_file: $!"; # Read in the transformations, skipping blank lines and comment # lines my %transform_of; while (my $line = <$TRANSFORM>) { $line =~ s/^\s+//; $line =~ s/\s+$//; next if !$line || $line =~ /^#/; my ($param, $transform) = split /=/, $line, 2; die "Invalid transformation in $transform_file:\n$line\n" unless $transform; $param =~ s/\s+$//; $transform =~ s/^\s+//; $transform_of{$param} = $transform; } return \%transform_of; } sub read_filter_file { my $filter_file = shift; return if !$filter_file; my @allowed_operators = ('=', '<', '<=', '>', '>=', '!=', ); my @criteria; open my $FILTER, '<', $filter_file or die "Cannot open $filter_file: $!"; # Skip the criteria meant for Fortran parsing, i.e. proceed to # first line following a blank line while (<$FILTER>) { last if $_ =~ /^\s*$/; } # Read the filter criteria meant for Perl parsing, skipping blank # lines and comment lines while (my $line = <$FILTER>) { $line =~ s/^\s+//; $line =~ s/\s+$//; next if !$line || $line =~ /^#/; push @criteria, $line; } # Check that the criteria are properly formatted foreach my $criterium (@criteria) { # Naked parameter is ok next if $criterium =~ /^\w+$/; my $op = (split / +/, $criterium)[1]; if (!defined($op) or grep(/[+*?\\]/, $op) or !grep(/^$op$/, @allowed_operators) ) { print "Error in $filter_file, line $. is badly formatted" . " or operator not supported:\n$criterium"; exit 1; } } return \@criteria; } # Return true (1) if observation is to be filtered, i.e. does not # comply with at least one of the <param> or <param> <operator> # <value> filter criteria in filter file sub filter_obs { my $msg_ref = shift; my $criteria_ref = shift; my @ascii_params = qw(aircraft call_sign icao_id name obstime type); foreach my $criterium (@$criteria_ref) { my ($f_param, $f_operator, $f_value) = split / +/, $criterium, 3; return 1 unless exists $msg_ref->{$f_param}; next if not defined $f_operator; # Naked parameter criterium. Parameter # present, so criterium fulfilled chomp $f_value; if ($f_operator eq '=') { if (grep {$_ eq $f_param} @ascii_params) { $msg_ref->{$f_param} =~ s/\s*$//; return 1 unless $msg_ref->{$f_param} eq $f_value; } else { return 1 unless $msg_ref->{$f_param} == $f_value; } } elsif ($f_operator eq '<') { return 1 unless $msg_ref->{$f_param} < $f_value; } elsif ($f_operator eq '<=') { return 1 unless $msg_ref->{$f_param} <= $f_value; } elsif ($f_operator eq '>') { return 1 unless $msg_ref->{$f_param} > $f_value; } elsif ($f_operator eq '>=') { return 1 unless $msg_ref->{$f_param} >= $f_value; } elsif ($f_operator eq '!=') { if (grep {$_ eq $f_param} @ascii_params) { $msg_ref->{$f_param} =~ s/\s*$//; return 1 unless $msg_ref->{$f_param} ne $f_value; } else { return 1 unless $msg_ref->{$f_param} != $f_value; } } } # All filter conditions have been fullfilled return 0; } sub get_requested_stations { my $req_stations = shift; return if !$req_stations; die "Station list must start with 'wmonr=', 'call_sign=', " . "'buoy_id=' or aircraft='" unless $req_stations =~ /^(wmonr=|call_sign=|buoy_id=|aircraft=)/; my ($id, $rest) = split /=/, $req_stations; my @stations = split /,/, $rest; if ($id eq 'wmonr') { my @req_stn; foreach my $station (@stations) { # Turn $station into a 5 digit wmonr $station =~ s/^0+//; $station += 1000 if $station < 1000; $station = sprintf("%05d", $station); push @req_stn, $station; } return ($id, \@req_stn); } else { return ($id, \@stations); } } # Return true (1) if observation does not contain one of the stations # listed in @$req_stn_ref (of type $req_id), i.e. if this observation # should be filtered away sub filter_station { my ($msg_ref, $req_id, $req_stn_ref) = @_; return 1 unless $msg_ref->{$req_id}; my $stn = $msg_ref->{$req_id}; return !grep { $_ eq $stn } @$req_stn_ref; } # When --sort_on is used, need to supply the sort method. Also returns # input parameter $sort_on with possible trailing '-' (or '+') # stripped off sub get_sort_method { my ($sort_on, $sort) = @_; return if !$sort_on; my $ascending_sort = 1; # A minus sign appended to the sort parameter means descending sort if ($sort_on =~ /-$/) { $ascending_sort = 0; chop $sort_on; } # Just in case someone adds a '+' to signify ascending sort if ($sort_on =~ /[+]$/) { chop $sort_on; } my @ascii_params = qw(aircraft call_sign icao_id obstime name type); my $lexical_sort = grep {$_ eq $sort_on} @ascii_params; my $sort_sub; if ($sort) { $sort_sub = sub { my ($stn_a, $val_a) = split /\|/, $a; my ($stn_b, $val_b) = split /\|/, $b; # Sort on stationid is always lexical and ascending my $cmp = $stn_a cmp $stn_b; if ($cmp) { return $cmp; } else { # Stationids are equal. Sort on value of sort_on parameter if ($lexical_sort) { if ($ascending_sort) { return $val_a cmp $val_b; } else { return $val_b cmp $val_a; } } else { # Numerical sort if ($ascending_sort) { return $val_a <=> $val_b; } else { return $val_b <=> $val_a; } } } } } else { # Sort restricted to sort_on parameter $sort_sub = sub { if ($lexical_sort) { if ($ascending_sort) { return $a cmp $b; } else { return $b cmp $a; } } else { # Numerical sort if ($ascending_sort) { return $a <=> $b; } else { return $b <=> $a; } } } } return ($sort_on, $sort_sub); } =pod =head1 SYNOPSIS bufrdump.pl <bufr file(s)> [--filter <filter file>] [--param <parameter file | parameter list> [--csv]] [--sort] [--sort_on <parameter>[-]] [--station <station list>] [--transform <transformation file>] [--lon1 x1] [--lat1 y1] [--lon2 x2] [--lat2 x2] [--obstype <amdar|ocea|surface|sounding>] [--tablepath <path to BUFR tables>] [--help] =head1 DESCRIPTION Extracts BUFR messages from BUFR file(s) and prints section 4 as "parameter=value" lines. Calls the Fortran program bufrdump internally, so this program must be installed at the location set in variable $BUFRDUMP in source code. Execute without arguments for Usage, with option C<--help> for some additional info. See also L</https://wiki.met.no/bufr.pm/start> for examples of use. =head1 OPTIONS --filter <filter file> Decode observations meeting criteria in <filter file> only --param <parameter file | parameter list> [--csv] Print parameters in parameter file or comma separated list (e.g. wmonr,TA) only, in same order as they occur there. If --csv, the parameters vill be printed using the CSV (comma separated values) format --sort Sort the decoded observations on station identification; first stations with wmonr, then stations with call sign, buoy_id or aircraft (others left out) --sort_on <parameter>[-] Sort the decoded observations on increasing values of <parameter>, or decreasing values if a '-' follows the parameter name. E.g. --sort_on TA- will sort on decreasing temperatures. Observations not containing the parameter at all will be printed lastly, except when --sort_on is combined with --sort (in which case sorting is done firstly on station identification, secondly on <parameter> with missing values printed first) --station <station list> Print observations for stations in station list only, e.g. wmonr=01384,01492 --transform <transformation file> Do the transformations of parameter values listed in <transformation file> --lon1 x1 Decode observations with longitude >= x1 only --lat1 y1 Decode observations with latitude >= y1 only --lon2 x2 Decode observations with longitude <= x2 only --lat2 y2 Decode observations with latitude <= y2 only x1,y1,x2,y2 should be decimal degrees --obstype <amdar|ocea|surface|sounding>] Force observation type. If this option is not set, will make an educated guess of observation type based on metadata in section 1 of each BUFR message --tablepath <path to BUFR tables> Set path to BUFR tables (overrides ENV{BUFR_TABLES}) --help Print this Usage (but you might instead prefer to use perldoc bufrdump.pl) Options may be abbreviated, e.g. --h or -h for --help. To avoid having to use the C<--tablepath> option, you are adviced to set the invironment variable BUFR_TABLES to the directory where your BUFR tables are located (unless the default path provided by bufrdump.pl works for you). The lines in <parameter file>, or the comma separated values in <parameter list>, should be name of the parameters you want to be printed. For example, if you want only station identification and temperature to be printed for a BUFR SYNOP file, either supply wmonr,call_sign,TA as argument to --params, or supply a <parameter file> which should look like this: wmonr call_sign TA If you want "parameter=value" to be printed also when value is missing in BUFR message, precede the parameter name with an exclamation mark (e.g. '!TA'). Missing values will then be displayed as -32767. If the argument to --param is a parameter list, you must prevent the shell from attaching special meaning to the exclamation mark by enclosing the list in single quotes. If the parameter list consists of one parameter only, a comma must be appended (e.g. 'wmonr,') because bufrdump.pl uses the appearence of comma to signal that this is not a filename but parameter name(s). If --csv is used in conjunction with --param, all values will be printed using the CSV format, with first line listing the parameters, and with missing fields printed as -32767 if the parameter is marked with '!' in parameter file or list. With the parameter file above, the listing may for example start like wmonr,call_sign,TA 01001,,-1.5 ,LF5U,9.0 Using --filter will decode only those observations that meet at least one of the BUFR descriptor criteria and all of the parameter criteria in <filter file>, where the BUFR descriptor criteria should come first in filter file followed by a blank line, then comes the parameter criteria which should match <param> or <param> <operator> <value> where operator is one of =, !=, <, <=, > and >=. An example filter file is D: 001001 I2.2 01 D: 001001 I2.2 001002 I3.3 03 895 06 252 D: 001011 A9 LF5U type = Manned NN != 8 TA >= 5 TA < 9.5 RR_24 which decodes all observations with block number 01, two other specific wmo stations and one specific ship, being manned stations and having cloud cover different from 8 (but NN must be part of the message) and temperature between 5 and 9.5 degrees Celsius and containing precipitation for last 24 hours. Comment lines starting with # will be ignored. Another example: the simple filter file (starting with a blank line!) wmonr will print only those observations containing a wmonr (skipping ships). To avoid the need of creating a filter file when observations for some few stations are requested, you can provide the stations in a comma separated list after option --station. Four examples: --station wmonr=01001,01152,01492 --station call_sign=LF5U --station buoyid=64607,64609 --station aircraft=EU3421,JHCWUURA You cannot mix different kinds of stations this way (before '=' you must choose either wmonr, call_sign, buoy_id or aircraft). Note also that providing the stations in the BUFR descriptor part (first part) of the filter file will speed up execution time considerably, compared to using option --station. It is possible to combine --filter with --station if done with some care, e.g, specifying WMO block 01 and the required parameters in filter file, then the requested stations in station list. The --transform option is provided mainly to be able to use other units than what is default in bufrdump.pl. The transformation file should list the transformations wanted, one per line as <parameter> = <perl expression involving $x> where $x is original value of the parameter. For example, the following transformation file will display wind speed FF and wind gust FG in knots instead of m/s, rounded to one decimal, and cloud cover NN in % (instead of the default which is using WMO code table 2700, roughly counting octas): FF = sprintf("%.1f", $x*1.9438) FG = sprintf("%.1f", $x*1.9438) NN = int($x*12.5 + .5) If --transform is combined with --filter, the filter criteria should refer to the transformed values. E.g. if the above NN transform to % is to be applied for sky not all covered by clouds, you should use NN != 100 instead of NN != 8 in filter file. =head1 AUTHOR Pål Sannes E<lt>pal.sannes@met.noE<gt> =head1 COPYRIGHT Copyright (C) 2010 met.no =cut