bufr.pm:bufrdump.pl_source [wiki.met.no]

This is an old revision of the document!
#!/usr/bin/perl -w
 
# (C) Copyright 2010, met.no
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
 
# pod included at end of file
 
use strict;
use Getopt::Long;
use Pod::Usage qw(pod2usage);
 
use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables';
my $BUFRDUMP = 'bufrdump'; # You should add path if bufrdump is
                           # installed in a non-standard place
 
# Parse command line options
my %option = ();
GetOptions(
           \%option,
           'help',
           'tablepath=s', # Set BUFR table path
           'filter=s',    # Decode observations meeting criteria in <filter file> only
           'param=s',     # Decode/print specified parameters only
           'csv',         # Use CSV format for printing
           'sort',        # Sort on stationid (wmonr/call_sign/buoy_id/aircraft)
           'sort_on=s',   # Sort on specified parameter
           'station=s',   # Filter on list of stations
           'transform=s', # Do the transformations in <transformation file>
           'lon1=i',
           'lat1=i',
           'lon2=i',
           'lat2=i',
           'obstype=s',   # Force observation type
       ) or pod2usage(-verbose => 0);
 
# User asked for help
pod2usage(-verbose => 1) if $option{help};
 
# Make sure there is at least one input file
pod2usage(-verbose => 0) unless @ARGV;
 
# --csv can only be used together with --param
pod2usage(-verbose => 0) if $option{csv} && !$option{param};
 
# Prevent ECMWF software from printing table info
$ENV{PRINT_TABLE_NAMES} = 'false';
 
# Set BUFR table path environment variable used by bufrdump
if ($option{tablepath}) {
    # Command line option --tablepath overrides all
    $ENV{BUFR_TABLES} = $option{tablepath};
} elsif (!$ENV{BUFR_TABLES}) {
    $ENV{BUFR_TABLES} = DEFAULT_TABLE_PATH;
}
# ECMWF software requires trailing '/' in bufrpath
$ENV{BUFR_TABLES} .= '/' if substr($ENV{BUFR_TABLES},-1) ne '/';
 
my $obstype = $option{obstype} ? "--obstype $option{obstype}" : '';
my $filter = $option{filter} ? "--filter $option{filter}" : '';
my $lon1 = $option{lon1} ? "--lon1 $option{lon1}" : '';
my $lat1 = $option{lat1} ? "--lat1 $option{lat1}" : '';
my $lon2 = $option{lon2} ? "--lon2 $option{lon2}" : '';
my $lat2 = $option{lat2} ? "--lat2 $option{lat2}" : '';
 
# Any filter criteria provided?
my $criteria_ref = read_filter_file($option{filter});
 
# Any specific stations requested?
my ($req_id, $req_stn_ref) = get_requested_stations($option{station});
 
# Any specific parameters specified?
my ($params_ref, $forced_params_ref) = get_params($option{param});
 
my $csv = $option{csv} ? 1 : 0;
# First line in CSV should be the parameters
print join(',', @$params_ref) . "\n" if $csv;
 
# Any transformations of units specified?
my $transform_file = $option{transform} ? $option{transform} : 0;
my $transform_ref = read_transformation_file($transform_file);
 
# Any sorting requested?
my $sort = $option{sort} ? 1 : 0;
my $sort_on = $option{sort_on} ? $option{sort_on} : '';
 
# What kind of sorting is required (if any)?
($sort_on, my $by) = get_sort_method($sort_on, $sort);
 
# Loop for processing of BUFR input files
my %data_of = ();
foreach my $inputfname (@ARGV) {
 
    # Dump the content of the BUFR file using the Fortran program $BUFRDUMP
    my $dump = `$BUFRDUMP $obstype $filter $lon1 $lat1 $lon2 $lat2 $inputfname`;
    die if $?; # Reason for bufrdump failing should have been printed to STDERR
 
    # Then process the output from the dump
    my @lines = split /\n/, $dump;
    # Add an empty line to simplify processing
    push @lines, '';
    my $stnid = '';
 
    # Skip first (blank) line
    shift @lines;
 
    my @lines_to_print;
    my %msg; # Hash with parameter name as key, parameter value as value
 
  LINE:while (defined(my $line = shift @lines)) {
        # Each new message starts with a blank line
        if ($line !~ /^\s*$/) {
            # Skip error messages from libbufr, which should start with space(s)
            next if $line =~ /^\s+/;
            # Build up the message to be (possibly) printed
            my ($param, $value) = ($line =~ /^(.+)=\s*(.*?)\s*$/);
            if ($transform_file && $transform_ref->{$param}) {
                # Replace value with transformed value
                my $transform = $transform_ref->{$param};
                $transform =~ s/\$x/$value/g;
                $value = eval $transform;
                die "Couldn't parse following transform:\n"
                    . $transform . "\nReason:\n$@" if $@;
                $line =~ s/=.*/=$value/;
            }
            $msg{$param} = $value;
            push @lines_to_print, $line;
        }
 
        if ($line =~ /^\s*$/ or @lines == 0) {
            # A full message has been completed. Should it be printed?
            if ($filter && filter_obs(\%msg, $criteria_ref)) {
                # Skip this message
            } elsif ($req_id && filter_station(\%msg, $req_id, $req_stn_ref)) {
                # Skip this station
            } else {
                # Print the message (or if --sort or --sort_on: save the message)
                my $txt = '';
                if ($params_ref) {
                    # Print the params in @$params_ref if exists in
                    # message, in same order as in @$params_ref
                    foreach my $name (@$params_ref) {
                        if (exists $msg{$name}) {
                            $txt .= $csv ? $msg{$name} . ',' : "$name=$msg{$name}\n";
                        } elsif ($forced_params_ref->{$name}) {
                            $txt .= $csv ? '-32767,' : "$name=-32767\n";
                        } elsif ($csv) {
                            $txt .= ',';
                        }
                    }
                } else {
                    foreach my $line2 (@lines_to_print) {
                        $line2 =~ s/=\s+/=/;
                        $txt .= $line2 . "\n";
                    }
                }
                chop $txt if $csv; # removes last ','
                if ($txt) {
                    if ($sort) {
                        # Sort wmonr before call sign before buoy_id before aircraft
                        if ($msg{wmonr}) {
                            $stnid = '00_' . $msg{wmonr};
                        } elsif ($msg{call_sign}) {
                            $stnid = '10_' . $msg{call_sign};
                        } elsif ($msg{buoy_id}) {
                            $stnid = '10_' . $msg{buoy_id};
                        } elsif ($msg{aircraft}) {
                            $stnid = '10_' . $msg{aircraft};
                        } else {
                            # Skip observation if no station identification found
                            next LINE;
                        }
                        if ($sort_on) {
                            my $val = exists $msg{$sort_on} ? $msg{$sort_on} : '';
                            my $key = $stnid . '|' . $val;
                            $data_of{$key} = exists $data_of{$key}
                                ? $data_of{$key} . "$txt \n" : "$txt \n";
                        } else {
                            $data_of{$stnid} = exists $data_of{$stnid}
                                ? $data_of{$stnid} . "$txt \n" : "$txt \n";
                        }
                    } elsif ($sort_on) {
                        my $val = exists $msg{$sort_on} ? $msg{$sort_on} : '';
                        $data_of{$val} = exists $data_of{$val}
                            ? $data_of{$val} . "$txt \n" : "$txt \n";
                    } else {
                        # No sorting. We can print the line immediately
                        print $txt .= "\n" if $txt;
                    }
                }
                $txt = '';
            }               # Finished message
            @lines_to_print = ();
            %msg = ();
        }
    }
}
 
# If sorting requested, we cannot print before now
if ($sort && $sort_on) {
    for (sort $by keys %data_of) {
        print $data_of{$_};
    }
} elsif ($sort) {
    for (sort keys %data_of) {
        print $data_of{$_};
    }
} elsif ($sort_on) {
    # Print observations with missing value for the sort parameter lastly
    my $data_of_missing_value = $data_of{''} || '';
    delete $data_of{''};
    for (sort $by keys %data_of) {
        print $data_of{$_};
    }
    print $data_of_missing_value if $data_of_missing_value;
}
 
 
# Read the parameters into @params, those preceded by an exclamation
# mark also into %forced_params, and return references to these two.
sub get_params {
    my $params = shift;
    return if ! $params;
 
    my @params;
    my %forced_params;
 
    if ($params =~ /,/) {
        # Argument to --params is a comma separated list
        my @p = split /,/, $params;
        foreach my $name (@p) {
            if ($name =~ /^!/) {
                $name = substr $name, 1;
                $forced_params{$name} = 1;
            }
            push @params, $name;
        }
    } else {
        # Argument to --params is a file
        open my $PARAM, '<', $params
            or die "Cannot open file $params: $!";
        while (my $name = <$PARAM>) {
            # Skip blank lines and comment lines
            $name =~ s/^\s+//;
            $name =~ s/\s+$//;
            next if !$name || $name =~ /^#/;
            if ($name =~ /^!/) {
                $name = substr $name, 1;
                $forced_params{$name} = 1;
            }
            push @params, $name;
        }
        close $PARAM or die "Cannot close $params: $!";
    }
    return \@params, \%forced_params;
}
 
 
sub read_transformation_file {
    my $transform_file = shift;
    return if !$transform_file;
 
    open my $TRANSFORM, '<', $transform_file
        or die "Cannot open $transform_file: $!";
 
    # Read in the transformations, skipping blank lines and comment
    # lines
    my %transform_of;
    while (my $line = <$TRANSFORM>) {
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
         next if !$line || $line =~ /^#/;
        my ($param, $transform) = split /=/, $line, 2;
        die "Invalid transformation in $transform_file:\n$line\n"
            unless $transform;
        $param =~ s/\s+$//;
        $transform =~ s/^\s+//;
        $transform_of{$param} = $transform;
    }
    return \%transform_of;
}
 
sub read_filter_file {
    my $filter_file = shift;
    return if !$filter_file;
 
    my @allowed_operators =
        ('=',
         '<',
         '<=',
         '>',
         '>=',
         '!=',
     );
    my @criteria;
 
    open my $FILTER, '<', $filter_file
        or die "Cannot open $filter_file: $!";
    # Skip the criteria meant for Fortran parsing, i.e. proceed to
    # first line following a blank line
    while (<$FILTER>) {
        last if $_ =~ /^\s*$/;
    }
 
    # Read the filter criteria meant for Perl parsing, skipping blank
    # lines and comment lines
    while (my $line = <$FILTER>) {
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if !$line || $line =~ /^#/;
        push @criteria, $line;
    }
 
    # Check that the criteria are properly formatted
    foreach my $criterium (@criteria) {
        # Naked parameter is ok
        next if $criterium =~ /^\w+$/;
 
        my $op = (split / +/, $criterium)[1];
        if (!defined($op) or grep(/[+*?\\]/, $op) or !grep(/^$op$/, @allowed_operators) ) {
            print "Error in $filter_file, line $. is badly formatted"
                . " or operator not supported:\n$criterium";
            exit 1;
        }
    }
    return \@criteria;
}
 
# Return true (1) if observation is to be filtered, i.e. does not
# comply with at least one of the <param> or <param> <operator>
# <value> filter criteria in filter file
sub filter_obs {
    my $msg_ref = shift;
    my $criteria_ref = shift;
 
    my @ascii_params = qw(aircraft call_sign icao_id name obstime type);
 
    foreach my $criterium (@$criteria_ref) {
        my ($f_param, $f_operator, $f_value) = split / +/, $criterium, 3;
        return 1 unless exists $msg_ref->{$f_param};
        next if not defined $f_operator; # Naked parameter criterium. Parameter
                                         # present, so criterium fulfilled
        chomp $f_value;
        if ($f_operator eq '=') {
            if (grep {$_ eq $f_param} @ascii_params) {
                $msg_ref->{$f_param} =~ s/\s*$//;
                return 1 unless $msg_ref->{$f_param} eq $f_value;
            } else {
                return 1 unless $msg_ref->{$f_param} == $f_value;
            }
        } elsif ($f_operator eq '<') {
            return 1 unless $msg_ref->{$f_param} < $f_value;
        } elsif ($f_operator eq '<=') {
            return 1 unless $msg_ref->{$f_param} <= $f_value;
        } elsif ($f_operator eq '>') {
            return 1 unless $msg_ref->{$f_param} > $f_value;
        } elsif ($f_operator eq '>=') {
            return 1 unless $msg_ref->{$f_param} >= $f_value;
        } elsif ($f_operator eq '!=') {
            if (grep {$_ eq $f_param} @ascii_params) {
                $msg_ref->{$f_param} =~ s/\s*$//;
                return 1 unless $msg_ref->{$f_param} ne $f_value;
            } else {
                return 1 unless $msg_ref->{$f_param} != $f_value;
            }
        }
    }
 
    # All filter conditions have been fullfilled
    return 0;
}
 
sub get_requested_stations {
    my $req_stations = shift;
    return if !$req_stations;
    die "Station list must start with 'wmonr=', 'call_sign=', "
        . "'buoy_id=' or aircraft='"
        unless $req_stations =~ /^(wmonr=|call_sign=|buoy_id=|aircraft=)/;
 
    my ($id, $rest) = split /=/, $req_stations;
    my @stations = split /,/, $rest;
    if ($id eq 'wmonr') {
        my @req_stn;
        foreach my $station (@stations) {
            # Turn $station into a 5 digit wmonr
            $station =~ s/^0+//;
            $station += 1000 if $station < 1000;
            $station = sprintf("%05d", $station);
            push @req_stn, $station;
        }
        return ($id, \@req_stn);
    } else {
        return ($id, \@stations);
    }
}
 
# Return true (1) if observation does not contain one of the stations
# listed in @$req_stn_ref (of type $req_id), i.e. if this observation
# should be filtered away
sub filter_station {
    my ($msg_ref, $req_id, $req_stn_ref) = @_;
 
    return 1 unless $msg_ref->{$req_id};
    my $stn = $msg_ref->{$req_id};
 
    return !grep { $_ eq $stn } @$req_stn_ref;
}
 
 
# When --sort_on is used, need to supply the sort method. Also returns
# input parameter $sort_on with possible trailing '-' (or '+')
# stripped off
sub get_sort_method {
    my ($sort_on, $sort) = @_;
    return if !$sort_on;
 
    my $ascending_sort = 1;
    # A minus sign appended to the sort parameter means descending sort
    if ($sort_on =~ /-$/) {
        $ascending_sort = 0;
        chop $sort_on;
    }
    # Just in case someone adds a '+' to signify ascending sort
    if ($sort_on =~ /[+]$/) {
        chop $sort_on;
    }
 
    my @ascii_params = qw(aircraft call_sign icao_id obstime name type);
    my $lexical_sort = grep {$_ eq $sort_on} @ascii_params;
 
    my $sort_sub;
    if ($sort) {
        $sort_sub = sub {
            my ($stn_a, $val_a) = split /\|/, $a;
            my ($stn_b, $val_b) = split /\|/, $b;
            # Sort on stationid is always lexical and ascending
            my $cmp = $stn_a cmp $stn_b;
            if ($cmp) {
                return $cmp;
            } else {
                # Stationids are equal. Sort on value of sort_on parameter
                if ($lexical_sort) {
                    if ($ascending_sort) {
                        return $val_a cmp $val_b;
                    } else {
                        return $val_b cmp $val_a;
                    }
                } else {
                    # Numerical sort
                    if ($ascending_sort) {
                        return $val_a <=> $val_b;
                    } else {
                        return $val_b <=> $val_a;
                    }
                }
            }
        }
    } else {
        # Sort restricted to sort_on parameter
        $sort_sub = sub {
            if ($lexical_sort) {
                if ($ascending_sort) {
                    return $a cmp $b;
                } else {
                    return $b cmp $a;
                }
            } else {
                # Numerical sort
                if ($ascending_sort) {
                    return $a <=> $b;
                } else {
                    return $b <=> $a;
                }
            }
        }
    }
 
    return ($sort_on, $sort_sub);
}
 
=pod
 
=head1 SYNOPSIS
 
  bufrdump.pl <bufr file(s)>
      [--filter <filter file>]
      [--param <parameter file | parameter list> [--csv]]
      [--sort]
      [--sort_on <parameter>[-]]
      [--station <station list>]
      [--transform <transformation file>]
      [--lon1 x1]
      [--lat1 y1]
      [--lon2 x2]
      [--lat2 x2]
      [--obstype <amdar|ocea|surface|sounding>]
      [--tablepath <path to BUFR tables>]
      [--help]
 
=head1 DESCRIPTION
 
Extracts BUFR messages from BUFR file(s) and prints section 4 as
"parameter=value" lines. Calls the Fortran program bufrdump
internally, so this program must be installed at the location set in
variable $BUFRDUMP in source code.
 
Execute without arguments for Usage, with option C<--help> for some
additional info. See also L</https://wiki.met.no/bufr.pm/start> for
examples of use.
 
 
=head1 OPTIONS
 
 
  --filter <filter file>
                  Decode observations meeting criteria in <filter file> only
  --param <parameter file | parameter list> [--csv]
                  Print parameters in parameter file or comma separated
                  list (e.g. wmonr,TA) only, in same order as they occur
                  there. If --csv, the parameters vill be printed using
                  the CSV (comma separated values) format
  --sort          Sort the decoded observations on station identification;
                  first stations with wmonr, then stations with call sign,
                  buoy_id or aircraft (others left out)
  --sort_on <parameter>[-] Sort the decoded observations on increasing
                  values of <parameter>, or decreasing values if a '-'
                  follows the parameter name. E.g. --sort_on TA- will
                  sort on decreasing temperatures. Observations not
                  containing the parameter at all will be printed lastly,
                  except when --sort_on is combined with --sort (in which
                  case sorting is done firstly on station identification,
                  secondly on <parameter> with missing values printed first)
  --station <station list>
                  Print observations for stations in station list only,
                  e.g. wmonr=01384,01492
  --transform <transformation file>
                  Do the transformations of parameter values listed in
                  <transformation file>
  --lon1 x1       Decode observations with longitude >= x1 only
  --lat1 y1       Decode observations with latitude >= y1 only
  --lon2 x2       Decode observations with longitude <= x2 only
  --lat2 y2       Decode observations with latitude <= y2 only
                  x1,y1,x2,y2 should be decimal degrees
  --obstype <amdar|ocea|surface|sounding>]
                  Force observation type. If this option is not set,
		  will make an educated guess of observation type based
                  on metadata in section 1 of each BUFR message
  --tablepath <path to BUFR tables>
                  Set path to BUFR tables (overrides ENV{BUFR_TABLES})
  --help          Print this Usage (but you might instead prefer to use
                  perldoc bufrdump.pl)
 
Options may be abbreviated, e.g. --h or -h for --help.
 
To avoid having to use the C<--tablepath> option, you are adviced to
set the invironment variable BUFR_TABLES to the directory where your
BUFR tables are located (unless the default path provided by
bufrdump.pl works for you).
 
The lines in <parameter file>, or the comma separated values in
<parameter list>, should be name of the parameters you want to be
printed. For example, if you want only station identification and
temperature to be printed for a BUFR SYNOP file, either supply
 
  wmonr,call_sign,TA
 
as argument to --params, or supply a <parameter file> which should
look like this:
 
  wmonr
  call_sign
  TA
 
If you want "parameter=value" to be printed also when value is missing
in BUFR message, precede the parameter name with an exclamation mark
(e.g. '!TA'). Missing values will then be displayed as -32767. If the
argument to --param is a parameter list, you must prevent the shell
from attaching special meaning to the exclamation mark by enclosing the
list in single quotes.
 
If the parameter list consists of one parameter only, a comma must be
appended (e.g. 'wmonr,') because bufrdump.pl uses the appearence of
comma to signal that this is not a filename but parameter name(s).
 
If --csv is used in conjunction with --param, all values will be
printed using the CSV format, with first line listing the parameters,
and with missing fields printed as -32767 if the parameter is marked
with '!' in parameter file or list. With the parameter file above, the
listing may for example start like
 
  wmonr,call_sign,TA
  01001,,-1.5
  ,LF5U,9.0
 
Using --filter will decode only those observations that meet at least
one of the BUFR descriptor criteria and all of the parameter criteria
in <filter file>, where the BUFR descriptor criteria should come first
in filter file followed by a blank line, then comes the parameter
criteria which should match <param> or <param> <operator> <value>
where operator is one of =, !=, <, <=, > and >=. An example filter
file is
 
  D: 001001 I2.2
  01
  D: 001001 I2.2 001002 I3.3
  03 895
  06 252
  D: 001011 A9
  LF5U
 
  type = Manned
  NN != 8
  TA >= 5
  TA < 9.5
  RR_24
 
which decodes all observations with block number 01, two other
specific wmo stations and one specific ship, being manned stations and
having cloud cover different from 8 (but NN must be part of the
message) and temperature between 5 and 9.5 degrees Celsius and
containing precipitation for last 24 hours. Comment lines starting
with # will be ignored.
 
Another example: the simple filter file (starting with a blank line!)
 
 
  wmonr
 
will print only those observations containing a wmonr (skipping
ships).
 
To avoid the need of creating a filter file when observations for some
few stations are requested, you can provide the stations in a comma
separated list after option --station. Four examples:
 
  --station wmonr=01001,01152,01492
  --station call_sign=LF5U
  --station buoyid=64607,64609
  --station aircraft=EU3421,JHCWUURA
 
You cannot mix different kinds of stations this way (before '=' you
must choose either wmonr, call_sign, buoy_id or aircraft). Note also
that providing the stations in the BUFR descriptor part (first part)
of the filter file will speed up execution time considerably, compared
to using option --station. It is possible to combine --filter with
--station if done with some care, e.g, specifying WMO block 01 and the
required parameters in filter file, then the requested stations in
station list.
 
The --transform option is provided mainly to be able to use other
units than what is default in bufrdump.pl. The transformation file
should list the transformations wanted, one per line as
 
  <parameter> = <perl expression involving $x>
 
where $x is original value of the parameter.
 
For example, the following transformation file will display wind speed
FF and wind gust FG in knots instead of m/s, rounded to one decimal,
and cloud cover NN in % (instead of the default which is using WMO
code table 2700, roughly counting octas):
 
  FF = sprintf("%.1f", $x*1.9438)
  FG = sprintf("%.1f", $x*1.9438)
  NN = int($x*12.5 + .5)
 
If --transform is combined with --filter, the filter criteria should
refer to the transformed values. E.g. if the above NN transform to %
is to be applied for sky not all covered by clouds, you should use NN
!= 100 instead of NN != 8 in filter file.
 
=head1 AUTHOR
 
Pål Sannes E<lt>pal.sannes@met.noE<gt>
 
=head1 COPYRIGHT
 
Copyright (C) 2010 met.no
 
=cut