bufr.pm:bufrdump.pl_source [wiki.met.no]

This is an old revision of the document!
#!/usr/bin/perl -w

# (C) Copyright 2010, met.no
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

# pod included at end of file

use strict;
use Getopt::Long;
use Pod::Usage qw(pod2usage);

use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables';
my $BUFRDUMP = '/metno/local/bin/bufrdump';

# Parse command line options
my %option = ();
GetOptions(
           \%option,
           'help',
           'tablepath=s', # Set BUFR table path
           'filter=s',    # Decode observations meeting criteria in <filter file> only
           'param=s',     # Decode the parameters in <parameter file> only
           'csv',         # Use CSV format for printing
           'sort',        # Sort on stationid (wmonr/call_sign/buoy_id)
           'sort_on=s',   # Sort on specified parameter
           'transform=s', # Do the transformations in <transformation file>
           'lon1=i',
           'lat1=i',
           'lon2=i',
           'lat2=i',
       ) or pod2usage(-verbose => 0);

# User asked for help
pod2usage(-verbose => 1) if $option{help};

# Make sure there is at least one input file
pod2usage(-verbose => 0) unless @ARGV;

# --csv can only be used together with --param
pod2usage(-verbose => 0) if $option{csv} && !$option{param};

# Prevent ECMWF software from printing table info
$ENV{PRINT_TABLE_NAMES} = 'false';

# Set BUFR table path environment variable used by bufrdump
if ($option{tablepath}) {
    # Command line option --tablepath overrides all
    $ENV{BUFR_TABLES} = $option{tablepath};
} elsif (!$ENV{BUFR_TABLES}) {
    $ENV{BUFR_TABLES} = DEFAULT_TABLE_PATH;
}
# ECMWF software requires trailing '/' in bufrpath
$ENV{BUFR_TABLES} .= '/' if substr($ENV{BUFR_TABLES},-1) ne '/';

my $filter = $option{filter} ? "--filter $option{filter}" : '';
my $lon1 = $option{lon1} ? "--lon1 $option{lon1}" : '';
my $lat1 = $option{lat1} ? "--lat1 $option{lat1}" : '';
my $lon2 = $option{lon2} ? "--lon2 $option{lon2}" : '';
my $lat2 = $option{lat2} ? "--lat2 $option{lat2}" : '';

my $criteria_ref;
$criteria_ref = read_filter_file($option{filter}) if $filter;

my $param_file = $option{param} ? $option{param} : '';
my ($forced_params_ref, $params_ref);
($forced_params_ref, $params_ref)
    = read_param_file($param_file) if $param_file;

my $csv = $option{csv} ? 1 : 0;
# First line in CSV should be the parameters
print join(',', @$params_ref) . "\n" if $csv;

my $transform_file = $option{transform} ? $option{transform} : 0;
my $transform_ref;
$transform_ref = read_transformation_file($transform_file) if $transform_file;

my $sort = $option{sort} ? 1 : 0;
my $sort_on = $option{sort_on} ? $option{sort_on} : '';

# What kind of sorting is required (if any)?
($sort_on, my $by) = get_sort_method($sort_on, $sort);

# Loop for processing of BUFR input files
my %data_of = ();
foreach my $inputfname (@ARGV) {

    # Dump the content of the BUFR file using the Fortran program $BUFRDUMP
    my $dump = `$BUFRDUMP $filter $lon1 $lat1 $lon2 $lat2 $inputfname`;
    die if $?; # Reason for bufrdump failing should have been printed to STDERR

    # Then process the output from the dump
    my @lines = split /\n/, $dump;
    # Add an empty line to simplify processing
    push @lines, '';
    my $station = '';

    # Skip first(blank) line
    shift @lines;

    my @lines_to_print;
    my %msg; # Hash with parameter name as key, parameter value as value

  LINE:while (defined(my $line = shift @lines)) {
        # Each new message starts with a blank line
        if ($line !~ /^\s*$/) {
            # Skip error messages from libbufr, which should start with space(s)
            next if $line =~ /^\s+/;
            # Build up the message to be (possibly) printed
            my ($param, $value) = ($line =~ /^(.+)=\s*(.*?)\s*$/);
            if ($transform_file && $transform_ref->{$param}) {
                # Replace value with transformed value
                my $transform = $transform_ref->{$param};
                $transform =~ s/\$x/$value/g;
                $value = eval $transform;
                die "Couldn't parse following transform:\n"
                    . $transform . "\nReason:\n$@" if $@;
                $line =~ s/=.*/=$value/;
            }
            $msg{$param} = $value;
            push @lines_to_print, $line;
        }

        if ($line =~ /^\s*$/ or @lines == 0) {
            # A full message has been completed. Should it be printed?
            if ($filter && filter_obs(\%msg, $criteria_ref)) {
                # Skip this message
            } else {
                # Print the message (or if --sort or --sort_on: save the message)
                my $txt = '';
                if ($param_file) {
                    # Print the params in @$params_ref if exists in
                    # message, in same order as in @$params_ref
                    foreach my $name (@$params_ref) {
                        if (exists $msg{$name}) {
                            $txt .= $csv ? $msg{$name} . ',' : "$name=$msg{$name}\n";
                        } elsif ($forced_params_ref->{$name}) {
                            $txt .= $csv ? '-32767,' : "$name=-32767\n";
                        } elsif ($csv) {
                            $txt .= ',';
                        }
                    }
                } else {
                    foreach my $line2 (@lines_to_print) {
                        $line2 =~ s/=\s+/=/;
                        $txt .= $line2 . "\n";
                    }
                }
                chop $txt if $csv; # removes last ','
                if ($txt) {
                    if ($sort) {
                        # Sort wmonr before call signs before buoy_id
                        if ($msg{wmonr}) {
                            $station = '00_' . $msg{wmonr};
                        } elsif ($msg{call_sign}) {
                            $station = '10_' . $msg{call_sign};
                        } elsif ($msg{buoy_id}) {
                            $station = '10_' . $msg{buoy_id};
                        } else {
                            # Skip observation if no station identification found
                            next LINE;
                        }
                        if ($sort_on) {
                            my $val = exists $msg{$sort_on} ? $msg{$sort_on} : '';
                            my $key = $station . '|' . $val;
                            $data_of{$key} = exists $data_of{$key}
                                ? $data_of{$key} . "$txt \n" : "$txt \n";
                        } else {
                            $data_of{$station} = exists $data_of{$station}
                                ? $data_of{$station} . "$txt \n" : "$txt \n";
                        }
                    } elsif ($sort_on) {
                        my $val = exists $msg{$sort_on} ? $msg{$sort_on} : '';
                        $data_of{$val} = exists $data_of{$val}
                            ? $data_of{$val} . "$txt \n" : "$txt \n";
                    } else {
                        # No sorting. We can print the line immediately
                        print $txt .= "\n" if $txt;
                    }
                }
                $txt = '';
            }               # Finished message
            @lines_to_print = ();
            %msg = ();
        }
    }
}

# If sorting requested, we cannot print before now
if ($sort && $sort_on) {
    for (sort $by keys %data_of) {
        print $data_of{$_};
    }
} elsif ($sort) {
    for (sort keys %data_of) {
        print $data_of{$_};
    }
} elsif ($sort_on) {
    # Print observations with missing value for the sort parameter lastly
    my $data_of_missing_value = $data_of{''} || '';
    delete $data_of{''};
    for (sort $by keys %data_of) {
        print $data_of{$_};
    }
    print $data_of_missing_value if $data_of_missing_value;
}


sub read_param_file {
    my $parameter_file = shift;

    open my $PARAM, '<', $parameter_file
        or die "Cannot open $parameter_file: $!";

    my %forced_params;
    my @params;
    # Read the parameters into @params, those preceded by an
    # exclamation mark also into %forced_params, skipping blank lines
    # and comment lines
    while (my $name = <$PARAM>) {
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        next if !$name || $name =~ /^#/;
        if ($name =~ /^!/) {
            $name = substr $name, 1;
            $forced_params{$name} = 1;
        }
        push @params, $name;
    }
    close $PARAM or die "Cannot close $parameter_file: $!";

    return \%forced_params, \@params;
}

sub read_transformation_file {
    my $transform_file = shift;

    open my $TRANSFORM, '<', $transform_file
        or die "Cannot open $transform_file: $!";

    # Read in the transformations, skipping blank lines and comment
    # lines
    my %transform_of;
    while (my $line = <$TRANSFORM>) {
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
         next if !$line || $line =~ /^#/;
        my ($param, $transform) = split /=/, $line, 2;
        die "Invalid transformation in $transform_file:\n$line\n"
            unless $transform;
        $param =~ s/\s+$//;
        $transform =~ s/^\s+//;
        $transform_of{$param} = $transform;
    }
    return \%transform_of;
}

sub read_filter_file {
    my $filter_file = shift;
    my @allowed_operators =
        ('=',
         '<',
         '<=',
         '>',
         '>=',
         '!=',
     );
    my @criteria;

    open my $FILTER, '<', $filter_file
        or die "Cannot open $filter_file: $!";
    # Skip the criteria meant for Fortran parsing, i.e. proceed to
    # first line following a blank line
    while (<$FILTER>) {
        last if $_ =~ /^\s*$/;
    }

    # Read the filter criteria meant for Perl parsing, skipping blank
    # lines and comment lines
    while (my $line = <$FILTER>) {
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if !$line || $line =~ /^#/;
        push @criteria, $line;
    }

    # Check that the criteria are properly formatted
    foreach my $criterium (@criteria) {
        # Naked parameter is ok
        next if $criterium =~ /^\w+$/;

        my $op = (split / +/, $criterium)[1];
        if (!defined($op) or grep(/[+*?\\]/, $op) or !grep(/^$op$/, @allowed_operators) ) {
            print "Error in $filter_file, line $. is badly formatted"
                . " or operator not supported:\n$criterium";
            exit 1;
        }
    }
    return \@criteria;
}

# Return true (1) if observation is to be filtered, i.e. does not
# comply with at least one of the <param> or <param> <operator>
# <value> filter criteria in filter file
sub filter_obs {
    my $msg_ref = shift;
    my $criteria_ref = shift;

    my @ascii_params = qw(call_sign icao_id name obstime type);

    foreach my $criterium (@$criteria_ref) {
        my ($f_param, $f_operator, $f_value) = split / +/, $criterium, 3;
        return 1 unless exists $msg_ref->{$f_param};
        next if not defined $f_operator; # Naked parameter criterium. Parameter
                                         # present, so criterium fulfilled
        chomp $f_value;
        if ($f_operator eq '=') {
            if (grep {$_ eq $f_param} @ascii_params) {
                $msg_ref->{$f_param} =~ s/\s*$//;
                return 1 unless $msg_ref->{$f_param} eq $f_value;
            } else {
                return 1 unless $msg_ref->{$f_param} == $f_value;
            }
        } elsif ($f_operator eq '<') {
            return 1 unless $msg_ref->{$f_param} < $f_value;
        } elsif ($f_operator eq '<=') {
            return 1 unless $msg_ref->{$f_param} <= $f_value;
        } elsif ($f_operator eq '>') {
            return 1 unless $msg_ref->{$f_param} > $f_value;
        } elsif ($f_operator eq '>=') {
            return 1 unless $msg_ref->{$f_param} >= $f_value;
        } elsif ($f_operator eq '!=') {
            if (grep {$_ eq $f_param} @ascii_params) {
                $msg_ref->{$f_param} =~ s/\s*$//;
                return 1 unless $msg_ref->{$f_param} ne $f_value;
            } else {
                return 1 unless $msg_ref->{$f_param} != $f_value;
            }
        }
    }

    # All filter conditions have been fullfilled
    return 0;
}

# When --sort_on is used, need to supply the sort method. Also returns
# input parameter $sort_on with possible trailing '-' (or '+')
# stripped off
sub get_sort_method {
    my ($sort_on, $sort) = @_;
    return if !$sort_on;

    my $ascending_sort = 1;
    # A minus sign appended to the sort parameter means descending sort
    if ($sort_on =~ /-$/) {
        $ascending_sort = 0;
        chop $sort_on;
    }
    # Just in case someone adds a '+' to signify ascending sort
    if ($sort_on =~ /[+]$/) {
        chop $sort_on;
    }

    my @ascii_params = qw(call_sign icao_id obstime name type);
    my $lexical_sort = grep {$_ eq $sort_on} @ascii_params;

    my $sort_sub;
    if ($sort) {
        $sort_sub = sub {
            my ($stn_a, $val_a) = split /\|/, $a;
            my ($stn_b, $val_b) = split /\|/, $b;
            # Sort on stationid is always lexical and ascending
            my $cmp = $stn_a cmp $stn_b;
            if ($cmp) {
                return $cmp;
            } else {
                # Stationids are equal. Sort on value of sort_on parameter
                if ($lexical_sort) {
                    if ($ascending_sort) {
                        return $val_a cmp $val_b;
                    } else {
                        return $val_b cmp $val_a;
                    }
                } else {
                    # Numerical sort
                    if ($ascending_sort) {
                        return $val_a <=> $val_b;
                    } else {
                        return $val_b <=> $val_a;
                    }
                }
            }
        }
    } else {
        # Sort restricted to sort_on parameter
        $sort_sub = sub {
            if ($lexical_sort) {
                if ($ascending_sort) {
                    return $a cmp $b;
                } else {
                    return $b cmp $a;
                }
            } else {
                # Numerical sort
                if ($ascending_sort) {
                    return $a <=> $b;
                } else {
                    return $b <=> $a;
                }
            }
        }
    }

    return ($sort_on, $sort_sub);
}

=pod

=head1 SYNOPSIS

  bufrdump.pl <bufr file(s)>
      [--filter <filter file>]
      [--param <parameter file> [--csv]]
      [--sort]
      [--sort_on <parameter>[-]]
      [--transform <transformation file>]
      [--lon1 x1]
      [--lat1 y1]
      [--lon2 x2]
      [--lat2 x2]
      [--tablepath <path to BUFR tables>]
      [--help]

=head1 DESCRIPTION

Extracts BUFR messages from BUFR file(s) and prints section 4 as
"parameter=value" lines. Calls the Fortran program bufrdump
internally, so this program must be installed at the location set in
variable $BUFRDUMP in source code.

Execute without arguments for Usage, with option C<--help> for some
additional info. See also L</https://wiki.met.no/bufr.pm/start> for
examples of use.


=head1 OPTIONS


  --filter <filter file>
                  Decode observations meeting criteria in <filter file> only
  --param <parameter file> [--csv]
                  Print parameters in <parameter file> only, in same order
                  as they occur in <parameter file>. If --csv, the parameters
                  vill be printed using the CSV (comma separated values) format
  --sort          Sort the decoded observations on station identification;
                  first stations with wmonr, then stations with call sign,
                  then stations with buoy_id (others left out)
  --sort_on <parameter>[-] Sort the decoded observations on increasing
                  values of <parameter>, or decreasing values if a '-'
                  follows the parameter name. E.g. --sort_on TA- will
                  sort on decreasing temperatures. Observations not
                  containing the parameter at all will be printed
                  lastly, except when --sort_on is combined with
                  --sort (in which case sorting is done first on
                  station identification, secondly on <parameter>
                  with missing values printed firstly)
  --transform <transformation file>
                  Do the transformations of parameter values listed in
                  <transformation file>
  --lon1 x1       Decode observations with longitude >= x1 only
  --lat1 y1       Decode observations with latitude >= y1 only
  --lon2 x2       Decode observations with longitude <= x2 only
  --lat2 y2       Decode observations with latitude <= y2 only
                  x1,y1,x2,y2 should be decimal degrees
  --tablepath <path to BUFR tables>
                  Set path to BUFR tables (overrides ENV{BUFR_TABLES})
  --help          Print this Usage

Options may be abbreviated, e.g. --h or -h for --help.

To avoid having to use the C<--tablepath> option, you are adviced to
set the invironment variable BUFR_TABLES to the directory where your
BUFR tables are located (unless the default path provided by
bufrdump.pl works for you).

The lines in <parameter file> should be name of the parameters you
want to be printed. For example, if you want only station
identification and temperature to be printed for a BUFR SYNOP file,
the <parameter file> should look like this:

  wmonr
  call_sign
  TA

If you want "parameter=value" to be printed also when value is missing
in BUFR message, precede the parameter name with an exclamation mark
(e.g. '!TA').  Missing values will then be displayed as -32767.

If --csv is used in conjunction with --param, all values will be
printed using the CSV format, with first line listing the parameters,
and with missing fields printed as -32767 if the parameter is marked
with '!' in parameter file. With the parameter file above, the listing
may for example start like

  wmonr,call_sign,TA
  01001,,-1.5
  ,LF5U,9.0

Using --filter will decode only those observations that meet at least
one of the BUFR descriptor criteria and all of the parameter criteria
in <filter file>, where the BUFR descriptor criteria should come first
in filter file followed by a blank line, then comes the parameter
criteria which should match <param> or <param> <operator> <value>
where operator is one of =, !=, <, <=, > and >=. An example filter
file is

  D: 001001 I2.2
  01
  D: 001001 I2.2 001002 I3.3
  03 895
  06 252
  D: 001011 A9
  LF5U

  type = Manned
  NN != 8
  TA >= 5
  TA < 9.5
  RR_24

which decodes all observations with block number 01, two other
specific wmo stations and one specific ship, being manned stations and
having cloud cover different from 8 (but NN must be part of the
message) and temperature between 5 and 9.5 degrees Celsius and
containing precipitation for last 24 hours. Comment lines starting
with # will be ignored.

Another example: the simple filter file (starting with a blank line!)


  wmonr

will print only those observations containing a wmonr (skipping
ships).

The --transform option is provided mainly to be able to use other
units than what is default in bufrdump.pl. The transformation file
should list the transformations wanted, one per line as

  <parameter> = <perl expression involving $x>

where $x is original value of the parameter.

For example, the following transformation file will display wind speed
FF and wind gust FG in knots instead of m/s, rounded to one decimal,
and cloud cover NN in % (instead of the default which is using WMO
code table 2700, roughly counting octas):

  FF = sprintf("%.1f", $x*1.9438)
  FG = sprintf("%.1f", $x*1.9438)
  NN = int($x*12.5 + .5)

If --transform is combined with --filter, the filter criteria should
refer to the transformed values. E.g. if the above NN transform to %
is to be applied for sky not all covered by clouds, you should use NN
!= 100 instead of NN != 8 in filter file.

=head1 AUTHOR

Pål Sannes E<lt>pal.sannes@met.noE<gt>

=head1 COPYRIGHT

Copyright (C) 2010 met.no

=cut