Differences

This shows you the differences between two versions of the page.

--- bufr.pm:bufrdump.pl_source [2010-03-10 09:26:46]
pals
+++ bufr.pm:bufrdump.pl_source [2022-05-31 09:29:31] (current)
@@ Line 1: / Line 1: @@
-<code>
+<code perl bufrdump.pl>
 #!/usr/bin/perl -w
@@ Line 19: / Line 19: @@
 # 02110-1301, USA.
-# Extract BUFR messages from bufr file(s) using the Fortran program
+# pod included at end of file
-# bufrdump and print the data as 'parameter=value' lines. See
-# usage_verbose for explanation of the options allowed.
-# Author: P. Sannes met.no 2010
 use strict;
 use Getopt::Long;
+use Pod::Usage qw(pod2usage);
-my $DEFAULT_TABLE_PATH = '/usr/local/lib/bufrtables';
+use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables';
-my $BUFRDUMP = '/metno/local/bin/bufrdump';
+my $BUFRDUMP = 'bufrdump'; # You should add path if bufrdump is
+                           # installed in a non-standard place
 # Parse command line options
 my %option = ();
 GetOptions(
            \%option,
            'help',
            'tablepath=s', # Set BUFR table path
-           'filter=s',    # Decode observations meeting criteria in <filter file> only
+           'filter=s',    # Decode observations meeting the filter criteria specified only
-           'param=s',     # Decode the parameters in <parameter file> only
+           'param=s',     # Decode/print specified parameters only
+           'csv',         # Use CSV format for printing
+           'delimiter=s', # Choose the delimiter for the CSV format
+           'sort',        # Sort on stationid (wmonr/nationalnr/call_sign/buoy_id/aircraft/icao_id/wigosid)
+           'sort_on=s',   # Sort on specified parameter
+           'station=s',   # Filter on list of stations
+           'transform=s', # Do the transformations in <transformation file>
            'lon1=i',
            'lat1=i',
            'lon2=i',
            'lat2=i',
-       ) or die "Wrong option(s), execute $0 without arguments for Usage\n";
+           'obstype=s',   # Force observation type
+       ) or pod2usage(-verbose => 0);
 # User asked for help
-usage_verbose() if $option{help};
+pod2usage(-verbose => 1) if $option{help};
 # Make sure there is at least one input file
-usage() unless @ARGV;
+pod2usage(-verbose => 0) unless @ARGV;
+# --csv can only be used together with --param
+pod2usage(-verbose => 0) if $option{csv} && !$option{param};
 # Prevent ECMWF software from printing table info
@@ Line 60: / Line 67: @@
     $ENV{BUFR_TABLES} = $option{tablepath};
 } elsif (!$ENV{BUFR_TABLES}) {
-    # Use the libemos bufrtables
+    $ENV{BUFR_TABLES} = DEFAULT_TABLE_PATH;
-    $ENV{BUFR_TABLES} = $DEFAULT_TABLE_PATH;
 }
+# ECMWF software requires trailing '/' in bufrpath
+$ENV{BUFR_TABLES} .= '/' if substr($ENV{BUFR_TABLES},-1) ne '/';
-my $filter = $option{filter} ? "--filter $option{filter}" : '';
+die "Directory for BUFR tables: $ENV{BUFR_TABLES} does not exist"
+    if ! -d $ENV{BUFR_TABLES};
+my $obstype = $option{obstype} ? "--obstype $option{obstype}" : '';
+my $filt = $option{filter} ? "$option{filter}" : '';
 my $lon1 = $option{lon1} ? "--lon1 $option{lon1}" : '';
 my $lat1 = $option{lat1} ? "--lat1 $option{lat1}" : '';
 my $lon2 = $option{lon2} ? "--lon2 $option{lon2}" : '';
 my $lat2 = $option{lat2} ? "--lat2 $option{lat2}" : '';
+my $del = $option{delimiter} ? "$option{delimiter}" : ';';
+my $delimiter = $option{delimiter} ? "--delimiter $del" : '';
-my $criteria_ref = [];
+# Any filter criteria provided?
-if ($filter) {
+my ($filter, $criteria_ref, $num_alt_ref) = get_filter_conditions($filt);
-    $criteria_ref = read_filter_file($option{filter})
-}
-my $forced_params_ref;
+# Any specific stations requested?
-my $params_ref;
+my ($req_id, $req_stn_ref) = get_requested_stations($option{station});
-if ($option{param}) {
-    ($forced_params_ref, $params_ref)
+# Any specific parameters specified?
-        = read_param_file($option{param});
+my ($params_ref, $forced_params_ref) = get_params($option{param});
-}
+my $csv = $option{csv} ? 1 : 0;
+# First line in CSV should be the parameters
+print join($del, @$params_ref) . "\n" if $csv;
+# Any transformations of units specified?
+my $transform_file = $option{transform} ? $option{transform} : 0;
+my $transform_ref = read_transformation_file($transform_file);
+# Any sorting requested?
+my $sort = $option{sort} ? 1 : 0;
+my $sort_on = $option{sort_on} ? $option{sort_on} : '';
+# What kind of sorting is required (if any)?
+($sort_on, my $by) = get_sort_method($sort_on, $sort);
 # Loop for processing of BUFR input files
+my %data_of = ();
 foreach my $inputfname (@ARGV) {
     # Dump the content of the BUFR file using the Fortran program $BUFRDUMP
-    my $dump = `$BUFRDUMP $filter $lon1 $lat1 $lon2 $lat2 $inputfname`;
+    my $fortran_options = "$obstype $filter $lon1 $lat1 $lon2 $lat2";
+    my $dump = `$BUFRDUMP $fortran_options $inputfname`;
+    die if $?; # Reason for bufrdump failing should have been printed to STDERR
     # Then process the output from the dump
     my @lines = split /\n/, $dump;
+    # Add an empty line to simplify processing
+    push @lines, '';
+    my $stnid = '';
-    if (!$option{param} && !@$criteria_ref) {
+    # Skip first (blank) line
-        # Same output as from bufrdump, except that spaces after '=' are removed
+    shift @lines;
-        foreach my $line (@lines) {
-            $line =~ s/=\s+/=/;
-            print $line, "\n";
-        }
-    } else { # Options has been used which the Fortran program doesn't
-             # handle, so special massaging is necessary
-        # Skip first(blank) line
+    my @lines_to_print;
-        shift @lines;
+    my %msg; # Hash with parameter name as key, parameter value as value
-        my @lines_to_print;
+  LINE:
-        my %message; # Hash with parameter name as key, parameter value as value
+    while (defined(my $line = shift @lines)) {
+        # Each new message starts with a blank line
-      LINE:while (defined(my $line = shift @lines)) {
+        if ($line !~ /^\s*$/) {
-            # Each new message starts with a blank line
+            # Skip error messages from libbufr, which should start with space(s)
-            if ($line !~ /^\s*$/) {
+            next LINE if $line =~ /^\s+/;
-                # Build up the message to be (possibly) printed
+            # Build up the message to be (possibly) printed
-                push @lines_to_print, $line;
+            my ($param, $value) = ($line =~ /^(.+)=\s*(.*?)\s*$/);
-                my ($name, $value) = ($line =~ /^(.+)=\s*(.*)$/);
+            # Know only of one case where next check is necessary: if
-                $message{$name} = $value;
+            # a CCITT IA5 value contains new line (\n)
+            next LINE if !defined $value;
+            if ($transform_file && $transform_ref->{$param}) {
+                # Replace value with transformed value
+                my $transform = $transform_ref->{$param};
+                $transform =~ s/\$x/$value/g;
+                $value = eval $transform;
+                die "Couldn't parse following transform:\n"
+                    . $transform . "\nReason:\n$@" if $@;
+                $line =~ s/=.*/=$value/;
             }
+            $msg{$param} = $value;
+            push @lines_to_print, $line;
+        }
-            if ($line =~ /^\s*$/ or @lines == 0) {
+        if ($line =~ /^\s*$/ or @lines == 0) {
-                # A full message has been completed. Should it be printed?
+            # A full message has been completed. Should it be printed?
-                if ($filter && filter_obs(\%message, $criteria_ref)) {
+            if ($filt && filter_obs(\%msg, $criteria_ref, $num_alt_ref)) {
-                    # Skip this message
+                # Skip this message
-                    @lines_to_print = ();
+            } elsif ($req_id && filter_station(\%msg, $req_id, $req_stn_ref)) {
-                    %message = ();
+                # Skip this station
-                    next LINE;
+            } else {
-                }
+                # Print the message (or if --sort or --sort_on: save the message)
-                # Print the message
+                my $txt = '';
-                print "\n";
+                if ($params_ref) {
-                if ($option{param}) {
                     # Print the params in @$params_ref if exists in
                     # message, in same order as in @$params_ref
                     foreach my $name (@$params_ref) {
-                        if (exists $message{$name}) {
+                        if (exists $msg{$name}) {
-                            print "$name=$message{$name}\n";
+                            $txt .= $csv ? $msg{$name} . $del : "$name=$msg{$name}\n";
                         } elsif ($forced_params_ref->{$name}) {
-                            print "$name=-32767\n";
+                            $txt .= $csv ? '-32767' . $del : "$name=-32767\n";
+                        } elsif ($csv) {
+                            $txt .= $del;
                         }
                     }
@@ Line 138: / Line 177: @@
                     foreach my $line2 (@lines_to_print) {
                         $line2 =~ s/=\s+/=/;
-                        print $line2, "\n";
+                        $txt .= $line2 . "\n";
                     }
                 }
-                @lines_to_print = ();
+                if ($csv) {
-                %message = ();
+                    # Remove last $del
-            }
+                    for (1 .. length($del)) {
+                        chop $txt;
+                    }
+                }
+                if ($txt) {
+                    if ($sort) {
+                        # Sort wmonr before nationalnr before call sign before
+                        # buoy_id before aircraft before icaoid before wigosid
+                        if ($msg{wmonr}) {
+                            $stnid = '00_' . $msg{wmonr};
+                        } elsif ($msg{nationalnr}) {
+                            $stnid = '10_' . $msg{nationalnr};
+                        } elsif ($msg{call_sign}) {
+                            $stnid = '20_' . $msg{call_sign};
+                        } elsif ($msg{buoy_id}) {
+                            $stnid = '30_' . $msg{buoy_id};
+                        } elsif ($msg{aircraft}) {
+                            $stnid = '40_' . $msg{aircraft};
+                        } elsif ($msg{icao_id}) {
+                            $stnid = '50_' . $msg{icao_id};
+                        } elsif ($msg{wigosid}) {
+                            $stnid = '60_' . $msg{wigosid};
+                        } else {
+                            # Skip observation if no station identification found
+                            next LINE;
+                        }
+                        if ($sort_on) {
+                            my $val = exists $msg{$sort_on} ? $msg{$sort_on} : '';
+                            my $key = $stnid . '|' . $val;
+                            $data_of{$key} = exists $data_of{$key}
+                            ? $data_of{$key} . "$txt\n" : "$txt\n";
+                        } else {
+                            $data_of{$stnid} = exists $data_of{$stnid}
+                            ? $data_of{$stnid} . "$txt\n" : "$txt\n";
+                        }
+                    } elsif ($sort_on) {
+                        my $val = exists $msg{$sort_on} ? $msg{$sort_on} : '';
+                        $data_of{$val} = exists $data_of{$val}
+                        ? $data_of{$val} . "$txt\n" : "$txt\n";
+                    } else {
+                        # No sorting. We can print the line immediately
+                        print $txt .= "\n" if $txt;
+                    }
+                }
+                $txt = '';
+            }               # Finished message
+            @lines_to_print = ();
+            %msg = ();
         }
     }
 }
+# If sorting requested, we cannot print before now
+if ($sort && $sort_on) {
+    for (sort $by keys %data_of) {
+        print $data_of{$_};
+    }
+} elsif ($sort) {
+    for (sort keys %data_of) {
+        print $data_of{$_};
+    }
+} elsif ($sort_on) {
+    # Print observations with missing value for the sort parameter lastly
+    my $data_of_missing_value = $data_of{''} || '';
+    delete $data_of{''};
+    for (sort $by keys %data_of) {
+        print $data_of{$_};
+    }
+    print $data_of_missing_value if $data_of_missing_value;
+}
-sub read_param_file {
-    my $parameter_file = shift;
-    open my $PARAM, '<', $parameter_file
+# Read the filter conditions (if any). Return the filter option to be
-        or die "Cannot open $parameter_file: $!";
+# used by bufrdump, the found criteria (if any) as well as the number
+# of succeeding alternatives for each criterium
+sub get_filter_conditions {
+    my $filt = shift;
+    return ('') if ! $filt;
-    my %forced_params;
+    my $fortran_filter = '';
-    my @params;
+    my @f;
-    while (my $name = <$PARAM>) {
+    if ($filt =~ /,/) {
-        chomp $name;
+        # Argument to --filter is a comma separated list
-        if ($name =~ /^!/) {
+        @f = split /,/, $filt;
-            $name = substr $name, 1;
+    } else {
-            $forced_params{$name} = 1;
+        # Argument to --filter is a file
+        $fortran_filter = "--filter $filt";
+        open my $FILTER, '<', $filt
+            or die "Cannot open $filt: $!";
+        # Skip the criteria meant for Fortran parsing, i.e. proceed to
+        # first line following a blank line
+        while (<$FILTER>) {
+            last if $_ =~ /^\s*$/;
         }
-        push @params, $name;
+        @f = <$FILTER>;
+        close $FILTER or die "Cannot close $filt: $!";;
     }
-    close $PARAM or die "Cannot close $parameter_file: $!";
+    return ($fortran_filter) if !@f; # BUFR descriptor criteria only
-    return \%forced_params, \@params;
-}
-sub read_filter_file {
-    my $filter_file = shift;
     my @allowed_operators =
         ('=',
@@ Line 179: / Line 288: @@
          '>=',
          '!=',
-     );
+         '=~',
+         '!~',
+        );
     my @criteria;
+    my @num_alt; # Number of alternative criteria following this,
-    open my $FILTER, '<', $filter_file
+                 # i.e. if line is '<cr1> | <cr2> | <cr3>' then
-        or die "Cannot open $filter_file: $!";
+                 # corresponding values in @num_alt will be 2,1,0
-    # Skip the criteria meant for Fortran parsing, i.e. proceed to
-    # first line following a blank line
-    while (<$FILTER>) {
-        last if $_ =~ /^\s*$/;
-    }
     # Read the filter criteria meant for Perl parsing, skipping blank
     # lines and comment lines
-    if (not eof) {
+  FILTERLINE:
-        while (my $line = <$FILTER>) {
+    foreach my $line (@f) {
-            push @criteria, $line
+        $line =~ s/^\s+//;
-                if $line !~ /^\s*$/ && $line !~ /^\s*#/;
+        $line =~ s/\s+$//;
+        next FILTERLINE if !$line || $line =~ /^#/;
+        my @crit = split /\|/, $line;
+        my $num = scalar @crit;
+        foreach my $criterium (@crit) {
+            $criterium =~ s/^\s+//;
+            $criterium =~ s/\s+$//;
+            if ($criterium ne '') {
+                push @criteria, $criterium;
+                push @num_alt, --$num;
+            }
         }
     }
+    return ($fortran_filter) if !@criteria;
     # Check that the criteria are properly formatted
     foreach my $criterium (@criteria) {
+        # Naked parameter possibly preceded by '!' is ok
+        next if $criterium =~ /^!?\w+$/;
         my $op = (split / +/, $criterium)[1];
-        if (!defined($op) or not grep(/^$op$/, @allowed_operators) ) {
+        if (!defined($op) or grep(/[+*?\\]/, $op)
-            print "Error in $filter_file, line $. is badly formatted:\n$criterium";
+            or !grep(/^$op$/, @allowed_operators) ) {
+            print "Error in $filt:\ncriterium is badly formatted"
+                . " or operator not supported:\n$criterium";
             exit 1;
         }
     }
-    return \@criteria;
+    return ($fortran_filter, \@criteria, \@num_alt);
+}
+# Read the parameters into @params, those preceded by an exclamation
+# mark also into %forced_params, and return references to these two.
+sub get_params {
+    my $params = shift;
+    return if ! $params;
+    my @params;
+    my %forced_params;
+    if ($params =~ /,/) {
+        # Argument to --params is a comma separated list
+        my @p = split /,/, $params;
+        foreach my $name (@p) {
+            $name =~ s/^\s+//;
+            $name =~ s/\s+$//;
+            if ($name =~ /^!/) {
+                $name = substr $name, 1;
+                $forced_params{$name} = 1;
+            }
+            push @params, $name;
+        }
+    } else {
+        # Argument to --params is a file
+        open my $PARAM, '<', $params
+            or die "Cannot open file $params: $!";
+        while (my $name = <$PARAM>) {
+            # Skip blank lines and comment lines
+            $name =~ s/^\s+//;
+            $name =~ s/\s+$//;
+            next if !$name || $name =~ /^#/;
+            if ($name =~ /^!/) {
+                $name = substr $name, 1;
+                $forced_params{$name} = 1;
+            }
+            push @params, $name;
+        }
+        close $PARAM or die "Cannot close $params: $!";
+    }
+    return \@params, \%forced_params;
+}
+sub read_transformation_file {
+    my $transform_file = shift;
+    return if !$transform_file;
+    open my $TRANSFORM, '<', $transform_file
+        or die "Cannot open $transform_file: $!";
+    # Read in the transformations, skipping blank lines and comment
+    # lines
+    my %transform_of;
+    while (my $line = <$TRANSFORM>) {
+        $line =~ s/^\s+//;
+        $line =~ s/\s+$//;
+        next if !$line || $line =~ /^#/;
+        my ($param, $transform) = split /=/, $line, 2;
+        die "Invalid transformation in $transform_file:\n$line\n"
+            unless $transform;
+        $param =~ s/\s+$//;
+        $transform =~ s/^\s+//;
+        $transform_of{$param} = $transform;
+    }
+    close $TRANSFORM or die "Cannot close $transform_file: $!";
+    return \%transform_of;
 }
 # Return true (1) if observation is to be filtered, i.e. does not
-# comply with at least one of the <param> <operator> <value> filter
+# comply with at least one line in filter file, where each line is one
-# criteria in filter file
+# or more alternatives <param> or <param> <operator> <value>
 sub filter_obs {
-    my $message_ref = shift;
+    my $msg_ref = shift;
     my $criteria_ref = shift;
+    my $num_alt_ref = shift; # gives the number of alternative
+                             # criteria still to be checked
+    return unless $criteria_ref;
-    my @ascii_params = qw(DDDD icao_id name obstime type);
+    my @ascii_params = qw(aircraft call_sign icao_id name obstime type wigosid);
-    foreach my $criterium (@$criteria_ref) {
+    # Note that the loop counter $i might be changed in the loop
+    for (my $i=0; $i < @{$criteria_ref}; $i++) {
+        my $num_alt = $num_alt_ref->[$i];
+        my $criterium = $criteria_ref->[$i];
         my ($f_param, $f_operator, $f_value) = split / +/, $criterium, 3;
-        chomp $f_value;
-        if ($f_operator eq '=') {
+        # First check for !$par, meaning $par should not be in the observation
-            return 1 unless exists $message_ref->{$f_param};
+        if (substr($f_param,0,1) eq '!') {
-            if (grep {$_ eq $f_param} @ascii_params) {
+            $f_param = substr($f_param,1);
-                $message_ref->{$f_param} =~ s/\s*$//;
+            # If parameter is present, criterium is not fullfilled
-                return 1 unless $message_ref->{$f_param} eq $f_value;
+            if (exists $msg_ref->{$f_param}) {
+                next if $num_alt; # More alternatives to check
+                # Criterium not fulfilled and no more alternatives to
+                # check. This observation should be filtered away
+                return 1;
             } else {
-                return 1 unless $message_ref->{$f_param} == $f_value;
+                # Criterium fulfilled. No need to check alternative criteria
+                $i += $num_alt if $num_alt;
+                next;
             }
-        } elsif ($f_operator eq '<') {
+        }
-            return 1 unless (exists $message_ref->{$f_param}
-                             and $message_ref->{$f_param} < $f_value);
+        # If parameter not present, criterium is obviously not fullfilled
-        } elsif ($f_operator eq '<=') {
+        if (not exists $msg_ref->{$f_param}) {
-            return 1 unless (exists $message_ref->{$f_param}
+            next if $num_alt;
-                             and $message_ref->{$f_param} <= $f_value);
+            return 1;
-        } elsif ($f_operator eq '>') {
+        }
-            return 1 unless (exists $message_ref->{$f_param}
-                             and $message_ref->{$f_param} > $f_value);
+        my $msg_value = $msg_ref->{$f_param};
-        } elsif ($f_operator eq '>=') {
+        # If a naked parameter criterium, we already know parameter is
-            return 1 unless (exists $message_ref->{$f_param}
+        # present (as found in previous check), so criterium is fulfilled
-                             and $message_ref->{$f_param} >= $f_value);
+        if (not defined $f_operator) {
-        } elsif ($f_operator eq '!=') {
+            if ($num_alt) {
-            return 1 unless exists $message_ref->{$f_param};
+                # No need to check the alternative criteria
+                $i += $num_alt;
+            }
+            next;
+        }
+        chomp $f_value;
+        my $op;
+        if ($f_operator eq '<'
+            || $f_operator eq '<='
+            || $f_operator eq '>'
+            || $f_operator eq '>='
+            || $f_operator eq '=~'
+            || $f_operator eq '!~') {
+            $op = $f_operator;
+        } elsif ($f_operator eq '='
+                 || $f_operator eq '!=') {
             if (grep {$_ eq $f_param} @ascii_params) {
-                $message_ref->{$f_param} =~ s/\s*$//;
+                $msg_value =~ s/\s*$//;
-                return 1 unless $message_ref->{$f_param} ne $f_value;
+                $op = ($f_operator eq '=') ? 'eq' : 'ne';
             } else {
-                return 1 unless $message_ref->{$f_param} != $f_value;
+                $op = ($f_operator eq '=') ? '==' : '!=';
             }
+        } else {
+            die "Internal error: unknown operator '$f_operator'";
         }
+	# Some parameters might need special massaging
+        if ($f_operator !~ /~/) {
+            if ($f_param eq 'wmonr' || $f_param eq 'buoy_id') {
+                # Make non octal by removing leading 0
+                $msg_value =~ s/^0+//;
+                $f_value =~ s/^0+// if $f_value != 0;
+            } elsif ($f_param eq 'nationalnr') {
+                # Convert to a pure numerical value (float). For 001101
+                # State id only numbers between 100 and 699 are operational
+                $msg_value =~ s/_0*/./;
+                $f_value =~ s/_0*/./;
+            } elsif ($f_param eq 'obstime') {
+                # Convert to a pure numerical value (float)
+                $msg_value =~ s/[-:]//g;
+                $msg_value =~ s/ /./;
+                $f_value =~ s/[-:']//g;
+                $f_value =~ s/^ +//;
+                $f_value =~ s/ +$//;
+                $f_value =~ s/ /./;
+            } elsif ($f_param eq 'name') {
+                # Add or correct quoting to "" and ignore casing
+                $msg_value = '"' . lc $msg_value . '"';
+                $f_value = lc $f_value;
+                if ($f_value =~ /^'.*'$/) {
+                    $f_value =~ s/^'//;
+                    $f_value =~ s/'$//;
+                }
+                if ($f_value !~ /^".*"$/) {
+                    $f_value = '"' . $f_value . '"';
+                }
+            }
+        }
+	my $condition = "$msg_value $op $f_value";
+	# Some values should be string values
+	if ($f_operator =~ /~/) {
+	    $condition = "q{$msg_value} $op $f_value";
+	} elsif (grep {$_ eq $f_param} @ascii_params) {
+	    $condition = "q{$msg_value} $op q{$f_value}";
+	}
+        # Finally, do the criterium check
+	if (eval $condition) {
+	    # No need to check the remaining alternative criteria
+	    $i += $num_alt if $num_alt;
+	    next;
+	} else {
+	    next if $num_alt;
+	    return 1;
+	}
     }
@@ Line 257: / Line 529: @@
 }
-sub usage {
+# Return the type of station requested, and the station
-    print <<"EOF";
+# identifications. Leave some leeway for how to list wmonr and
-Usage: $0 <bufr file(s)>
+# nationalnr (leading 0's might be omitted - added here)
-        [--filter <filter file>]
+sub get_requested_stations {
-        [--param <parameter file>]
+    my $req_stations = shift;
-        [--lon1 x1]
+    return if !$req_stations;
-        [--lat1 y1]
+    die "Station list must start with 'wmonr=', 'nationalnr=', "
-        [--lon2 x2]
+        . "'call_sign=', 'buoy_id=', 'aircraft=', 'icao_id' or 'wigosid='"
-        [--lat2 x2]
+        unless $req_stations
-        [--tablepath <path to BUFR tables>]
+        =~ /^(wmonr=|nationalnr=|call_sign=|buoy_id=|aircraft=|icao_id=|wigosid=)/;
-        [--help]
-EOF
+    my ($id, $rest) = split /=/, $req_stations;
-    exit 0;
+    my @stations = split /,/, $rest;
+    if ($id eq 'wmonr') {
+        my @req_stn;
+        foreach my $station (@stations) {
+            # Turn $station into a 5 digit wmonr
+            $station =~ s/^0+//;
+            $station += 1000 if $station < 1000;
+            $station = sprintf("%05d", $station);
+            push @req_stn, $station;
+        }
+        return ($id, \@req_stn);
+    } elsif ($id eq 'nationalnr') {
+        my @req_stn;
+        foreach my $station (@stations) {
+            # Turn national station number into 10 digits
+            my ($state_id, $national_id) = split /_/, $station;
+            die "Uncorrected formatted station: '$station' in station list"
+                if !defined $national_id or $national_id eq '';
+            $station = $state_id . '_' . sprintf("%010d", $national_id);
+            push @req_stn, $station;
+        }
+        return ($id, \@req_stn);
+    } else {
+        return ($id, \@stations);
+    }
 }
-sub usage_verbose {
+# Return true (1) if observation does not contain one of the stations
-    print <<"EOF";
+# listed in @$req_stn_ref (of type $req_id), i.e. if this observation
+# should be filtered away
+sub filter_station {
+    my ($msg_ref, $req_id, $req_stn_ref) = @_;
-Usage: $0 <bufr file(s)> [options]
+    return 1 unless $msg_ref->{$req_id};
+    my $stn = $msg_ref->{$req_id};
-Will print section 4 in BUFR messages in <bufr file(s)> as "parameter=value" lines.
+    return !grep { $_ eq $stn } @$req_stn_ref;
+}
-Options (may be abbreviated, e.g. --h for --help) are:
-        --filter <filter file>
+# When --sort_on is used, need to supply the sort method. Also returns
-                        Decode observations meeting criteria in <filter file> only
+# input parameter $sort_on with possible trailing '-' (or '+')
-        --param <parameter file>
+# stripped off
-                        Print parameters in <parameter file> only, in same order
+sub get_sort_method {
-                        as they occur in <parameter file>
+    my ($sort_on, $sort) = @_;
-        --lon1 x1       Decode observations with longitude >= x1 only
+    return if !$sort_on;
-        --lat1 y1       Decode observations with latitude >= y1 only
-        --lon2 x2       Decode observations with longitude <= x2 only
-        --lat2 y2       Decode observations with latitude <= y2 only
-                        x1,y1,x2,y2 should be decimal degrees
-        --tablepath <path to BUFR tables>
-                        Set path to BUFR tables (overrides ENV{BUFR_TABLES})
-        --help          Print this Usage
+    my $ascending_sort = 1;
+    # A minus sign appended to the sort parameter means descending sort
+    if ($sort_on =~ /-$/) {
+        $ascending_sort = 0;
+        chop $sort_on;
+    }
+    # Just in case someone adds a '+' to signify ascending sort
+    if ($sort_on =~ /[+]$/) {
+        chop $sort_on;
+    }
-You should probably set
+    my @ascii_params = qw(aircraft call_sign icao_id obstime name type wigosid);
-        export BUFR_TABLES=$DEFAULT_TABLE_PATH
+    my $lexical_sort = grep {$_ eq $sort_on} @ascii_params;
-or use the --tablepath option.
-The lines in <parameter file> should be name of the parameters you
+    my $sort_sub;
-want to be printed. For example, if you want only station
+    if ($sort) {
-identification and temperature to be printed for a BUFR SYNOP file,
+        $sort_sub = sub {
-the <parameter file> should look like this:
+            my ($stn_a, $val_a) = split /\|/, $a;
+            my ($stn_b, $val_b) = split /\|/, $b;
+            # Sort on stationid is always lexical and ascending
+            my $cmp = $stn_a cmp $stn_b;
+            if ($cmp) {
+                return $cmp;
+            } else {
+                # Stationids are equal. Sort on value of sort_on parameter
+                if ($lexical_sort) {
+                    if ($ascending_sort) {
+                        return $val_a cmp $val_b;
+                    } else {
+                        return $val_b cmp $val_a;
+                    }
+                } else {
+                    # Numerical sort
+                    if ($ascending_sort) {
+                        return $val_a <=> $val_b;
+                    } else {
+                        return $val_b <=> $val_a;
+                    }
+                }
+            }
+        }
+    } else {
+        # Sort restricted to sort_on parameter
+        $sort_sub = sub {
+            if ($lexical_sort) {
+                if ($ascending_sort) {
+                    return $a cmp $b;
+                } else {
+                    return $b cmp $a;
+                }
+            } else {
+                # Numerical sort
+                if ($ascending_sort) {
+                    return $a <=> $b;
+                } else {
+                    return $b <=> $a;
+                }
+            }
+        }
+    }
-wmonr
+    return ($sort_on, $sort_sub);
-DDDD
+}
-TA
+=pod
+=head1 SYNOPSIS
+  bufrdump.pl <bufr file(s)>
+      [--filter <filter file | filter list>]
+      [--param <parameter file | parameter list> [--csv [--delimiter <del>]]
+      [--sort]
+      [--sort_on <parameter>[-]]
+      [--station <station list>]
+      [--transform <transformation file>]
+      [--lon1 <x1>]
+      [--lat1 <y1>]
+      [--lon2 <x2>]
+      [--lat2 <y2>]
+      [--obstype <amdar|ocea|surface|sounding|sounding->]
+      [--tablepath <path to BUFR tables>]
+      [--help]
+=head1 DESCRIPTION
+Extracts BUFR messages from BUFR file(s) and prints section 4 as
+"parameter=value" lines. Calls the Fortran program bufrdump
+internally, so this program must be installed at the location set in
+variable $BUFRDUMP in source code.
+Execute without arguments for Usage, with option C<--help> for some
+additional info. See also L</https://wiki.met.no/bufr.pm/start> for
+examples of use.
+=head1 OPTIONS
+  --filter <filter file | filter list>
+                  Decode observations meeting criteria in filter file or
+                  filter list only
+  --param <parameter file | parameter list> [--csv [--delimiter <del>]]
+                  Print parameters in parameter file or comma
+                  separated list (e.g. wmonr,TA) only, in same order
+                  as they occur there. If using --csv possibly
+                  followed by --delimiter <del>, the parameters vill
+                  be printed using the CSV (comma-separated values)
+                  format, with the delimiter del (default is ';')
+  --sort          Sort the decoded observations on station identification;
+                  first stations with wmonr, then stations with nationalnr,
+                  call_sign, buoy_id, aircraft, icao_id or wigosid
+                  (others left out)
+  --sort_on <parameter>[-] Sort the decoded observations on increasing
+                  values of parameter, or decreasing values if a '-'
+                  follows the parameter name. E.g. --sort_on TA- will
+                  sort on decreasing temperatures. Observations not
+                  containing the parameter at all will be printed lastly,
+                  except when --sort_on is combined with --sort (in which
+                  case sorting is done firstly on station identification,
+                  secondly on parameter with missing values printed first)
+  --station <station list>
+                  Print observations for stations in station list only,
+                  e.g. wmonr=01384,01492
+  --transform <transformation file>
+                  Do the transformations of parameter values listed in
+                  transformation file
+  --lon1 <x1>     Decode observations with longitude >= x1 only
+  --lat1 <y1>     Decode observations with latitude >= y1 only
+  --lon2 <x2>     Decode observations with longitude <= x2 only
+  --lat2 <y2>     Decode observations with latitude <= y2 only
+                  x1,y1,x2,y2 should be decimal degrees
+  --obstype <amdar|ocea|surface|sounding|sounding->]
+                  Force observation type. If this option is not set,
+                  will make an educated guess of observation type
+                  based on metadata in section 1 of each BUFR message
+  --tablepath <path to BUFR tables>
+                  Set path to BUFR tables (overrides ENV{BUFR_TABLES})
+  --help          Print this Usage (but you might instead prefer to use
+                  perldoc bufrdump.pl)
+Options may be abbreviated, e.g. --h or -h for --help.
+To avoid having to use the C<--tablepath> option, you are adviced to
+set the environment variable BUFR_TABLES to the directory where your
+BUFR tables are located (unless the default path provided by
+bufrdump.pl works for you).
+The lines in <parameter file>, or the comma separated values in
+<parameter list>, should be name of the parameters you want to be
+printed. For example, if you want only station identification and
+temperature to be printed for a BUFR SYNOP file, either supply
+  wmonr,nationalnr,call_sign,TA
+as argument to --params, or supply a <parameter file> which should
+look like this:
+  wmonr
+  nationalnr
+  call_sign
+  TA
 If you want "parameter=value" to be printed also when value is missing
 in BUFR message, precede the parameter name with an exclamation mark
-(e.g. '!TA').  Missing values will then be displayed as -32767.
+(e.g. '!TA'). Missing values will then be displayed as -32767. If the
+argument to --param is a parameter list, you must prevent the shell
+from attaching special meaning to the exclamation mark by enclosing the
+list in single quotes.
+If the parameter list consists of one parameter only, a comma must be
+appended (e.g. 'wmonr,') because bufrdump.pl uses the appearence of
+comma to signal that this is not a filename but parameter name(s).
+If --csv is used in conjunction with --param, all values will be
+printed using the CSV format, with first line listing the parameters,
+and with missing fields printed as -32767 if the parameter is marked
+with '!' in parameter file or list. With the parameter file above, the
+listing may for example start like
+  wmonr;nationalnr;call_sign;TA
+;;;-1.5
+  ;;LF5U;9.0
+You can choose another delimiter than semicolon by use of option
+--delimiter <del>, e.g. --csv --delimiter ','
 Using --filter will decode only those observations that meet at least
@@ Line 317: / Line 778: @@
 in <filter file>, where the BUFR descriptor criteria should come first
 in filter file followed by a blank line, then comes the parameter
-criteria which should match <param> <operator> <value> where operator
+criteria which should match <param> or !<param> or <param> <operator>
-is one of =, !=, <, <=, > and >=. An example filter file is
+<value> where operator is one of =, !=, =~, !~, <, <=, > and >=. What
+follows =~ and !~ should be a Perl match regular expression. The parameter
+criteria may be phrased as alternatives by separating them with '|' on
+a single line. An example filter file is
-D: 001001 I2.2
+  D: 001001 I2.2
-D: 001001 I2.2 001002 I3.3
+  D: 001001 I2.2 001002 I3.3
 895
 252
-D: 001011 A9
+  D: 001011 A9
-LDWR
+  LF5U
-NN != 0
+  type = Manned
-TA >= 5
+  NN != 8
-TA < 9.5
+  TA >= 5
+  TA < 9.5
+  RR_24
 which decodes all observations with block number 01, two other
-specific wmo stations and one specific ship, having cloud cover
+specific wmo stations and one specific ship, where stations should be
-different from 0 (but NN must be part of the message) and temperature
+manned and have cloud cover with a value different from 8, and have
-between 5 and 9.5 degrees Celsius. Comment lines starting with #
+temperature between 5 and 9.5 degrees Celsius, and contain
-will be ignored.
+precipitation for last 24 hours. Comment lines starting with # will be
+ignored.
-EOF
+Another example: the filter file (starting with a blank line!)
-    exit 0;
-}
+  call_sign =~ /^L[A-N]..$/
+  obstime >= '2012-02-10 06:00:00'
+  HW | HWA | PW | PWA
+  FF > 10 | FG_010 > 10
+will print only those ship observations for which the 4 character
+call_sign starts with 2 letters in the interval LA-LN, and having
+obstime larger or equal to the datetime given, and containing wave
+data (specifically: height or period of waves, manually or
+automatically measured), and with wind or 10 minutes gust more than 10
+m/s.
+For convenience, when there are no BUFR descriptor criteria, you might
+provide the filter criteria on the command line. Example:
+--filter 'wmonr,TA > 0,RR_12 | RR_24, !FF'
+will decode only observations with wmonr, having positive temperature
+and containing precipitation for 12 or 24 hours and not reporting
+wind. If (like for --param) the filter list consists of one criterium
+only, a comma must be appended.
+To avoid the need of creating a filter file when observations for some
+few stations are requested, you can provide the stations in a comma
+separated list after option --station. Some examples:
+  --station wmonr=01001,01152,01492
+  --station nationalnr=614_0050410003,637_108
+  --station call_sign=LF5U
+  --station buoyid=64607,64609
+  --station aircraft=EU3421,JHCWUURA
+  --station icao_id=ENGM,ENBO
+  --station wigosid=0-376-0-511,0-20000-0-01492
+You cannot mix different kinds of stations this way (before '=' you
+must choose either wmonr, nationalnr, call_sign, buoy_id, aircraft,
+icao_id or wigosid). Note also that providing the stations in the BUFR
+descriptor part (first part) of the filter file will speed up
+execution time considerably, compared to using option --station. It is
+possible to combine --filter with --station if done with some care,
+e.g. specifying WMO block 01 and the required parameters in filter
+file, then the requested stations in station list.
+The --transform option is provided mainly to be able to use other
+units than what is default in bufrdump.pl. The transformation file
+should list the transformations wanted, one per line as
+  <parameter> = <perl expression involving $x>
+where $x is original value of the parameter.
+For example, the following transformation file will display wind speed
+FF and wind gust FG in knots instead of m/s, rounded to one decimal,
+and cloud cover NN in % (instead of the default which is using WMO
+code table 2700, roughly counting octas):
+  FF = sprintf("%.1f", $x*1.9438)
+  FG = sprintf("%.1f", $x*1.9438)
+  NN = int($x*12.5 + .5)
+If --transform is combined with --filter, the filter criteria should
+refer to the transformed values. E.g. if the above NN transform to %
+is to be applied for sky not all covered by clouds, you should use NN
+!= 100 instead of NN != 8 in filter file.
+The --obstype option might be handy in some special cases, like when
+you are interested only in the surface part of oceanographic data
+(then use '--obstype surface'), or when you want to see only levels
+with vss>0 in high resolution radiosonde data (then use '--obstype
+sounding-'), or when data category and/or data sub-category in the
+BUFR messages have unusual values.
+=head1 AUTHOR
+Pål Sannes E<lt>pal.sannes@met.noE<gt>
+=head1 COPYRIGHT
+Copyright (C) 2010 met.no
+=cut
 </code>