bufr.pm:sortbufrtemp.pl

#!/usr/bin/perl -w
 
# (C) Copyright 2015, met.no
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
 
# pod included at end of file
 
use strict;
use Getopt::Long;
use Pod::Usage qw(pod2usage);
use Geo::BUFR;
 
# Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set
use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables';
 
our @pressure_level_desc = (
    '004086',  # Long time period or displacement [s]
    '008042',  # Extended vertical sounding significance [flag table]
    '007004',  # Pressure [PA]
    '010009',  # Geopotential height [GPM]
    '005015',  # Latitude displacement (high accuracy) [degree]
    '006015',  # Longitude displacement (high accuracy) [degree]
    '012101',  # Temperature/dry-bulb temperature [K]
    '012103',  # Dew-point temperature [K]
    '011001',  # Wind direction [degree true]
    '011002',  # Wind speed [m/s]
    );
 
# Parse command line options
our %option = ();
GetOptions(
           \%option,
           'help',
           'outfile=s',
           'tablepath=s',
           'verbose=i',
       ) or pod2usage(-verbose => 0);
 
# User asked for help
pod2usage(-verbose => 1) if $option{help};
 
# Make sure there is 1 input file
pod2usage(-verbose => 0) unless @ARGV == 1;
 
our $verbose = $option{verbose} ? $option{verbose} : 0;
 
# Set BUFR table path
if ($option{tablepath}) {
    # Command line option --tablepath overrides all
    Geo::BUFR->set_tablepath($option{tablepath});
} elsif ($ENV{BUFR_TABLES}) {
    # If no --tablepath option, use the BUFR_TABLES environment variable
    Geo::BUFR->set_tablepath($ENV{BUFR_TABLES});
} else {
    # If all else fails, use the libbufr bufrtables
    Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH);
}
 
my $infile = $ARGV[0];
open(my $IN, '<',$infile)
    or die "Cannot open $infile: $!";
 
# Where to print the altered BUFR message(s)
my $OUT;
if ($option{outfile}) {
    open($OUT, '>', $option{outfile})
        or die "Cannot open $option{outfile} for writing: $!";
} else {
    $OUT = *STDOUT;
}
 
my $bufr = Geo::BUFR->new();
$bufr->fopen($infile);
my @subset_data; # Will contain data values for subset 1,2...
my @subset_desc; # Will contain the set of descriptors for subset 1,2...
my $new_bufr;
 
READLOOP:
while (not $bufr->eof()) {
    # Read (and decode) next observation, skipping messages that
    # cannot be decoded
    my ($data, $desc);
        eval {
            ($data, $desc) = $bufr->next_observation();
        };
        if ($@) {
            warn $@;
            # Try to extract message number and ahl of the bulletin
            # where the error occurred
            my $current_message_number = $bufr->get_current_message_number();
            if (defined $current_message_number) {
                my $error_msg = "In message $current_message_number";
                my $current_ahl = $bufr->get_current_ahl();
                $error_msg .= " contained in bulletin with ahl $current_ahl\n"
                    if $current_ahl;
                warn $error_msg if $error_msg;
            }
            next READLOOP;
        }
    next READLOOP if !$data;
 
    my $isub = $bufr->get_current_subset_number();
    my $nsubsets = $bufr->get_number_of_subsets();
    my $imsg = $bufr->get_current_message_number();
    my $ahl = $bufr->get_current_ahl();
    if ($isub == 1) {
        $new_bufr = Geo::BUFR->new();
        $new_bufr->copy_from($bufr,'metadata');
        @subset_data = ();
        @subset_desc = ();
    }
 
    my $descriptors_unexpanded = $bufr->get_descriptors_unexpanded();
    if ($descriptors_unexpanded !~ /^30905(2|3)/) {
	if ($verbose > 0 && $isub == 1) {
	    print "Removing message $imsg";
	    print " contained in bulletin with ahl $ahl" if $ahl;
	    print " since unexpanded descriptors did not start with 309052 or 309053\n";
	}
	next;
    } else {
	if ($verbose > 3) {
	    print "Processing subset $isub in message $imsg";
	    print " contained in bulletin with ahl $ahl" if $ahl;
	    print "\n";
	}
        my @plevels; # array of references to the 10 data values of
                     # one pressure level
        my $i_eddrf = ($descriptors_unexpanded =~ /^309052/) ? 29 : 19;
	my $i1 = $i_eddrf;
        while ($desc->[$i1] ne '031001') {
            my @record = @$data[$i1 .. $i1+9];
            if (defined $record[2]) {
                push @plevels, \@record;
            } elsif ($verbose > 2) {
                print "Missing pressure in level ", ($i1 - $i_eddrf)/10 + 1
                    . " in subset $isub in Message number $imsg";
                print " in $ahl" if $ahl;
                print "\n";
            }
            $i1 += 10;
        }
        if (!@plevels || already_sorted(\@plevels)) {
            # Reencode the BUFR message with no changes
            $subset_data[$isub] = $data;
            $subset_desc[$isub] = $desc;
        } else {
            if ($verbose > 0) {
                print "Need to sort (or merge) subset $isub"
                    . " in Message number $imsg";
                print " in $ahl" if $ahl;
                print "\n";
            }
            my ($new_data, $new_desc)
                = get_sorted_data($data,$desc,\@plevels,$i1,$i_eddrf);
            $subset_data[$isub] = $new_data;
            $subset_desc[$isub] = $new_desc;
        }
    }
 
    if ($isub == $nsubsets) {
        print $OUT $new_bufr->encode_message(\@subset_data, \@subset_desc);
    }
}
$bufr->fclose();
 
## Will return in ($new_data, $new_desc) the same data as in
## ($data,$desc) except that pressure levels are sorted on decreasing
## pressure values, also merging levels with same pressure (if time
## and or position coordinates are equal in the 2 pressure levels or
## missing)
sub get_sorted_data {
    my ($data,$desc,$plevels,$i1,$i_eddrf) = @_;
    my ($new_data,$new_desc);
 
    # Section 4 up to where pressure level data starts is unaltered
    for my $ii (0 .. $i_eddrf-2) {
        $new_desc->[$ii] = $desc->[$ii];
        $new_data->[$ii] = $data->[$ii];
    }
 
    # Then handle pressure levels (and eddrf)
    $new_desc->[$i_eddrf-1] = '031002';
    $new_data->[$i_eddrf-1] = undef; # Will be filled with $eddrf later
    my $eddrf = 0; # extended delayed descriptor replication factor
 
    # Use a Schwartzian transform to sort on descending pressure levels
    my @sorted_plevels = map { $_->[0] }
                         sort { $b->[1] <=> $a->[1] }
                         map { [$_, $_->[2]] } @{$plevels};
 
    # Levels with same pressure and same time and position should be
    # coalesced into one
    my @prev_plevel = @{$sorted_plevels[0]};
    my $prev_PP = $prev_plevel[2];
    for my $ii (1 .. (@{$plevels}-1))  {
        my $PP = $sorted_plevels[$ii]->[2];
        if ($PP == $prev_PP
            && same_pos_time(\@prev_plevel,$sorted_plevels[$ii])) {
            # Combine the two levels into 1
            if ($verbose > 2) {
                print "Merging pressure level for pressure = $PP\n";
            }
            my @new_plevel;
            $new_plevel[0] = defined($prev_plevel[0])
                ? $prev_plevel[0] : $sorted_plevels[$ii]->[0];
            my $evss = combine_evss
                ($sorted_plevels[$ii-1]->[1], $sorted_plevels[$ii]->[1]);
            $new_plevel[1] = $evss;
            $new_plevel[2] = $PP;
            for (3 .. 9) {
                $new_plevel[$_] = defined($prev_plevel[$_])
                    ? $prev_plevel[$_] : $sorted_plevels[$ii]->[$_];
            }
            @prev_plevel = @new_plevel;
        } else {
            push @$new_data, @prev_plevel;
            push @$new_desc, @pressure_level_desc;
            @prev_plevel = @{$sorted_plevels[$ii]};
            $eddrf++;
        }
        $prev_PP = $PP;
    }
    # Add last level
    push @$new_data, @prev_plevel;
    push @$new_desc, @pressure_level_desc;
    $eddrf++;
 
    $new_data->[$i_eddrf-1] = $eddrf;
 
    # Section 4 after pressure level data is unaltered
    # (assuming windshear levels are sorted already)
    push @$new_data, @$data[$i1 .. @$data-1];
    push @$new_desc, @$desc[$i1 .. @$data-1];
 
    return ($new_data, $new_desc);
}
 
## Combine two extended vertical sounding significance (evss) into one
sub combine_evss {
    my ($evss1,$evss2) = @_;
    if (defined($evss1) && defined($evss2)) {
        my $binary1 = pack "N", $evss1; # Packed as 32 bits in big-endian order
        my $binary2 = pack "N", $evss2;
        my $binary = $binary1 | $binary2; # bitwise or
        return unpack "N", $binary;
    } elsif (defined($evss1)) {
        return $evss1;
    } else {
        return $evss2;
    }
}
 
## Return true (1) if pressure levels are sorted and unique, else
## return false (0)
sub already_sorted {
    my $plevels = shift;
 
    return 1 if !@{$plevels};
    my @PP = map { $_->[2] } @{$plevels};
    my $prev_PP = 1000000000; # Surely bigger than any real PP
    my $ii = 0;
    foreach my $PP (@PP) {
	if ($PP > $prev_PP) {
	    if ($verbose > 1) {
		print "Not sorted after pressure level $prev_PP";
		print " (next level is $PP)\n";
	    }
	    return 0;
	}
        if ($PP == $prev_PP) {
            if (same_pos_time($plevels->[$ii],$plevels->[$ii-1])) {
		if ($verbose > 1) {
		    print "Duplicate levels at pressure $PP\n";
		}
		return 0;
	    }
	}
        $prev_PP = $PP;
        $ii++;
    }
    return 1;
}
 
## Return false (0) if the two (references) to pressure levels have
## position or time defined with differing values, else returns true
## (1)
sub same_pos_time {
    my ($p1,$p2) = @_;
 
    if (defined($p1->[0]) && defined($p2->[0]) && $p1->[0] != $p2->[0]) {
        return 0; # 004086 'Long time period or displacement' differ
    } elsif (defined($p1->[4]) && defined($p2->[4]) && $p1->[4] != $p2->[4]) {
        return 0; # 005015 'Latitude displacement' differ
    } elsif (defined($p1->[5]) && defined($p2->[5]) && $p1->[5] != $p2->[5]) {
        return 0; # 005015 'Latitude displacement' differ
    }
    return 1;
}
 
=pod
 
=head1 SYNOPSIS
 
  sortbufrtemp.pl <bufr file>
      [--outfile <sorted bufr file>]
      [--tablepath <path to BUFR tables>]
      [--verbose n]
      [--help]
 
=head1 DESCRIPTION
 
Will extract from <bufr file> all BUFR TEMPs (recognized by unexpanded
descriptors in section 3 starting with 309052 or 309053), sorting the
pressure levels and merging duplicate pressure levels (having same
pressure, time and location coordinates) into one single level,
printing to STDOUT unless C<--outfile> is set. Pressure levels with
missing pressure will be removed.
 
Execute without arguments for Usage, with option C<--help> for some
additional info.
 
=head1 OPTIONS
 
 
   --outfile <filename>
                    Will print to <filename> instead of STDOUT
   --tablepath <path to BUFR tables>
                    Set path to BUFR tables (overrides $ENV{BUFR_TABLES})
   --verbose n      Set verbose level to n, 0<=n<=3 (default 0).
                    n>=1: prints which BUFR messages were actually altered,
                          and which messages were removed because unexpanded
                          descriptors did not start with 309052 or 309053
                    n>=2: prints at which pressure level sorting
                          breaks, or first duplicate level encountered
                          (whichever of these conditions happened
                          first)
                    n>=3: prints which pressure levels were merged,
                          and which levels were removed due to missing
                          pressure
                    n=4: prints which subset, message number and ahl
                         (if exists) is currently processed
                    Verbose output is sent to STDOUT, so ought to be
                    combined with option --outfile
   --help           Display Usage and explain the options used. Almost
                    the same as consulting perldoc sortbufrtemp.pl
 
Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>.
 
To avoid having to use the C<--tablepath> option, you are adviced to
set the environment variable BUFR_TABLES to the directory where your
BUFR tables are located (unless the default path provided by
sortbufrtemp.pl works for you).
 
=head1 AUTHOR
 
Pål Sannes E<lt>pal.sannes@met.noE<gt>
 
=head1 COPYRIGHT
 
Copyright (C) 2015 met.no
 
=cut
This website uses cookies. By using the website, you agree with storing cookies on your computer. Also you acknowledge that you have read and understand our Privacy Policy. If you do not agree leave the website.More information about cookies
  • bufr.pm/sortbufrtemp.pl.txt
  • Last modified: 2022-05-31 09:29:31
  • (external edit)