#!/usr/bin/perl -w # (C) Copyright 2015, met.no # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # pod included at end of file use strict; use Getopt::Long; use Pod::Usage qw(pod2usage); use Geo::BUFR; # Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables'; our @pressure_level_desc = ( '004086', # Long time period or displacement [s] '008042', # Extended vertical sounding significance [flag table] '007004', # Pressure [PA] '010009', # Geopotential height [GPM] '005015', # Latitude displacement (high accuracy) [degree] '006015', # Longitude displacement (high accuracy) [degree] '012101', # Temperature/dry-bulb temperature [K] '012103', # Dew-point temperature [K] '011001', # Wind direction [degree true] '011002', # Wind speed [m/s] ); # Parse command line options our %option = (); GetOptions( \%option, 'help', 'outfile=s', 'tablepath=s', 'verbose=i', ) or pod2usage(-verbose => 0); # User asked for help pod2usage(-verbose => 1) if $option{help}; # Make sure there is 1 input file pod2usage(-verbose => 0) unless @ARGV == 1; our $verbose = $option{verbose} ? $option{verbose} : 0; # Set BUFR table path if ($option{tablepath}) { # Command line option --tablepath overrides all Geo::BUFR->set_tablepath($option{tablepath}); } elsif ($ENV{BUFR_TABLES}) { # If no --tablepath option, use the BUFR_TABLES environment variable Geo::BUFR->set_tablepath($ENV{BUFR_TABLES}); } else { # If all else fails, use the libbufr bufrtables Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH); } my $infile = $ARGV[0]; open(my $IN, '<',$infile) or die "Cannot open $infile: $!"; # Where to print the altered BUFR message(s) my $OUT; if ($option{outfile}) { open($OUT, '>', $option{outfile}) or die "Cannot open $option{outfile} for writing: $!"; } else { $OUT = *STDOUT; } my $bufr = Geo::BUFR->new(); $bufr->fopen($infile); my @subset_data; # Will contain data values for subset 1,2... my @subset_desc; # Will contain the set of descriptors for subset 1,2... my $new_bufr; READLOOP: while (not $bufr->eof()) { # Read (and decode) next observation, skipping messages that # cannot be decoded my ($data, $desc); eval { ($data, $desc) = $bufr->next_observation(); }; if ($@) { warn $@; # Try to extract message number and ahl of the bulletin # where the error occurred my $current_message_number = $bufr->get_current_message_number(); if (defined $current_message_number) { my $error_msg = "In message $current_message_number"; my $current_ahl = $bufr->get_current_ahl(); $error_msg .= " contained in bulletin with ahl $current_ahl\n" if $current_ahl; warn $error_msg if $error_msg; } next READLOOP; } next READLOOP if !$data; my $isub = $bufr->get_current_subset_number(); my $nsubsets = $bufr->get_number_of_subsets(); my $imsg = $bufr->get_current_message_number(); my $ahl = $bufr->get_current_ahl(); if ($isub == 1) { $new_bufr = Geo::BUFR->new(); $new_bufr->copy_from($bufr,'metadata'); @subset_data = (); @subset_desc = (); } my $descriptors_unexpanded = $bufr->get_descriptors_unexpanded(); if ($descriptors_unexpanded !~ /^30905(2|3)/) { if ($verbose > 0 && $isub == 1) { print "Removing message $imsg"; print " contained in bulletin with ahl $ahl" if $ahl; print " since unexpanded descriptors did not start with 309052 or 309053\n"; } next; } else { if ($verbose > 3) { print "Processing subset $isub in message $imsg"; print " contained in bulletin with ahl $ahl" if $ahl; print "\n"; } my @plevels; # array of references to the 10 data values of # one pressure level my $i_eddrf = ($descriptors_unexpanded =~ /^309052/) ? 29 : 19; my $i1 = $i_eddrf; while ($desc->[$i1] ne '031001') { my @record = @$data[$i1 .. $i1+9]; if (defined $record[2]) { push @plevels, \@record; } elsif ($verbose > 2) { print "Missing pressure in level ", ($i1 - $i_eddrf)/10 + 1 . " in subset $isub in Message number $imsg"; print " in $ahl" if $ahl; print "\n"; } $i1 += 10; } if (!@plevels || already_sorted(\@plevels)) { # Reencode the BUFR message with no changes $subset_data[$isub] = $data; $subset_desc[$isub] = $desc; } else { if ($verbose > 0) { print "Need to sort (or merge) subset $isub" . " in Message number $imsg"; print " in $ahl" if $ahl; print "\n"; } my ($new_data, $new_desc) = get_sorted_data($data,$desc,\@plevels,$i1,$i_eddrf); $subset_data[$isub] = $new_data; $subset_desc[$isub] = $new_desc; } } if ($isub == $nsubsets) { print $OUT $new_bufr->encode_message(\@subset_data, \@subset_desc); } } $bufr->fclose(); ## Will return in ($new_data, $new_desc) the same data as in ## ($data,$desc) except that pressure levels are sorted on decreasing ## pressure values, also merging levels with same pressure (if time ## and or position coordinates are equal in the 2 pressure levels or ## missing) sub get_sorted_data { my ($data,$desc,$plevels,$i1,$i_eddrf) = @_; my ($new_data,$new_desc); # Section 4 up to where pressure level data starts is unaltered for my $ii (0 .. $i_eddrf-2) { $new_desc->[$ii] = $desc->[$ii]; $new_data->[$ii] = $data->[$ii]; } # Then handle pressure levels (and eddrf) $new_desc->[$i_eddrf-1] = '031002'; $new_data->[$i_eddrf-1] = undef; # Will be filled with $eddrf later my $eddrf = 0; # extended delayed descriptor replication factor # Use a Schwartzian transform to sort on descending pressure levels my @sorted_plevels = map { $_->[0] } sort { $b->[1] <=> $a->[1] } map { [$_, $_->[2]] } @{$plevels}; # Levels with same pressure and same time and position should be # coalesced into one my @prev_plevel = @{$sorted_plevels[0]}; my $prev_PP = $prev_plevel[2]; for my $ii (1 .. (@{$plevels}-1)) { my $PP = $sorted_plevels[$ii]->[2]; if ($PP == $prev_PP && same_pos_time(\@prev_plevel,$sorted_plevels[$ii])) { # Combine the two levels into 1 if ($verbose > 2) { print "Merging pressure level for pressure = $PP\n"; } my @new_plevel; $new_plevel[0] = defined($prev_plevel[0]) ? $prev_plevel[0] : $sorted_plevels[$ii]->[0]; my $evss = combine_evss ($sorted_plevels[$ii-1]->[1], $sorted_plevels[$ii]->[1]); $new_plevel[1] = $evss; $new_plevel[2] = $PP; for (3 .. 9) { $new_plevel[$_] = defined($prev_plevel[$_]) ? $prev_plevel[$_] : $sorted_plevels[$ii]->[$_]; } @prev_plevel = @new_plevel; } else { push @$new_data, @prev_plevel; push @$new_desc, @pressure_level_desc; @prev_plevel = @{$sorted_plevels[$ii]}; $eddrf++; } $prev_PP = $PP; } # Add last level push @$new_data, @prev_plevel; push @$new_desc, @pressure_level_desc; $eddrf++; $new_data->[$i_eddrf-1] = $eddrf; # Section 4 after pressure level data is unaltered # (assuming windshear levels are sorted already) push @$new_data, @$data[$i1 .. @$data-1]; push @$new_desc, @$desc[$i1 .. @$data-1]; return ($new_data, $new_desc); } ## Combine two extended vertical sounding significance (evss) into one sub combine_evss { my ($evss1,$evss2) = @_; if (defined($evss1) && defined($evss2)) { my $binary1 = pack "N", $evss1; # Packed as 32 bits in big-endian order my $binary2 = pack "N", $evss2; my $binary = $binary1 | $binary2; # bitwise or return unpack "N", $binary; } elsif (defined($evss1)) { return $evss1; } else { return $evss2; } } ## Return true (1) if pressure levels are sorted and unique, else ## return false (0) sub already_sorted { my $plevels = shift; return 1 if !@{$plevels}; my @PP = map { $_->[2] } @{$plevels}; my $prev_PP = 1000000000; # Surely bigger than any real PP my $ii = 0; foreach my $PP (@PP) { if ($PP > $prev_PP) { if ($verbose > 1) { print "Not sorted after pressure level $prev_PP"; print " (next level is $PP)\n"; } return 0; } if ($PP == $prev_PP) { if (same_pos_time($plevels->[$ii],$plevels->[$ii-1])) { if ($verbose > 1) { print "Duplicate levels at pressure $PP\n"; } return 0; } } $prev_PP = $PP; $ii++; } return 1; } ## Return false (0) if the two (references) to pressure levels have ## position or time defined with differing values, else returns true ## (1) sub same_pos_time { my ($p1,$p2) = @_; if (defined($p1->[0]) && defined($p2->[0]) && $p1->[0] != $p2->[0]) { return 0; # 004086 'Long time period or displacement' differ } elsif (defined($p1->[4]) && defined($p2->[4]) && $p1->[4] != $p2->[4]) { return 0; # 005015 'Latitude displacement' differ } elsif (defined($p1->[5]) && defined($p2->[5]) && $p1->[5] != $p2->[5]) { return 0; # 005015 'Latitude displacement' differ } return 1; } =pod =head1 SYNOPSIS sortbufrtemp.pl [--outfile ] [--tablepath ] [--verbose n] [--help] =head1 DESCRIPTION Will extract from all BUFR TEMPs (recognized by unexpanded descriptors in section 3 starting with 309052 or 309053), sorting the pressure levels and merging duplicate pressure levels (having same pressure, time and location coordinates) into one single level, printing to STDOUT unless C<--outfile> is set. Pressure levels with missing pressure will be removed. Execute without arguments for Usage, with option C<--help> for some additional info. =head1 OPTIONS --outfile Will print to instead of STDOUT --tablepath Set path to BUFR tables (overrides $ENV{BUFR_TABLES}) --verbose n Set verbose level to n, 0<=n<=3 (default 0). n>=1: prints which BUFR messages were actually altered, and which messages were removed because unexpanded descriptors did not start with 309052 or 309053 n>=2: prints at which pressure level sorting breaks, or first duplicate level encountered (whichever of these conditions happened first) n>=3: prints which pressure levels were merged, and which levels were removed due to missing pressure n=4: prints which subset, message number and ahl (if exists) is currently processed Verbose output is sent to STDOUT, so ought to be combined with option --outfile --help Display Usage and explain the options used. Almost the same as consulting perldoc sortbufrtemp.pl Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>. To avoid having to use the C<--tablepath> option, you are adviced to set the environment variable BUFR_TABLES to the directory where your BUFR tables are located (unless the default path provided by sortbufrtemp.pl works for you). =head1 AUTHOR Pål Sannes Epal.sannes@met.noE =head1 COPYRIGHT Copyright (C) 2015 met.no =cut