#!/usr/bin/perl -w
 
# (C) Copyright 2015, met.no
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
 
# pod included at end of file
 
use strict;
use Getopt::Long;
use Pod::Usage qw(pod2usage);
use Geo::BUFR;
 
# Will be used if neither --tablepath nor $ENV{BUFR_TABLES} is set
use constant DEFAULT_TABLE_PATH => '/usr/local/lib/bufrtables';
 
our @pressure_level_desc = (
    '004086',  # Long time period or displacement [s]
    '008042',  # Extended vertical sounding significance [flag table]
    '007004',  # Pressure [PA]
    '010009',  # Geopotential height [GPM]
    '005015',  # Latitude displacement (high accuracy) [degree]
    '006015',  # Longitude displacement (high accuracy) [degree]
    '012101',  # Temperature/dry-bulb temperature [K]
    '012103',  # Dew-point temperature [K]
    '011001',  # Wind direction [degree true]
    '011002',  # Wind speed [m/s]
    );
 
# Parse command line options
our %option = ();
GetOptions(
           \%option,
           'help',
           'outfile=s',
           'tablepath=s',
           'verbose=i',
       ) or pod2usage(-verbose => 0);
 
# User asked for help
pod2usage(-verbose => 1) if $option{help};
 
# Make sure there is 1 input file
pod2usage(-verbose => 0) unless @ARGV == 1;
 
our $verbose = $option{verbose} ? $option{verbose} : 0;
 
# Set BUFR table path
if ($option{tablepath}) {
    # Command line option --tablepath overrides all
    Geo::BUFR->set_tablepath($option{tablepath});
} elsif ($ENV{BUFR_TABLES}) {
    # If no --tablepath option, use the BUFR_TABLES environment variable
    Geo::BUFR->set_tablepath($ENV{BUFR_TABLES});
} else {
    # If all else fails, use the libbufr bufrtables
    Geo::BUFR->set_tablepath(DEFAULT_TABLE_PATH);
}
 
my $infile = $ARGV[0];
open(my $IN, '<',$infile)
    or die "Cannot open $infile: $!";
 
# Where to print the altered BUFR message(s)
my $OUT;
if ($option{outfile}) {
    open($OUT, '>', $option{outfile})
        or die "Cannot open $option{outfile} for writing: $!";
} else {
    $OUT = *STDOUT;
}
 
my $bufr = Geo::BUFR->new();
$bufr->fopen($infile);
my @subset_data; # Will contain data values for subset 1,2...
my @subset_desc; # Will contain the set of descriptors for subset 1,2...
my $new_bufr;
 
READLOOP:
while (not $bufr->eof()) {
    # Read (and decode) next observation, skipping messages that
    # cannot be decoded
    my ($data, $desc);
        eval {
            ($data, $desc) = $bufr->next_observation();
        };
        if ($@) {
            warn $@;
            # Try to extract message number and ahl of the bulletin
            # where the error occurred
            my $current_message_number = $bufr->get_current_message_number();
            if (defined $current_message_number) {
                my $error_msg = "In message $current_message_number";
                my $current_ahl = $bufr->get_current_ahl();
                $error_msg .= " contained in bulletin with ahl $current_ahl\n"
                    if $current_ahl;
                warn $error_msg if $error_msg;
            }
            next READLOOP;
        }
    next READLOOP if !$data;
 
    my $isub = $bufr->get_current_subset_number();
    my $nsubsets = $bufr->get_number_of_subsets();
    my $imsg = $bufr->get_current_message_number();
    my $ahl = $bufr->get_current_ahl();
    if ($isub == 1) {
        $new_bufr = Geo::BUFR->new();
        $new_bufr->copy_from($bufr,'metadata');
        @subset_data = ();
        @subset_desc = ();
    }
 
    my $descriptors_unexpanded = $bufr->get_descriptors_unexpanded();
    if ($descriptors_unexpanded !~ /^30905(2|3)/) {
	if ($verbose > 0 && $isub == 1) {
	    print "Removing message $imsg";
	    print " contained in bulletin with ahl $ahl" if $ahl;
	    print " since unexpanded descriptors did not start with 309052 or 309053\n";
	}
	next;
    } else {
	if ($verbose > 3) {
	    print "Processing subset $isub in message $imsg";
	    print " contained in bulletin with ahl $ahl" if $ahl;
	    print "\n";
	}
        my @plevels; # array of references to the 10 data values of
                     # one pressure level
        my $i_eddrf = ($descriptors_unexpanded =~ /^309052/) ? 29 : 19;
	my $i1 = $i_eddrf;
        while ($desc->[$i1] ne '031001') {
            my @record = @$data[$i1 .. $i1+9];
            if (defined $record[2]) {
                push @plevels, \@record;
            } elsif ($verbose > 2) {
                print "Missing pressure in level ", ($i1 - $i_eddrf)/10 + 1
                    . " in subset $isub in Message number $imsg";
                print " in $ahl" if $ahl;
                print "\n";
            }
            $i1 += 10;
        }
        if (!@plevels || already_sorted(\@plevels)) {
            # Reencode the BUFR message with no changes
            $subset_data[$isub] = $data;
            $subset_desc[$isub] = $desc;
        } else {
            if ($verbose > 0) {
                print "Need to sort (or merge) subset $isub"
                    . " in Message number $imsg";
                print " in $ahl" if $ahl;
                print "\n";
            }
            my ($new_data, $new_desc)
                = get_sorted_data($data,$desc,\@plevels,$i1,$i_eddrf);
            $subset_data[$isub] = $new_data;
            $subset_desc[$isub] = $new_desc;
        }
    }
 
    if ($isub == $nsubsets) {
        print $OUT $new_bufr->encode_message(\@subset_data, \@subset_desc);
    }
}
$bufr->fclose();
 
## Will return in ($new_data, $new_desc) the same data as in
## ($data,$desc) except that pressure levels are sorted on decreasing
## pressure values, also merging levels with same pressure (if time
## and or position coordinates are equal in the 2 pressure levels or
## missing)
sub get_sorted_data {
    my ($data,$desc,$plevels,$i1,$i_eddrf) = @_;
    my ($new_data,$new_desc);
 
    # Section 4 up to where pressure level data starts is unaltered
    for my $ii (0 .. $i_eddrf-2) {
        $new_desc->[$ii] = $desc->[$ii];
        $new_data->[$ii] = $data->[$ii];
    }
 
    # Then handle pressure levels (and eddrf)
    $new_desc->[$i_eddrf-1] = '031002';
    $new_data->[$i_eddrf-1] = undef; # Will be filled with $eddrf later
    my $eddrf = 0; # extended delayed descriptor replication factor
 
    # Use a Schwartzian transform to sort on descending pressure levels
    my @sorted_plevels = map { $_->[0] }
                         sort { $b->[1] <=> $a->[1] }
                         map { [$_, $_->[2]] } @{$plevels};
 
    # Levels with same pressure and same time and position should be
    # coalesced into one
    my @prev_plevel = @{$sorted_plevels[0]};
    my $prev_PP = $prev_plevel[2];
    for my $ii (1 .. (@{$plevels}-1))  {
        my $PP = $sorted_plevels[$ii]->[2];
        if ($PP == $prev_PP
            && same_pos_time(\@prev_plevel,$sorted_plevels[$ii])) {
            # Combine the two levels into 1
            if ($verbose > 2) {
                print "Merging pressure level for pressure = $PP\n";
            }
            my @new_plevel;
            $new_plevel[0] = defined($prev_plevel[0])
                ? $prev_plevel[0] : $sorted_plevels[$ii]->[0];
            my $evss = combine_evss
                ($sorted_plevels[$ii-1]->[1], $sorted_plevels[$ii]->[1]);
            $new_plevel[1] = $evss;
            $new_plevel[2] = $PP;
            for (3 .. 9) {
                $new_plevel[$_] = defined($prev_plevel[$_])
                    ? $prev_plevel[$_] : $sorted_plevels[$ii]->[$_];
            }
            @prev_plevel = @new_plevel;
        } else {
            push @$new_data, @prev_plevel;
            push @$new_desc, @pressure_level_desc;
            @prev_plevel = @{$sorted_plevels[$ii]};
            $eddrf++;
        }
        $prev_PP = $PP;
    }
    # Add last level
    push @$new_data, @prev_plevel;
    push @$new_desc, @pressure_level_desc;
    $eddrf++;
 
    $new_data->[$i_eddrf-1] = $eddrf;
 
    # Section 4 after pressure level data is unaltered
    # (assuming windshear levels are sorted already)
    push @$new_data, @$data[$i1 .. @$data-1];
    push @$new_desc, @$desc[$i1 .. @$data-1];
 
    return ($new_data, $new_desc);
}
 
## Combine two extended vertical sounding significance (evss) into one
sub combine_evss {
    my ($evss1,$evss2) = @_;
    if (defined($evss1) && defined($evss2)) {
        my $binary1 = pack "N", $evss1; # Packed as 32 bits in big-endian order
        my $binary2 = pack "N", $evss2;
        my $binary = $binary1 | $binary2; # bitwise or
        return unpack "N", $binary;
    } elsif (defined($evss1)) {
        return $evss1;
    } else {
        return $evss2;
    }
}
 
## Return true (1) if pressure levels are sorted and unique, else
## return false (0)
sub already_sorted {
    my $plevels = shift;
 
    return 1 if !@{$plevels};
    my @PP = map { $_->[2] } @{$plevels};
    my $prev_PP = 1000000000; # Surely bigger than any real PP
    my $ii = 0;
    foreach my $PP (@PP) {
	if ($PP > $prev_PP) {
	    if ($verbose > 1) {
		print "Not sorted after pressure level $prev_PP";
		print " (next level is $PP)\n";
	    }
	    return 0;
	}
        if ($PP == $prev_PP) {
            if (same_pos_time($plevels->[$ii],$plevels->[$ii-1])) {
		if ($verbose > 1) {
		    print "Duplicate levels at pressure $PP\n";
		}
		return 0;
	    }
	}
        $prev_PP = $PP;
        $ii++;
    }
    return 1;
}
 
## Return false (0) if the two (references) to pressure levels have
## position or time defined with differing values, else returns true
## (1)
sub same_pos_time {
    my ($p1,$p2) = @_;
 
    if (defined($p1->[0]) && defined($p2->[0]) && $p1->[0] != $p2->[0]) {
        return 0; # 004086 'Long time period or displacement' differ
    } elsif (defined($p1->[4]) && defined($p2->[4]) && $p1->[4] != $p2->[4]) {
        return 0; # 005015 'Latitude displacement' differ
    } elsif (defined($p1->[5]) && defined($p2->[5]) && $p1->[5] != $p2->[5]) {
        return 0; # 005015 'Latitude displacement' differ
    }
    return 1;
}
 
=pod
 
=head1 SYNOPSIS
 
  sortbufrtemp.pl <bufr file>
      [--outfile <sorted bufr file>]
      [--tablepath <path to BUFR tables>]
      [--verbose n]
      [--help]
 
=head1 DESCRIPTION
 
Will extract from <bufr file> all BUFR TEMPs (recognized by unexpanded
descriptors in section 3 starting with 309052 or 309053), sorting the
pressure levels and merging duplicate pressure levels (having same
pressure, time and location coordinates) into one single level,
printing to STDOUT unless C<--outfile> is set. Pressure levels with
missing pressure will be removed.
 
Execute without arguments for Usage, with option C<--help> for some
additional info.
 
=head1 OPTIONS
 
 
   --outfile <filename>
                    Will print to <filename> instead of STDOUT
   --tablepath <path to BUFR tables>
                    Set path to BUFR tables (overrides $ENV{BUFR_TABLES})
   --verbose n      Set verbose level to n, 0<=n<=3 (default 0).
                    n>=1: prints which BUFR messages were actually altered,
                          and which messages were removed because unexpanded
                          descriptors did not start with 309052 or 309053
                    n>=2: prints at which pressure level sorting
                          breaks, or first duplicate level encountered
                          (whichever of these conditions happened
                          first)
                    n>=3: prints which pressure levels were merged,
                          and which levels were removed due to missing
                          pressure
                    n=4: prints which subset, message number and ahl
                         (if exists) is currently processed
                    Verbose output is sent to STDOUT, so ought to be
                    combined with option --outfile
   --help           Display Usage and explain the options used. Almost
                    the same as consulting perldoc sortbufrtemp.pl
 
Options may be abbreviated, e.g. C<--h> or C<-h> for C<--help>.
 
To avoid having to use the C<--tablepath> option, you are adviced to
set the environment variable BUFR_TABLES to the directory where your
BUFR tables are located (unless the default path provided by
sortbufrtemp.pl works for you).
 
=head1 AUTHOR
 
Pål Sannes E<lt>pal.sannes@met.noE<gt>
 
=head1 COPYRIGHT
 
Copyright (C) 2015 met.no
 
=cut