bufr.pm:bufrread.pl

This is an old revision of the document!


#!/usr/bin/perl -w

# (C) Copyright 2010, met.no
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

# Usage: bufrread <bufr file(s)> [options]

# Extract Bufr messages from bufr file(s) and decode them.
# See subroutine usage_verbose for explanation of the options.

# Author: P.Sannes met.no 2010

use strict;
use Getopt::Long;

# metno module
use BUFR;

my $DEFAULT_TABLE_PATH = '/usr/local/lib/emos/bufrtables';
my $DEFAULT_CTABLE = 'C0000000000088013001';

# Parse command line options
my %option = ();
GetOptions(
           \%option,
           'all_operators',# Show all operator descriptors when printing section 4
           'bitmap',       # Display bit mapped values on same line
           'codetables',   # Use code and flag tables to resolve values
           'data_only',    # Print section 4 (data section) only
           'filter=s',     # Decode observations meeting criteria in <filter file> only
           'help',         # Print help information and exit
           'noqc',         # Do not decode quality control
           'optional_section',  # Display a hex dump of optional section if present
           'outfile=s',    # Print to file instead of STDOUT
           'param=s',      # Decode parameters with descriptors in <descriptor file> only
           'strict_checking=i', # Enable/disable strict checking of BUFR format
           'tablepath=s',  # Set BUFR table path
           'verbose=i',    # Set verbose level to n, 0<=n<=3 (default 0)
           'width=i',      # Set width of values field (default is 15 characters)
       ) or die "Wrong option(s), execute $0 without arguments for Usage\n";

# User asked for help
usage_verbose() if $option{help};

# Make sure there is at least one input file
usage() unless @ARGV;

my $verbose = $option{verbose} ? $option{verbose} : 0;

# Set verbosity level for the BUFR module. Must be set also for each
# BUFR object generated
BUFR->set_verbose($verbose);

# Set whether quality information should be decoded for the BUFR module
BUFR->set_noqc() if ($option{noqc});

BUFR->set_strict_checking($option{strict_checking}) if defined $option{strict_checking};

BUFR->set_show_all_operators($option{all_operators}) if defined $option{all_operators};

# Set BUFR table path
if ($option{tablepath}) {
    # Command line option --tablepath overrides all
    BUFR->set_tablepath($option{tablepath});
} elsif ($ENV{BUFR_TABLES}) {
    # If no --tablepath option, use the BUFR_TABLES environment variable
    BUFR->set_tablepath($ENV{BUFR_TABLES});
} else {
    # If all else fails, use the libemos bufrtables
    BUFR->set_tablepath($DEFAULT_TABLE_PATH);
}

# Where to direct output (including verbose output, but not output to STDERR)
my $OUT;
if ($option{outfile}) {
    open($OUT, '>',$option{outfile})
        or die "Cannot open $option{outfile} for writing: $!";
} else {
    $OUT = *STDOUT;
}

my @requested_desc;
if ($option{param}) {
    @requested_desc = read_descriptor_file($option{param});
}

# Arrays over filter criteria, used if option --filter is set
my @fid;      # Filter descriptors, .e.g. $fid[1] = [ 001001, 001002 ]
my @fiv;      # Filter values, e.g. $fiv[1] = [ [ 3, 895 ], [ 6 252 ] ]
my @num_desc; # Number of filter descriptors for each criterion, e.g. $num_desc[1] = 2
my @num_val;  # Number of filter value lines for each criterion, e.g. $num_val[1] = 2
my @required; # 1 for required criteria (D!: in filter file), 0 for others
my $num_criteria = 0;
my $num_required_criteria = 0;
if ($option{filter}) {
    read_filter_file($option{filter});
}

my $width = $option{width} ? $option{width} : 15;

# Used to display section 2 if --optional_section is set
my $sec2_code_ref = sub {return '    Hex dump:'.' 'x26 . unpack('H*',substr(shift,4))};

# Loop for processing of BUFR input files
foreach my $inputfname ( @ARGV ) {
    my $bufr = BUFR->new();

    # This sets object verbose level equal to class verbose level
    $bufr->set_verbose($verbose);

    if ($option{codetables}) {
        # Load C table. So far, I don't see the need for user to
        # choose another C table than the default table
        $bufr->load_Ctable($DEFAULT_CTABLE)
            or die "Unable to load table $DEFAULT_CTABLE";
    }

    # Open BUFR file
    $bufr->fopen($inputfname);

    # Process input file
    decode($bufr);
    $bufr->fclose();
}


# Extract data from BUFR file.
# Print WMO ahl for first message in each WMO bulletin, print
# message number for each new message, print subset number
# if message contains more than one subset
sub decode {
    my $bufr = shift;		# BUFR object

    my ($message_header, $current_message_number, $current_ahl);
    my $section013_dumped = 0; # Used to keep track of whether sections
                               # 0-3 have been printed when --filter
                               # option has been used
  READLOOP:
    while (not $bufr->feof()) {

	# Read next observation. If an error is encountered during
	# decoding, skip this observation while printing the error
	# message to STDERR, also displaying ahl of bulletin if found.
        my ($data, $descriptors);
	eval {
            ($data, $descriptors) = $bufr->next_observation();
        };
        if ($@) {
            warn $@;
            # Try to extract message number and ahl of the bulletin
            # where the error occurred
            eval {
                $current_message_number = $bufr->get_current_message_number();
                $current_ahl = $bufr->get_current_ahl() || '';
                my $error_msg;
                $error_msg = "In message $current_message_number"
                    if $current_message_number;
                $error_msg .= " contained in bulletin with ahl $current_ahl\n"
                    if $current_ahl;
                warn $error_msg if $error_msg;
            };
            next READLOOP;
        }

        if ($option{param}) {
            # Reduce data and descritors to those requested only
            ($data, $descriptors)
                = param($data, $descriptors, @requested_desc);
        }

        my $current_subset_number = $bufr->get_current_subset_number();
        my $nsubsets = $bufr->get_number_of_subsets();

        if ($current_subset_number == 1) {
            $current_message_number = $bufr->get_current_message_number();
            $current_ahl = $bufr->get_current_ahl() || '';
            $message_header = sprintf "\nMessage %d", $current_message_number;
            $message_header .= (defined $current_ahl)
                ? "  $current_ahl\n" : "\n";

            $section013_dumped = 0;
            next READLOOP if $option{filter}
                and filter_observation($bufr, $data, $descriptors);

            print $OUT $message_header;

            if (not $option{data_only}) {
                print $OUT $bufr->dumpsection0();
                print $OUT $bufr->dumpsection1();
                print $OUT $bufr->dumpsection2($sec2_code_ref)
                    if $option{optional_section};
                print $OUT $bufr->dumpsection3();
                $section013_dumped = 1;
            }
        } else { # subset number > 1
            next READLOOP if $option{filter}
                and filter_observation($bufr, $data, $descriptors);

            # If subset 1 was filtered away, section 0-3 might not
            # have been printed yet
            if ($option{filter} and not $option{data_only}
                 and not $section013_dumped)  {
                print $OUT $bufr->dumpsection0();
                print $OUT $bufr->dumpsection1();
                print $OUT $bufr->dumpsection2($sec2_code_ref)
                    if $option{optional_section};
                print $OUT $bufr->dumpsection3();
                $section013_dumped = 1;
            }
        }

        if ($nsubsets > 1) {
            printf $OUT "\nSubset %d\n", $current_subset_number;
        }
        if ($option{bitmap}) {
            print $OUT $bufr->dumpsection4_with_bitmaps($data, $descriptors, $width);
        } else {
            print $OUT $bufr->dumpsection4($data, $descriptors, $width);
        }
    }
}

sub read_descriptor_file {
    my $descriptor_file = shift;

    open my $fh, '<', $descriptor_file
        or die "Cannot open $descriptor_file: $!";
    my @requested_desc;
    while (<$fh>) {
        next unless /^\s*(\d{6})/;
        push @requested_desc, $1;
    }
    return @requested_desc;
}

# Reduce the data to those corresponding to the requested descriptors
# only
sub param {
    my ($data, $descriptors, @requested_desc) = @_;

    my (@req_data, @req_desc);
    my $i = 0;
    foreach my $id ( @{$descriptors} ) {
        if (grep { $id == $_ } @requested_desc) {
            push @req_data, $data->[$i];
            push @req_desc, $id;
        }
        $i++;
    }
    return (\@req_data, \@req_desc);
}

sub usage {
    print <<"EOF";
Usage: $0 <bufr file(s)>
        [--codetables]
        [--data_only]
        [--param <descriptor file>]
        [--filter <filter file>]
        [--bitmap]
        [--noqc]
        [--outfile <filename>]
        [--optional_section]
        [--width n]
        [--strict_checking n]
        [--all_operators]
        [--tablepath <path to BUFR tables>]
        [--verbose n]
        [--help]
Try 'bufrread.pl --help' for more information.
EOF
    exit 0;
}


sub usage_verbose {
    print <<"EOF";

Usage: $0 <bufr file(s)> [options]

Will print section 0-4 in BUFR messages in <bufr file(s)>.
Options (may be abbreviated, e.g. --h or -h for --help) are:

        --codetables    Use code and flag tables to resolve values where unit
                        is [CODE TABLE] or [FLAG TABLE]
        --data_only     Print section 4 (data section) only
        --param <descriptor file>
                        Decode parameters with descriptors in <descriptor file> only
        --filter <filter file>
                        Decode observations meeting criteria in <filter file> only
        --bitmap        Display bit mapped values on same line
        --noqc          Do not decode quality control
                        (or any descriptors following 222000)
        --outfile <filename>
                        Will print to <filename> instead of STDOUT
        --optional_section
                        Display a hex dump of optional section if present
        --width n       Set width of field used for data values to n characters
                        (default is 15)
        --strict_checking n n=0 (default) Disable strict checking of BUFR format
                            n=1 Issue warning if (recoverable) error in
                                BUFR format
                            n=2 Croak if (recoverable) error in BUFR format.
                                Nothing more in this message will be decoded.
        --all_operators Show all operator descriptors when printing section 4
        --tablepath <path to BUFR tables>
                        Set path to BUFR tables (overrides ENV{BUFR_TABLES})
        --verbose n     Set verbose level to n, 0<=n<=3 (default 0)
        --help          Print this Usage

You should probably set
        export BUFR_TABLES=$DEFAULT_TABLE_PATH
or use the --tablepath option.

Each line in <descriptor file> should start with a BUFR descriptor (6 digits).
Rest of line will be ignored. bufrread will extract values for these descriptors
only. If used together with --filter, <descriptor file> must contain all descriptors
in <filter file>.

Using --filter will decode only those observations that meet one of the criteria in
<filter file> (and all of those criteria marked D!). Comments (starting with #) are
ignored. An example of a filter file is

# All stations in WMO block 01
D: 001001
1
D: 001001 001002
3 895
6 252
D: 001011
LF5U       # Ekofisk
D!: 004004
6
7

which decodes all observations with block number 01, two other specific wmo stations
and one specific ship, all of which having hour (004004) equal to 6 or 7.
If there is no value line after a descriptor line, it is enough that the observation
contains the descriptor(s), whatever the values are. So to extract all ship messages
from a BUFR synop file, the filter file should contain this single line only:

D: 001011

If an error occurs during decoding (typically because the required BUFR table is
missing or message is corrupt) the message is skipped, and the number of errors
is reported at end of output.

EOF
    exit 0;
}

###################################################################################

# Filter routines

# Read in content of $filter_file into variables @fid, @fiv,
# @num_desc, @num_val and $num_criteria, which are defined above.
sub read_filter_file {
    my $filter_file = shift;

    open my $fh, '<', $filter_file
        or die "Cannot open $filter_file: $!";
    while (<$fh>) {
        # Remove comments
        s/#.*//;
        next if /^$/;

        if (s/^\s*D(!)?://) {
            my @desc = split;
            $num_desc[++$num_criteria] = @desc;
            $num_val[$num_criteria] = 0;
            $fid[$num_criteria] = \@desc;
            $required[$num_criteria] = $1 ? 1 : 0;
            $num_required_criteria++ if $1;
        } else {
            my @values = split;
            $fiv[$num_criteria]->[++$num_val[$num_criteria]] = \@values;
        }
    }
    return;
}

# Return true (observations should be filtered) if the observation
# does not meet all of the D! criteria (if exists) and does not meet
# any one of the other criteria (if exists) in filter file
sub filter_observation {
    my $bufr = shift;
    die "Error in filter_observation: argument not a BUFR object"
        unless ref($bufr) eq 'BUFR';
    my ($data, $descriptors) = @_;

    my $num_ordinary_criteria = @fid - $num_required_criteria;
    my $num_success_req_criteria = 0; # Number of required criteria successfully fulfilled
    my $num_success_ord_criteria = 0; # Number of ordinary criteria successfully fulfilled

    # loop through all different criteria:
  CRITERIA: foreach my $filter_criterion (1 .. $num_criteria) {
        if ($num_val[$filter_criterion] == 0) {
            # Enough to check that the descriptor(s) are present in observation
            my $nmatch = 0;
            # loop through all descriptors in criterion:
            foreach my $idesc (0 .. $num_desc[$filter_criterion] - 1) {
                my $filter_desc = $fid[$filter_criterion]->[$idesc];
                for (my $j = 0; $j < @{$descriptors}; $j++) {
                    if ($descriptors->[$j] == $filter_desc) {
                        $nmatch++; # Matched!
                        if ($nmatch == $num_desc[$filter_criterion]) {
                            # All descriptors for this line in this criterion matched.
                            # Do we need to check more criteria?
                            if ($required[$filter_criterion]) {
                                $num_success_req_criteria++;
                                if ($num_success_req_criteria == $num_required_criteria
                                    and ($num_ordinary_criteria == 0
                                         or $num_success_ord_criteria > 0)) {
                                    return 0; # Don't filter this observation
                                }
                            } else {
                                $num_success_ord_criteria++;
                                if ($num_success_req_criteria == $num_required_criteria) {
                                    return 0; # Don't filter this observation
                                }
                            }
                        }
                    }
                }
            }
        } else {
            # loop through all filter values lines (for given) criterion:
          LINE: foreach my $line (1 .. $num_val[$filter_criterion]) {
                my $nmatch = 0;
                # loop through all descriptors in criterion:
              DESC: foreach my $idesc (0 .. $num_desc[$filter_criterion] - 1) {
                    my $filter_desc = $fid[$filter_criterion]->[$idesc];
                    # loop through all data in subset:
                    for (my $j = 0; $j < @{$descriptors}; $j++) {
                        if ($descriptors->[$j] == $filter_desc) {
                            (my $val = $data->[$j]) =~ s/^\s*(.*?)\s*$/$1/;
                            if ($val eq $fiv[$filter_criterion]->[$line]->[$idesc]) {
                                $nmatch++; # Matched!
                                if ($nmatch == $num_desc[$filter_criterion]) {
                                    # All descriptors for this line in this criterion matched.
                                    # Do we need to check more criteria?
                                    if ($required[$filter_criterion]) {
                                        $num_success_req_criteria++;
                                        if ($num_success_req_criteria == $num_required_criteria
                                            and ($num_ordinary_criteria == 0
                                                 or $num_success_ord_criteria > 0)) {
                                            return 0; # Don't filter this observation
                                        }
                                    } else {
                                        $num_success_ord_criteria++;
                                        if ($num_success_req_criteria == $num_required_criteria) {
                                            return 0; # Don't filter this observation
                                        }
                                    }
                                } else {
                                    next DESC;
                                }
                            } else {
                                # Found the descriptor, but wrong value
                                next LINE;
                            }
                        }
                    }
                } # End of filter descriptor loop
            } # End of value line loop
        }
    } # End of criteria loop

    # One required criterion not fulfilled, or if there are no
    # required criteria: none of the non-required criteria fulfilled
    # (so the observation should be filtered away)
    return 1;
}
This website uses cookies. By using the website, you agree with storing cookies on your computer. Also you acknowledge that you have read and understand our Privacy Policy. If you do not agree leave the website.More information about cookies
  • bufr.pm/bufrread.pl.1267015638.txt.gz
  • Last modified: 2022-05-31 09:23:11
  • (external edit)