Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
|
bufr.pm:bufrextract.pl_source [2022-05-31 09:29:31] external edit |
bufr.pm:bufrextract.pl_source [2026-03-26 17:56:54] (current) pals |
||
|---|---|---|---|
| Line 2: | Line 2: | ||
| # | # | ||
| - | # (C) Copyright | + | # Copyright |
| # | # | ||
| # This program is free software; you can redistribute it and/or modify | # This program is free software; you can redistribute it and/or modify | ||
| Line 30: | Line 30: | ||
| my %option = (); | my %option = (); | ||
| GetOptions( | GetOptions( | ||
| - | \%option, | + | |
| - | | + | ' |
| - | | + | ' |
| - | | + | ' |
| - | | + | |
| - | ' | + | ' |
| - | ' | + | ' |
| - | | + | ' |
| - | | + | ' |
| + | ) or pod2usage(-verbose => 0); | ||
| # User asked for help | # User asked for help | ||
| Line 44: | Line 45: | ||
| # only_ahl and without_ahl are mutually exclusive | # only_ahl and without_ahl are mutually exclusive | ||
| - | pod2usage( -message => " | + | pod2usage( -message => " |
| | | ||
| | | ||
| - | if $option{only_ahl} && $option{without_ahl}; | + | if ( ($option{only_ahl} && |
| + | || ($option{without_ahl} && ($option{only_ahl} || $option{gts})) | ||
| + | || ($option{gts} && ($option{only_ahl} || $option{without_ahl})) ); | ||
| # Make sure there is at least one input file | # Make sure there is at least one input file | ||
| Line 55: | Line 58: | ||
| Geo:: | Geo:: | ||
| - | # Set whether last ahl should be reused if current BUFR message has no AHL | + | # For filtering on ahl |
| - | Geo:: | + | |
| my $ahl_regexp; | my $ahl_regexp; | ||
| if ($option{ahl}) { | if ($option{ahl}) { | ||
| Line 63: | Line 64: | ||
| die " | die " | ||
| } | } | ||
| + | |||
| + | # For filtering on metadata in section 0/1 | ||
| + | my $filter = $option{filter} ? $option{filter} : ''; | ||
| + | my $or_criteria_ref = get_filter_criteria($filter); | ||
| # Where to direct output (including verbose output, but not output to STDERR) | # Where to direct output (including verbose output, but not output to STDERR) | ||
| Line 80: | Line 85: | ||
| foreach my $inputfname ( @ARGV ) { | foreach my $inputfname ( @ARGV ) { | ||
| my $bufr = Geo:: | my $bufr = Geo:: | ||
| - | | + | |
| + | # Could alternatively have merged filtering on ahl and metadata into | ||
| + | # one single callback function, but that would be a rather complex | ||
| + | # one, so we prefer to do the filtering on metadata later | ||
| + | | ||
| # Open BUFR file | # Open BUFR file | ||
| Line 124: | Line 133: | ||
| } | } | ||
| - | | + | # Filtering on ahl |
| + | | ||
| + | |||
| + | # Filtering on metadata | ||
| + | next READLOOP if $or_criteria_ref && not or_filter($bufr, | ||
| # Skip messages where stated length of BUFR message is sure to | # Skip messages where stated length of BUFR message is sure to | ||
| # be erroneous, unless we want ahls only (or should we skip | # be erroneous, unless we want ahls only (or should we skip | ||
| # message in this case also? Hard choice...) | # message in this case also? Hard choice...) | ||
| - | next if !$option{only_ahl} && $bufr-> | + | next READLOOP |
| my $current_subset_number = $bufr-> | my $current_subset_number = $bufr-> | ||
| Line 137: | Line 151: | ||
| $current_message_number = $bufr-> | $current_message_number = $bufr-> | ||
| $current_ahl = $bufr-> | $current_ahl = $bufr-> | ||
| + | my $gts_eom = ''; | ||
| - | if ($current_ahl | + | if ($current_ahl) { |
| if ($option{only_ahl}) { | if ($option{only_ahl}) { | ||
| print $OUT $current_ahl, | print $OUT $current_ahl, | ||
| } elsif (!$option{without_ahl}) { | } elsif (!$option{without_ahl}) { | ||
| + | if ($option{gts}) { | ||
| + | my $current_gts_starting_line = $bufr-> | ||
| + | print $OUT $current_gts_starting_line; | ||
| + | $gts_eom = $bufr-> | ||
| + | } | ||
| # Use \r\r\n after AHL, since this is the standard | # Use \r\r\n after AHL, since this is the standard | ||
| # sequence used in GTS bulletins | # sequence used in GTS bulletins | ||
| Line 150: | Line 170: | ||
| my $msg = $bufr-> | my $msg = $bufr-> | ||
| - | print $OUT $msg; | + | print $OUT $msg, $gts_eom; |
| - | } | + | } |
| } | } | ||
| Line 162: | Line 182: | ||
| return $ahl =~ $ahl_regexp ? 0 : 1; | return $ahl =~ $ahl_regexp ? 0 : 1; | ||
| } | } | ||
| + | |||
| + | # Get the list of alternative metadata criteria (these are separated | ||
| + | # by ' | ||
| + | sub get_filter_criteria { | ||
| + | my $filter = shift; | ||
| + | return ('' | ||
| + | |||
| + | my @or_criteria; | ||
| + | my @criteria = split /[|]/, $filter; | ||
| + | foreach my $cr (@criteria) { | ||
| + | $cr =~ s/^\s+//; | ||
| + | $cr =~ s/\s+$//; | ||
| + | if ($cr ne '' | ||
| + | push @or_criteria, | ||
| + | } | ||
| + | } | ||
| + | return \@or_criteria; | ||
| + | } | ||
| + | |||
| + | # Return true (1) if the BUFR message is matching all @and_criteria | ||
| + | # (to be extracted) for at least one of the @or_criteria | ||
| + | sub or_filter { | ||
| + | my ($bufr, $or_criteria_ref) = @_; | ||
| + | |||
| + | my $be = $bufr-> | ||
| + | my $dc = $bufr-> | ||
| + | # Choose to equate data_subcategory with int_data_subcategory, | ||
| + | # not quite sure about this | ||
| + | my $ic = ($be == 4) ? $bufr-> | ||
| + | : $bufr-> | ||
| + | my $lc = $bufr-> | ||
| + | my $oc = $bufr-> | ||
| + | my $os = $bufr-> | ||
| + | my $mt = $bufr-> | ||
| + | my $lt = $bufr-> | ||
| + | # This will not work for edition 3 when year is before 2000, | ||
| + | # but hard to find a better way... | ||
| + | my $ye = ($be == 4) ? $bufr-> | ||
| + | : $bufr-> | ||
| + | my $mo = $bufr-> | ||
| + | my $da = $bufr-> | ||
| + | my $ho = $bufr-> | ||
| + | my $mi = $bufr-> | ||
| + | my $se = ($be == 4) ? $bufr-> | ||
| + | |||
| + | my $include = 0; | ||
| + | OR: | ||
| + | foreach my $or_criterium (@$or_criteria_ref) { | ||
| + | my $all_ok = 1; | ||
| + | my @and_criteria = split /\s+/, $or_criterium; | ||
| + | AND: | ||
| + | foreach my $and_criterium (@and_criteria) { | ||
| + | my ($c, $list) = split /=/, $and_criterium; | ||
| + | my @list = split /,/, $list; | ||
| + | if ($c eq ' | ||
| + | if (not grep { $_ eq $be } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $dc } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $ic } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | # Not in BUFR edition 3 | ||
| + | if (!(defined $lc) || not grep { $_ eq $lc } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $oc } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $os } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $mt } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $lt } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $ye } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $mo } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $da } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $ho } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $mi } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $se } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } else { | ||
| + | die " | ||
| + | . " for the full list of 2-letter abbreviations accepted!"; | ||
| + | } | ||
| + | } # end AND | ||
| + | if ($all_ok == 1) { | ||
| + | # BUFR message has met all conditions in this | ||
| + | # or-criterium, | ||
| + | $include = 1; | ||
| + | last OR; | ||
| + | } | ||
| + | |||
| + | } # end OR | ||
| + | |||
| + | return $include; | ||
| + | } | ||
| + | |||
| Line 172: | Line 334: | ||
| bufrextract.pl <bufr file(s)> | bufrextract.pl <bufr file(s)> | ||
| [--ahl < | [--ahl < | ||
| - | [--only_ahl] | [--without_ahl] | + | [--only_ahl | --without_ahl |
| + | [--filter < | ||
| [--outfile < | [--outfile < | ||
| - | [--reuse_ahl n] | ||
| [--help] | [--help] | ||
| [--verbose n] | [--verbose n] | ||
| Line 181: | Line 343: | ||
| Extract all BUFR messages and/or corresponding AHLs from BUFR file(s), | Extract all BUFR messages and/or corresponding AHLs from BUFR file(s), | ||
| - | possibly filtering on AHL. | + | possibly filtering on AHL and/or metadata in section 1 (and 0). |
| - | The AHL (Abbreviated Header Line) is recognized as the TTAAii CCCC DTG | + | The AHL (Abbreviated Header Line) is recognized as the TTAAii CCCC |
| - | [BBB] immediately preceding the BUFR message. | + | YYGGgg |
| Execute without arguments for Usage, with option C< | Execute without arguments for Usage, with option C< | ||
| Line 195: | Line 357: | ||
| --ahl < | --ahl < | ||
| matching < | matching < | ||
| + | | ||
| | | ||
| | | ||
| + | | ||
| + | Extract BUFR messages matching the < | ||
| | | ||
| Will print to < | Will print to < | ||
| - | | ||
| - | only if immediately preceding | ||
| - | n=1 When filtering using --ahl: Reuse last AHL found if current | ||
| - | BUFR message has no immediately preceding AHL | ||
| | | ||
| more info you might prefer to consult perldoc bufrextract.pl | more info you might prefer to consult perldoc bufrextract.pl | ||
| Line 210: | Line 371: | ||
| For option C< | For option C< | ||
| - | expression. E.g. C<--ahl ' | + | expression. E.g. C<--ahl ' |
| (ISS) from CCCC=ENMI. | (ISS) from CCCC=ENMI. | ||
| - | If the BUFR file(s) are known to consist solely of GTS bulletins, you | + | Use option |
| - | might consider setting | + | present) |
| - | to extract all (and not only the first) BUFR messages | + | envelope |
| - | bulletins. Such bulletins | + | the GTS: Attachment II-4. Format of Meteorological Messages. |
| - | also the L</" | + | |
| - | corresponding AHL is still extracted | + | |
| - | No bufrtables are needed for running bufrextract.pl, | + | Using C< |
| - | in BUFR message will not be decoded (which also speeds up execution | + | the metadata present in section 1 (and 0) of the BUFR messages. Some few |
| - | quite a bit). | + | examples which hopefully are enough to illustrate how to write the |
| + | < | ||
| + | WMO-no. 306, "dc=0 ic=0, | ||
| + | one-hour observations from fixed-land stations, while "dc=1 ic=0, | ||
| + | should do the same for marine stations. If you want to extract both, | ||
| + | use for < | ||
| + | |||
| + | Here is the full list of metadata available for filtering (the first | ||
| + | 2-letter abbreviation is what should be used in the < | ||
| + | |||
| + | be = BUFR edition | ||
| + | oc = Originating centre | ||
| + | os = Originating subcentre | ||
| + | dc = Data category (table A) | ||
| + | ic = International data subcategory | ||
| + | lc = Local data subcategory | ||
| + | mt = Master table version number | ||
| + | lt = Local table version number | ||
| + | ye = Year | ||
| + | mo = Month | ||
| + | da = Day | ||
| + | ho = Hour | ||
| + | mi = Minute | ||
| + | se = Second | ||
| + | |||
| + | Note that no bufrtables are needed for running bufrextract.pl, | ||
| + | section 4 in BUFR message will not be decoded (which also speeds up | ||
| + | execution | ||
| =head1 HINTS | =head1 HINTS | ||
| Line 230: | Line 416: | ||
| section 0-3, by making your own copy of bufrextract.pl and then | section 0-3, by making your own copy of bufrextract.pl and then | ||
| employing one of the many C< | employing one of the many C< | ||
| - | to extract only BUFR messages with data category 1, add the following | + | to extract only BUFR messages with TM315009, add the following |
| line just before calling C< | line just before calling C< | ||
| - | next if $bufr-> | + | next if $bufr-> |
| - | + | ||
| - | Or to extract BUFR messages with TM315009 only: | + | |
| - | + | ||
| - | next if bufr-> | + | |
| =head1 CAVEAT | =head1 CAVEAT | ||
| Line 244: | Line 426: | ||
| between the GTS AHL and the start of BUFR message (besides the | between the GTS AHL and the start of BUFR message (besides the | ||
| standard character sequence CRCRLF), likely leading bufrextract.pl to | standard character sequence CRCRLF), likely leading bufrextract.pl to | ||
| - | miss the AHL. Also, if applying C< | + | miss the AHL. |
| - | a GTS bulletin will then be wrongly associated with the AHL of the | + | |
| - | previous GTS bulletin when filtering on AHL. If bulletins with this | + | |
| - | kind of error is more of a concern than multi message bulletins, you | + | |
| - | should probably refrain from making use of the C< | + | |
| =head1 AUTHOR | =head1 AUTHOR | ||
| Line 256: | Line 434: | ||
| =head1 COPYRIGHT | =head1 COPYRIGHT | ||
| - | Copyright (C) 2010-2019 MET Norway | + | Copyright (C) 2010-2026 MET Norway |
| =cut | =cut | ||
| - | |||
| </ | </ | ||