Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision | |||
|
bufr.pm:bufrextract.pl_source [2025-11-05 09:28:24] pals |
bufr.pm:bufrextract.pl_source [2026-03-26 17:56:54] (current) pals |
||
|---|---|---|---|
| Line 2: | Line 2: | ||
| # | # | ||
| - | # (C) Copyright | + | # Copyright |
| # | # | ||
| # This program is free software; you can redistribute it and/or modify | # This program is free software; you can redistribute it and/or modify | ||
| Line 30: | Line 30: | ||
| my %option = (); | my %option = (); | ||
| GetOptions( | GetOptions( | ||
| - | \%option, | + | |
| - | | + | ' |
| - | | + | ' |
| - | | + | ' |
| - | | + | |
| - | | + | ' |
| - | | + | ' |
| - | | + | ' |
| - | | + | ' |
| + | ) or pod2usage(-verbose => 0); | ||
| # User asked for help | # User asked for help | ||
| Line 57: | Line 58: | ||
| Geo:: | Geo:: | ||
| + | # For filtering on ahl | ||
| my $ahl_regexp; | my $ahl_regexp; | ||
| if ($option{ahl}) { | if ($option{ahl}) { | ||
| Line 62: | Line 64: | ||
| die " | die " | ||
| } | } | ||
| + | |||
| + | # For filtering on metadata in section 0/1 | ||
| + | my $filter = $option{filter} ? $option{filter} : ''; | ||
| + | my $or_criteria_ref = get_filter_criteria($filter); | ||
| # Where to direct output (including verbose output, but not output to STDERR) | # Where to direct output (including verbose output, but not output to STDERR) | ||
| Line 79: | Line 85: | ||
| foreach my $inputfname ( @ARGV ) { | foreach my $inputfname ( @ARGV ) { | ||
| my $bufr = Geo:: | my $bufr = Geo:: | ||
| - | | + | |
| + | # Could alternatively have merged filtering on ahl and metadata into | ||
| + | # one single callback function, but that would be a rather complex | ||
| + | # one, so we prefer to do the filtering on metadata later | ||
| + | | ||
| # Open BUFR file | # Open BUFR file | ||
| Line 123: | Line 133: | ||
| } | } | ||
| - | | + | # Filtering on ahl |
| + | | ||
| + | |||
| + | # Filtering on metadata | ||
| + | next READLOOP if $or_criteria_ref && not or_filter($bufr, | ||
| # Skip messages where stated length of BUFR message is sure to | # Skip messages where stated length of BUFR message is sure to | ||
| # be erroneous, unless we want ahls only (or should we skip | # be erroneous, unless we want ahls only (or should we skip | ||
| # message in this case also? Hard choice...) | # message in this case also? Hard choice...) | ||
| - | next if !$option{only_ahl} && $bufr-> | + | next READLOOP |
| my $current_subset_number = $bufr-> | my $current_subset_number = $bufr-> | ||
| Line 156: | Line 171: | ||
| my $msg = $bufr-> | my $msg = $bufr-> | ||
| print $OUT $msg, $gts_eom; | print $OUT $msg, $gts_eom; | ||
| - | | + | |
| } | } | ||
| Line 167: | Line 182: | ||
| return $ahl =~ $ahl_regexp ? 0 : 1; | return $ahl =~ $ahl_regexp ? 0 : 1; | ||
| } | } | ||
| + | |||
| + | # Get the list of alternative metadata criteria (these are separated | ||
| + | # by ' | ||
| + | sub get_filter_criteria { | ||
| + | my $filter = shift; | ||
| + | return ('' | ||
| + | |||
| + | my @or_criteria; | ||
| + | my @criteria = split /[|]/, $filter; | ||
| + | foreach my $cr (@criteria) { | ||
| + | $cr =~ s/^\s+//; | ||
| + | $cr =~ s/\s+$//; | ||
| + | if ($cr ne '' | ||
| + | push @or_criteria, | ||
| + | } | ||
| + | } | ||
| + | return \@or_criteria; | ||
| + | } | ||
| + | |||
| + | # Return true (1), meaning the BUFR message should be extracted, if it | ||
| + | # matches all @and_criteria for at least one of the @or_criteria | ||
| + | sub or_filter { | ||
| + | my ($bufr, $or_criteria_ref) = @_; | ||
| + | |||
| + | my $be = $bufr-> | ||
| + | my $dc = $bufr-> | ||
| + | # Choose to equate data_subcategory with int_data_subcategory, | ||
| + | # not quite sure about this | ||
| + | my $ic = ($be == 4) ? $bufr-> | ||
| + | : $bufr-> | ||
| + | my $lc = $bufr-> | ||
| + | my $oc = $bufr-> | ||
| + | my $os = $bufr-> | ||
| + | my $mt = $bufr-> | ||
| + | my $lt = $bufr-> | ||
| + | # This will not work for edition 3 when year is before 2000, | ||
| + | # but hard to find a better way... | ||
| + | my $ye = ($be == 4) ? $bufr-> | ||
| + | : $bufr-> | ||
| + | my $mo = $bufr-> | ||
| + | my $da = $bufr-> | ||
| + | my $ho = $bufr-> | ||
| + | my $mi = $bufr-> | ||
| + | my $se = ($be == 4) ? $bufr-> | ||
| + | |||
| + | my $include = 0; | ||
| + | OR: | ||
| + | foreach my $or_criterium (@$or_criteria_ref) { | ||
| + | my $all_ok = 1; | ||
| + | my @and_criteria = split /\s+/, $or_criterium; | ||
| + | AND: | ||
| + | foreach my $and_criterium (@and_criteria) { | ||
| + | my ($c, $list) = split /=/, $and_criterium; | ||
| + | my @list = split /,/, $list; | ||
| + | if ($c eq ' | ||
| + | if (not grep { $_ eq $be } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $dc } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $ic } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | # Not in BUFR edition 3 | ||
| + | if (!(defined $lc) || not grep { $_ eq $lc } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $oc } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $os } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $mt } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $lt } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $ye } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $mo } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $da } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $ho } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $mi } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } elsif ($c eq ' | ||
| + | if (not grep { $_ eq $se } @list) { | ||
| + | $all_ok = 0; | ||
| + | last AND; | ||
| + | } | ||
| + | } else { | ||
| + | die " | ||
| + | . " for the full list of 2-letter abbreviations accepted!"; | ||
| + | } | ||
| + | } # end AND | ||
| + | if ($all_ok == 1) { | ||
| + | # BUFR message has met all conditions in this | ||
| + | # or-criterium, | ||
| + | $include = 1; | ||
| + | last OR; | ||
| + | } | ||
| + | |||
| + | } # end OR | ||
| + | |||
| + | return $include; | ||
| + | } | ||
| + | |||
| Line 177: | Line 334: | ||
| bufrextract.pl <bufr file(s)> | bufrextract.pl <bufr file(s)> | ||
| [--ahl < | [--ahl < | ||
| - | [--only_ahl] | [--without_ahl] | [--gts] | + | [--only_ahl | --without_ahl | --gts] |
| + | [--filter < | ||
| [--outfile < | [--outfile < | ||
| [--help] | [--help] | ||
| Line 185: | Line 343: | ||
| Extract all BUFR messages and/or corresponding AHLs from BUFR file(s), | Extract all BUFR messages and/or corresponding AHLs from BUFR file(s), | ||
| - | possibly filtering on AHL. | + | possibly filtering on AHL and/or metadata in section 1. |
| The AHL (Abbreviated Header Line) is recognized as the TTAAii CCCC | The AHL (Abbreviated Header Line) is recognized as the TTAAii CCCC | ||
| Line 202: | Line 360: | ||
| | | ||
| | | ||
| + | | ||
| + | Extract BUFR messages matching the < | ||
| | | ||
| Will print to < | Will print to < | ||
| Line 211: | Line 371: | ||
| For option C< | For option C< | ||
| - | expression. E.g. C<--ahl ' | + | expression. E.g. C<--ahl ' |
| (ISS) from CCCC=ENMI. | (ISS) from CCCC=ENMI. | ||
| Line 219: | Line 379: | ||
| the GTS: Attachment II-4. Format of Meteorological Messages. | the GTS: Attachment II-4. Format of Meteorological Messages. | ||
| - | No bufrtables are needed for running bufrextract.pl, | + | Using C< |
| - | in BUFR message will not be decoded (which also speeds up execution | + | the metadata present in section 1 (and 0) of the BUFR messages. Some few |
| - | quite a bit). | + | examples which hopefully are enough to illustrate how to write the |
| + | < | ||
| + | WMO-no. 306, "dc=0 ic=0, | ||
| + | one-hour observations from fixed-land stations, while "dc=1 ic=0, | ||
| + | should do the same for marine stations. If you want to extract both, | ||
| + | use for < | ||
| + | |||
| + | Here is the full list of metadata available for filtering (the first | ||
| + | 2-letter abbreviation is what should be used in the < | ||
| + | |||
| + | be = BUFR edition | ||
| + | oc = Originating centre | ||
| + | os = Originating subcentre | ||
| + | dc = Data category (table A) | ||
| + | ic = International data subcategory | ||
| + | lc = Local data subcategory | ||
| + | mt = Master table version number | ||
| + | lt = Local table version number | ||
| + | ye = Year | ||
| + | mo = Month | ||
| + | da = Day | ||
| + | ho = Hour | ||
| + | mi = Minute | ||
| + | se = Second | ||
| + | |||
| + | Note that no bufrtables are needed for running bufrextract.pl, since | ||
| + | section 4 in BUFR message will not be decoded (which also speeds up | ||
| + | execution quite a bit). | ||
| =head1 HINTS | =head1 HINTS | ||
| Line 229: | Line 416: | ||
| section 0-3, by making your own copy of bufrextract.pl and then | section 0-3, by making your own copy of bufrextract.pl and then | ||
| employing one of the many C< | employing one of the many C< | ||
| - | to extract only BUFR messages with data category 1, add the following | + | to extract only BUFR messages with TM315009, add the following |
| line just before calling C< | line just before calling C< | ||
| - | next if $bufr-> | + | next if $bufr-> |
| - | + | ||
| - | Or to extract BUFR messages with TM315009 only: | + | |
| - | + | ||
| - | next if bufr-> | + | |
| =head1 CAVEAT | =head1 CAVEAT | ||
| Line 251: | Line 434: | ||
| =head1 COPYRIGHT | =head1 COPYRIGHT | ||
| - | Copyright (C) 2010-2025 MET Norway | + | Copyright (C) 2010-2026 MET Norway |
| =cut | =cut | ||
| </ | </ | ||