#!/usr/bin/perl -w use strict; # This script reads the MEDS profile data # # It outputs text files with a header, and then a columns of # depth variable1 variable2 ... variableN # the columns are whitespace deliminated (maybe tab? -- I don't rememeber) # # There are user defined variables at line 390 # begun October 8, 2004 rsm || ramzi@dal.ca # completed October26, 2004 #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # SUBROUTINES TO READ THE INDIVIDUAL PARTS OF THE CODE #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: #===================================================================== # LINE-READING SUBROUTINES #--------------------------------------------------------------------- sub read_1{ if(substr($_,0,1) != 1) { die("Jim, I wasn't built for this!\n"); } my %vals = ( stn_seq_no => [1,7], one_deg_sq => [13,6], cr_number => [19,10], stn_number => [29,4], year => [33,4], month => [37,2], day => [39,2], time => [41,4], data_type => [45,2], latitude => [47,8], longitude => [55,9], q_pos => [64,1], q_date_time=> [65,1], q_record => [66,1], up_date => [67,8] ); my %field; my $family; for $family (keys %vals) { $field{$family} = substr($_,$vals{$family}[0],$vals{$family}[1]); } return(%field); } sub read_2{ if(substr($_,0,1) != 2) { die("Jim, I wasn't built for this!\n"); } my %vals = ( bull_time => [13,12], bull_header => [25,6], source_id => [31,4], stream_ident => [35,4], qc_version => [39,4], avail => [40,1], no_prof => [44,4], nparms => [48,4], sparms => [52,4], num_hists => [56,4] ); my %field; my $family; for $family (keys %vals) { $field{$family} = substr($_,$vals{$family}[0],$vals{$family}[1]); } return(%field); } sub read_3{ if(substr($_,0,1) != 3) { die("Jim, I wasn't built for this!\n"); } my %vals = ( no_seg => [13,4], prof_type => [17,4], dup_flag => [21,1], digit_code => [22,1], standard => [23,1], deep_depth => [24,8] ); my $repeats = 2; my $rl = 19; # repeat length my %field; my $family; my $i = 0; while($i <= $repeats) { for $family (keys %vals) { $field{$family}[$i] = substr($_,$vals{$family}[0]+$i*$rl,$vals{$family}[1]); } $i++; } return(%field); } sub read_4{ if(substr($_,0,1) != 4) { die("Jim, I wasn't built for this!\n"); } my %vals = ( pcode => [13,4], parm => [17,9], q_parm => [26,1] ); my $repeats = 3; my $rl = 14; my %field; my $family; my $i = 0; while($i <= $repeats) { for $family (keys %vals) { $field{$family}[$i] = substr($_,$vals{$family}[0]+$i*$rl,$vals{$family}[1]); } $i++; } return(%field); } sub read_5{ if(substr($_,0,1) != 5) { die("Jim, I wasn't built for this!\n"); } my %vals = ( pcode => [13,4], cparm => [17,10], q_parm => [27,1] ); my $repeats = 3; my $rl = 15; my %field; my $family; my $i = 0; while($i <= $repeats) { for $family (keys %vals) { $field{$family}[$i] = substr($_,$vals{$family}[0]+$i*$rl,$vals{$family}[1]); } $i++; } return(%field); } sub read_6{ if(substr($_,0,1) != 6) { die("Jim, I wasn't built for this!\n"); } my %vals = ( ident_code => [13,2], prc_code => [15,4], version => [19,4], prc_date => [23,8], act_code => [31,2], act_parm => [33,4], aux_id => [37,10], o_value => [47,10] ); my %field; my $family; for $family (keys %vals) { $field{$family} = substr($_,$vals{$family}[0],$vals{$family}[1]); } return(%field); } sub read_7{ if(substr($_,0,1) != 7) { die("Jim, I wasn't built for this!\n"); } # vals1 doesn't go through repeat loop # vals2 does my %vals1 = ( prof_type => [8,4], d_p_code => [12,1] ); my %vals2 = ( depth_press => [13,8], dp_flag => [21,1], parm => [22,11], q_parm => [33,1] ); my $repeats = 2; my $rl = 21; my (%field1, %field2); my $family; my $i = 0; for $family (keys %vals1) { $field1{$family} = substr($_,$vals1{$family}[0],$vals1{$family}[1]); } while($i <= $repeats) { for $family (keys %vals2) { $field2{$family}[$i] = substr($_,$vals2{$family}[0]+$i*$rl,$vals2{$family}[1]); } $i++; } return (\%field1, \%field2); } #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # KEYS/LEGENDS to reading data #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # "data_type" is stored in record type "1" my %data_type_legend = ( BA => "BATHY message", BF => "Batfish CTD", BO => "Bottle", BT => "general BT data", CD => "CTD down trace", CU => "CTD up trace", CT => "CTD data, up or down", DB => "Drifting buoy", DT => "Digital BT", IN => "Ship intake samples", IC => "Ice Core Data", MB => "MBT", MC => "CTD and bottle data are mixed for the station", ML => "Minilog", PF => "Profiling Float", SG => "Thermosalinograph data", TE => "TESAC message", TG => "Thermograph data", TK => "TRACKOB message", TR => "Thermistor chain", XB => "XBT", TO => "CTD towed" ); # "q_pos" (quality of position), "q_date_time" (quality of date/time), # "q_record" (quality of data) are stored in record type "1" my %q_legend = ( 0 => "No QC performed", 1 => "Value appears to be correct", 2 => "Value is inconsistent", 3 => "Value is doubtful", 4 => "Value appears to be wrong", 5 => "Value has been changed", 9 => "Value is missing" ); #STREAM_IDENT: The stream identification parameter is used to # determine whether a message duplicating one already in the archives # should replace the archive version. For example, a delayed mode # record that has passed scientific quality control should replace a # radio message version of the record. It is composed of two # parts. Each is of two characters. The first gives the source of the # data, the second the type. # "stream_ident" is stored in record type "2" my %stream_ident_legend_1 = ( AO => "Atlantic Oceanographic and Meteorological Lab", AR => "Argo Profiling Float", BI => "BIO", CF => "Canadian Navy", CS => "CSIRO in Australia", DA => "Dalhousie University", FN => "FNOC in Monterey, California", FR => "Orstom, Brest", FW => "Fresh Water Institute (Winnipeg)", GE => "BSH, Germany", IC => "ICES", IK => "Institut Fur Meersekunde Kiel", IM => "IML", IO => "IOS in Pat Bay, BC", JA => "Japanese Meteorological Agency", JF => "Japan Fisheries Agency", ME => "MEDS", MO => "Moncton", MU => "Memorial University", NA => "NAFC", NO => "NODC (Washington)", OD => "Old Dominion Univ, USA", NW => "US National Weather Service", RU => "Russian Federation", SA => "St Andrews", SI => "Scripps Institute of Oceanography", TI => "Tiberon lab US", UB => "University of BC", UQ => "University of Quebec at Rimouski", WH => "Woods Hole" ); my %stram_ident_legend_2 = ( BA => "BATHY data", BO => "Historical bottle data", CT => "CTD data", DM => "Delayed mode version fromoriginator", DQ => "Delayed mode version with additional QC", HI => "Historical Canadian BT", NS => "Historical foreign BT", RM => "radio message", RQ => "Radio message with scientific QC", TE => "TESAC" ); # "avail" (availability/accessability of data) is stored in record type "2" my %avail_legend = ( A => "available to all", D => "deleted entry (used in IHO_REC)", I => "Available to MEDS and originator", O => "available to originator only", P => "Protected and so available to a select few" ); # "prof_type" (data in profile) is stored in record type "3" my %prof_type_legend = ( "TEMP" => ["Temperature", "Degrees C"], "PSAL" => ["Practical Salinity", "PSU"], "SSAL" => ["Pre 1978 Salinity scale","PPT"], "USAL" => ["Unknown Salinity", "?"], "DOXY" => ["Dissolved Oxygen", "mmol/m^3"], "CALK" => ["Carbonate Alkalinity","mmol/m^3"], "CPHL" => ["Chlorophyll-A", "mmol/m^3"], "CSPD" => ["Current speed", "m/s"], "CDIR" => ["Current direction", "Degrees true"], "CDX1" => ["Dissolved Organic Carbon", "mmol/m^3"], "FLO1" => ["Flouride", "mmol/m^3"], "FLU1" => ["Fluorescence", '%'], "FLU2" => ["Fluorescence", '%'], "FLU\$" => ["Fluorescence", '%'], "HHMM" => ["Hours and minutes", "N/A"], "NH3\$" => ["Ammonia", "mmol/m^3"], "NTRA" => ["Nitrate", "mmol/m^3"], "NTRI" => ["Nitrite", "mmol/m^3"], "NTRZ" => ["Nitrate + Nitrite", "mmol/m^3"], "CPX1" => ["Particulate Carbon", "mmol/m^3"], "OSI\$" => ["Originaator\'s Sample Identifier","---"], "PAR\$" => ["Photosyntheticly Active Radiation","???"], "PHA\$" => ["Phaeophytin","kg/l * 10^-9"], "PHI\$" => ["Inorganic Phosphate", "mmol/m^3"], "PHPH" => ["pH", "N/A"], "PHOS" => ["Phosphate", "mmol/m^3"], "POC\$" => ["Particulate Organic Carbon","??"], "PON\$" => ["Particulate Organic Nitrogen","??"], "SLCA" => ["Silicate", "mmol/m^3"], "SVEL" => ["Sound velocity", "m/s"], "SUL\$" => ["Sulphate", "mmol/m^3"], "PLT\$" => ["Transmissivity", "percent"], "ALKY" => ["Total Alkalinity", "mmol/m^3"], "TCD\$" => ["Total Carbon Dissolved?","??"], "TOTP" => ["Total Phosphorus", "mmol/m^3"], "TUR\$" => ["Turbidity",'%'] ); # "standard" (accuracy of data in profile) is stored in record type "3" my %standard_legend = ( 0 => "No value measured", 1 => "Accuracy better than 0.01", 2 => "Accuracy less than 0.01", 3 => "Sample Analysis", 4 => "In situ sensor, unknown accuracy", 5 => "In situ sensor, accurate to 0.001", B => "Accuracy is between 0.001 and 0.002", C => "Accuracy is between 0.002 and 0.005", E => "Accuracy is between 0.01 and 0.02", F => "Accuracy is between 0.02 and 0.05", H => "Accuracy is between 0.1 and 0.2", I => "Accuracy is greater than 0.02", U => "Unknown" ); # "ident code" (indicates organization responsible for history record) # is stored in type "6" my %ident_code_legend = ( ME => "MEDS", AO => "AOML", CS => "CSIRO" ); #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # THE MAIN PART #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: #--------------------------------------------------------------------- # User-defined variables #--------------------------------------------------------------------- my $data_dir = '../data/MEDS/'; my $filename = 'MIRSHAK.DAT'; my $MEDS_in_file = $data_dir.$filename; my $junk_file = $data_dir."junk"; my $out_dir = $data_dir.'Profiles/'; #--------------------------------------------------------------------- # global variable declarations my %header; my %profile_id_record; my %stn_var_record; my %stn_hist_record; my %profile_data; my %profile_data_quality; my %profile_record1; my %profile_record2; my @depth; my @depth_tmp; my @NAN = ("NaN"); my @parts; my @temp; my $count; my $depth_length; my $f_line; my $flag; my $family; my $hdr; my ($i, $j, $k, $kk); my $last_num; my $new_prof_type; my $old_prof_type; my $profile_entry; my $profile_line; my $prof_num; my ($prof_1_ref, $prof_2_ref); #===================================================================== # Find out how many lines are in the file #--------------------------------------------------------------------- system("wc $MEDS_in_file > $junk_file"); open(FJ, $junk_file); my $n_line = ; close(FJ); system("rm $junk_file"); @parts = split(/\s+/,$n_line); $n_line = $parts[1]; #--------------------------------------------------------------------- open(FIN, $MEDS_in_file); $count = 0; $last_num = 0; $flag = 0; THELOOP: while($count < $n_line) { ++$count; $_ = ; chomp; @parts = split(//,$_); $f_line = $parts[0]; $prof_num = substr($_,1,7); # if($prof_num < 3296) { next THELOOP; } # if($prof_num > 3297) { # close(FOUT); # last THELOOP; # } $hdr = "# Profile $prof_num\n"; # Starting a new profile if($prof_num > $last_num) { $kk = 0; if ($last_num > 0) { $k = 0; @depth = @depth_tmp; $depth_length = scalar(@depth); while($k < $depth_length) { print PROFILE $depth[$k]; $j = 0; while ($j < $header{no_prof}) { print PROFILE " " . $profile_data{$j}[$k]; $j++; } $j = 0; while ($j < $header{no_prof}) { print PROFILE " " . $profile_data_quality{$j}[$k]; $j++; } print PROFILE "\n"; $k++; } system "cp ". $out_dir . $last_num . ".txt ". $out_dir . substr((360-$header{longitude}),0,6) . "_" . substr($header{latitude},1,5) . "_" . $header{year} . "_" . $header{month} . "_" . $header{day} . "_" . $header{time} . ".txt\n"; } $profile_line = 0; $profile_entry = -1; $old_prof_type = "STRT"; %profile_data = (); %profile_data_quality = (); @depth_tmp = (); @depth = (); close(PROFILE); open(PROFILE, "> " . $out_dir . $prof_num . ".txt"); $last_num = $prof_num; $flag = 1; print PROFILE "# MEDS profile data, profile $prof_num from $filename\n"; print PROFILE "# Extracted using Perl Script \"read_MEDS.pl\"\n"; } SWITCH: { if($f_line == 1) { %header = read_1($_); # print FOUT ":::::::::: START LINE 1 ::::::::::\n"; # Start new file header for $family (keys %header) { # print FOUT "# $family: $header{$family}\n"; } # print FOUT "::::::::::: END LINE 1 :::::::::::\n"; last SWITCH; } if($f_line == 2) { # print FOUT ":::::::::: START LINE 2 ::::::::::\n"; %header = (%header, read_2($_)); # for $family (keys %header) { # print FOUT "# $family: $header{$family}\n"; # } # print FOUT "::::::::::: END LINE 2 :::::::::::\n"; print PROFILE "# Data Type: $data_type_legend{$header{data_type}}\n"; print PROFILE "# Lon/Lat: " . (360-$header{longitude}) . "/$header{latitude} ($q_legend{$header{q_pos}})\n"; print PROFILE "# Date/time (y/m/d/hhmm): " . "$header{year}/$header{month}/$header{day}/$header{time}" . " ($q_legend{$header{q_date_time}})\n"; print PROFILE "# Number of parameter profiles: " . $header{no_prof} . "\n"; last SWITCH; } #--------------------------------------------------------------------- if($f_line == 3) { # print FOUT ":::::::::: START LINE 3 ::::::::::\n"; %profile_id_record = read_3($_); $i = 0; while ($i <= 2) { # for $family (keys %profile_id_record) { # print FOUT "# $family: $profile_id_record{$family}[$i]\n"; # # } if ($profile_id_record{prof_type}[$i] =~ m/[a-z]/i ) { if (exists($prof_type_legend{$profile_id_record{prof_type}[$i]})) { ++$kk; print PROFILE "# Profile $kk contents: " . $profile_id_record{prof_type}[$i] . " [". $prof_type_legend{$profile_id_record{prof_type}[$i]}[1] . "] " . ($standard_legend{$profile_id_record{standard}[$i]}) . "\n"; } else { print $profile_id_record{prof_type}[$i] . " missing from " . '%profile_type_legend'."\n"; } } $i++; } # print FOUT "::::::::::: END LINE 3 :::::::::::\n"; last SWITCH; } if($f_line == 4) { # print FOUT ":::::::::: START LINE 4 ::::::::::\n"; %stn_var_record = read_4($_); $i = 0; while ($i <= 3) { # for $family (keys %stn_var_record) { # print FOUT "# $family: $stn_var_record{$family}[$i]\n"; # } $i++; } # print FOUT "::::::::::: END LINE 4 :::::::::::\n"; last SWITCH; } # if($f_line == 5) { # print FOUT ":::::::::: START LINE 5 ::::::::::\n"; # %stn_var_record = (%stn_var_record, read_5($_)); # $i = 0; # while ($i <= 3) { # for $family (keys %stn_var_record) { # print FOUT "# $family: $stn_var_record{$family}[$i]\n"; # } # $i++; # } # print FOUT "::::::::::: END LINE 5 :::::::::::\n"; # last SWITCH; # } # if($f_line == 6) { # print FOUT ":::::::::: START LINE 6 ::::::::::\n"; # %stn_hist_record = read_6($_); # for $family (keys %stn_hist_record) { # print FOUT "# $family: $stn_hist_record{$family}\n"; # } # print FOUT "::::::::::: END LINE 6 :::::::::::\n"; # last SWITCH; # } if($f_line == 7) { # print FOUT ":::::::::: START LINE 7 ::::::::::\n"; ($prof_1_ref, $prof_2_ref) = read_7($_); %profile_record1 = %$prof_1_ref; %profile_record2 = %$prof_2_ref; # for $family ( keys %profile_record1 ) { # print FOUT "$family: $profile_record1{$family}\n"; # } if ($flag == 1) { print PROFILE "# Quality control flags in columns following data\n"; if ($profile_record1{d_p_code} =~ m/d/i) { print PROFILE"# Depth units: m to " . $profile_id_record{deep_depth}[0] ." m\n"; $flag = 0; } elsif ($profile_record1{d_p_code} =~ m/p/i) { print PROFILE"# Depth units: dbar to " . $profile_id_record{deep_depth}[0] ." dbar\n"; $flag = 0; } } $new_prof_type = $profile_record1{prof_type}; $new_prof_type =~ s/\$/x/; if ($new_prof_type !~ $old_prof_type) { # print "Starting anew...$old_prof_type--\>$new_prof_type\n"; $old_prof_type = $new_prof_type; $profile_entry++; $profile_line = 0; # print @depth_tmp; # print "\n"; @depth = @depth_tmp; @depth_tmp = (); } $i = 0; while ($i <= 2) { # for $family (keys %profile_record2) { # print FOUT "$family: $profile_record2{$family}[$i]\n"; # } if ($profile_record2{depth_press}[$i] =~ m/[0-9]/ ) { # print $profile_record2{depth_press}[$i]."\t" . # $profile_record2{parm}[$i]."\n"; $depth_tmp[$profile_line] = $profile_record2{depth_press}[$i]; # Reset depth and change $profile_entry if new # parameter has started # if ($profile_line > 0) { # if($depth_tmp[$profile_line] <= # $depth_tmp[$profile_line-1]) { # pop(@depth_tmp); # # @depth = @depth_tmp; # $depth_length = scalar(@depth); # @depth_tmp = (); # $profile_entry++; # $profile_line = 0; # $depth_tmp[$profile_line] = # $profile_record2{depth_press}[$i]; # } # } if ($profile_entry > 0) { # print $depth_tmp[$profile_line] . "\t" . $depth[$profile_line] . "\n"; while ( (scalar(@depth_tmp) <= scalar(@depth)) && ($depth_tmp[$profile_line] > $depth[$profile_line])) { $depth_tmp[$profile_line] = $depth[$profile_line]; $profile_data{$profile_entry}[$profile_line] = "NaN"; $profile_data_quality{$profile_entry}[$profile_line] = 9; $profile_line++; $depth_tmp[$profile_line] = $profile_record2{depth_press}[$i]; } if ( (scalar(@depth_tmp) > scalar(@depth)) || ($depth_tmp[$profile_line] < $depth[$profile_line])) { $j = 0; # print @depth_tmp; # print "\n"; # print @depth; # print "\n"; splice (@depth,$profile_line,0, $depth_tmp[$profile_line]); while ($j < $profile_entry) { @temp = @{ $profile_data{$j} }; splice (@temp, $profile_line,0,"NoN"); @{ $profile_data{$j} } = @temp; @temp = @{$profile_data_quality{$j}}; splice (@temp, $profile_line,0,9); @{ $profile_data_quality{$j} } = @temp; # print $j."\n"; $j++; } $depth_length++; # if($j > 1) { # print $prof_num. "\t" . $j . "\n"; # print @depth."\n"; # print @depth_tmp."\n"; # die (@temp);#. "\n" . @temp}; # } } } $profile_data{$profile_entry}[$profile_line] = $profile_record2{parm}[$i]; $profile_data_quality{$profile_entry}[$profile_line] = $profile_record2{q_parm}[$i]; $profile_line++; } $i++; } # print FOUT "::::::::::: END LINE 7 :::::::::::\n"; last SWITCH; } } } close(FIN);