FOSDIC Deck 195 Decoding

Decoding Details

  • cassette / reel information is maintained in headerlines that begin with ‘00000195’, other details about the FOSDIC conversion format not pertaining to the original records are available at https://rda.ucar.edu/datasets/ds258.4/docs/fosdic_description.txt

  • This project is focused on providing ship type information and connecting to meta information for SST/Seawater Temp Bias (but also to look into air bias). To that end, present weather and sea state will remain as strings but ship number and temperatures will get converted per description.

  • Final entry will include the complete FOSDIC record entry for future reference

  • Location will also be converted to geocoordinates (signed latitude/longitude), and date/time to a timestamp string.

  • The only QC that will happen, other than conversion checks, is to remove empty entries

  • formatting (decimals and integers) should remain close to initial values if possible, however, replacing invalid data with NaN (pythonic) is a float operation… the raw character string is made available for the purposes of addressing this type of conversion challenge.

import pandas as pd
import numpy as np

import datetime
# Read the whole FOSDIC Deck 195 dump into memory: one list element per line,
# with each line's terminator kept.
with open('../../FOSDIC_COPY/fosdic_cd195', 'r') as f:
    file = list(f)
#define some essential conversion functions from the Deck 195 Manual

def wind_dir(x):
    """Convert a coded wind direction to integer degrees.

    The coded value is multiplied by 10 (the field is recorded to the
    nearest ten degrees) and returned as an ``int``.

    Parameters
    ----------
    x : str
        Raw wind-direction field from the record.

    Returns
    -------
    int or float
        Direction in degrees, or ``np.nan`` when the field cannot be parsed
        as an integer (blank or garbled characters).
    """
    try:
        return int(x) * 10
    except (TypeError, ValueError):
        # Only conversion failures mean "missing"; anything else should surface.
        return np.nan

def slp(x):
    """Convert a coded sea-level pressure field to a float.

    The raw field is divided by 100; the caller stores the result in the
    ``sealevelpressure_inHg`` column.

    Parameters
    ----------
    x : str
        Raw pressure field from the record.

    Returns
    -------
    float
        ``float(x) / 100``, or ``np.nan`` when the field cannot be parsed.
    """
    try:
        return float(x) / 100
    except (TypeError, ValueError):
        # Blank or garbled field -> missing value.
        return np.nan
    
def airtemp(x):
    """Decode a sign-prefixed air temperature field (dry or wet bulb).

    The first character selects the sign:

    * ``'0'``   -> the whole field is read as a non-negative number;
    * ``' '``   -> missing value;
    * any other character -> the remaining characters are read as a
      magnitude and negated.

    NOTE(review): with this rule a field such as ``'105'`` decodes to -5.0;
    confirm against the Deck 195 manual that the leading column is purely a
    sign indicator.

    Parameters
    ----------
    x : str
        Raw temperature field from the record.

    Returns
    -------
    float
        Decoded temperature, or ``np.nan`` for blank, empty, or unparsable
        fields.
    """
    lead = x[:1]  # slicing (not x[0]) so an empty field does not raise IndexError
    if lead in ('', ' '):
        return np.nan
    try:
        if lead == '0':
            return float(x)
        return -1 * float(x[1:])
    except (TypeError, ValueError):
        return np.nan
    
def watertemp(x):
    """Convert a raw water temperature field to a float.

    Parameters
    ----------
    x : str
        Raw temperature field from the record.

    Returns
    -------
    float
        ``float(x)``, or ``np.nan`` when the field cannot be parsed
        (blank or garbled characters).
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return np.nan

def inport(x):
    """Return True when the in-port indicator field is set.

    Any first character other than a space counts as "set". An empty field
    (e.g. from a truncated record) is treated as not set instead of raising
    IndexError.

    Parameters
    ----------
    x : str
        Raw indicator field from the record.

    Returns
    -------
    bool
    """
    return x[:1] not in ('', ' ')

# (latitude sign, longitude sign) for each quadrant code; positive is N / E.
_QUADRANT_SIGNS = {
    '0': (1, -1),
    '1': (1, 1),
    '2': (-1, -1),
    '3': (-1, 1),
}


def geo_loc(quadrant,latitude,longitude):
    """Turn a quadrant code plus unsigned lat/lon fields into signed degrees.

    Parameters
    ----------
    quadrant : str
        One of ``'0'``, ``'1'``, ``'2'``, ``'3'``; selects the signs applied
        to latitude and longitude (see ``_QUADRANT_SIGNS``).
    latitude, longitude : str
        Unsigned whole-degree fields, parsed with ``int``.

    Returns
    -------
    list
        ``[latitude, longitude]`` as signed ints (+N, +E), or
        ``[np.nan, np.nan]`` when the quadrant code is unknown or either
        coordinate cannot be parsed.
    """
    try:
        signs = _QUADRANT_SIGNS.get(quadrant)
        if signs is None:
            return [np.nan, np.nan]
        lat_sign, lon_sign = signs
        return [lat_sign * int(latitude), lon_sign * int(longitude)]
    except (TypeError, ValueError):
        # Unparsable coordinate (or unhashable quadrant) -> missing position.
        return [np.nan, np.nan]
        
# Decode every data line of the file into a dict of fields, keyed by a running
# record number (header lines and empty lines do not consume a number).
counter = 0
data = {}
for count, row in enumerate(file):
    if row[:8] == '00000195':
        # cassette / reel header line: report where it occurs, then skip it
        print(f'Headerlines: rowumber-{count}')
        continue
    if not row.strip():
        # empty entry: whitespace only, whatever its width or line ending
        continue

    # Slice from a copy padded to 80 columns so a short record yields blank
    # fields (decoded as missing) instead of empty strings.
    rec = row.rstrip('\n').ljust(80)

    # Decode the position once and reuse it for both output columns.
    lat_deg, lon_deg = geo_loc(rec[13:14], rec[14:16], rec[16:19])

    data[counter] = {
        'shipclass': rec[0:2],
        'shipno': rec[2:5],
        'year': rec[5:7],
        'month': rec[7:9],
        'day': rec[9:11],
        'hour': rec[11:13],
        'quadrant': rec[13:14],
        'lat_coded': rec[14:16],
        'lon_coded': rec[16:19],
        'ship_speed_kts': rec[19:21],  # knots
        'ship_course_deg': rec[21:23],  # degrees
        'wind_dir_deg': wind_dir(rec[23:25]),  # *10 as its cut to nearest ten degrees - and make integer, not float
        'wind_speed_kts': rec[25:27],  # knots
        'sealevelpressure_inHg': slp(rec[27:31]),  # in
        'drybulb_temperature_degF': airtemp(rec[31:34]),  # degF
        'wetbulb_temperature_degF': airtemp(rec[34:37]),  # degF
        'water_injection_temperature_degF': watertemp(rec[37:39]),  # degF
        'present_wx': rec[39:48],  # see encoding but this involves multiple fields: wx, clouds, vis
        'sea_surface_temperature_degF': watertemp(rec[48:50]),  # degF
        'sea_and_swell': rec[50:56],  # see encoding but this involves multiple fields
        'inport_obs_indicator': inport(rec[56:57]),  # X=True,blank=False
        # 58 on - not used
        'latitude_DegN': lat_deg,  # +N,+E
        'longitude_DegE': lon_deg,  # +N,+E
        'datetime': f'19{rec[5:7]}-{rec[7:9]}-{rec[9:11]} {rec[11:13]}:00:00',
        'rawentry_80char': row,  # untouched original line, terminator included
    }
    counter += 1

        
Headerlines: rowumber-0
Headerlines: rowumber-11900
Headerlines: rowumber-24532
Headerlines: rowumber-36153
Headerlines: rowumber-48354
Headerlines: rowumber-60146
Headerlines: rowumber-72265
Headerlines: rowumber-84308
Headerlines: rowumber-96273
Headerlines: rowumber-108434
Headerlines: rowumber-120708
Headerlines: rowumber-133038
Headerlines: rowumber-145374
Headerlines: rowumber-157128
Headerlines: rowumber-169710
Headerlines: rowumber-181385
Headerlines: rowumber-193689
Headerlines: rowumber-205917
Headerlines: rowumber-217943
Headerlines: rowumber-229640
Headerlines: rowumber-241251
Headerlines: rowumber-252753
Headerlines: rowumber-264969
Headerlines: rowumber-277353
Headerlines: rowumber-289685
Headerlines: rowumber-301329
Headerlines: rowumber-313251
Headerlines: rowumber-325285
Headerlines: rowumber-337365
Headerlines: rowumber-348882
Headerlines: rowumber-360892
Headerlines: rowumber-372911
Headerlines: rowumber-385279
Headerlines: rowumber-397246
Headerlines: rowumber-409214
Headerlines: rowumber-421541
Headerlines: rowumber-433915
Headerlines: rowumber-445552
Headerlines: rowumber-457497
Headerlines: rowumber-470070
Headerlines: rowumber-481914
Headerlines: rowumber-493433
Headerlines: rowumber-505361
Headerlines: rowumber-517759
Headerlines: rowumber-529326
Headerlines: rowumber-541973
Headerlines: rowumber-553944
Headerlines: rowumber-565653
Headerlines: rowumber-577107
Headerlines: rowumber-589695
Headerlines: rowumber-601358
Headerlines: rowumber-613256
Headerlines: rowumber-625754
Headerlines: rowumber-636789
# Build one DataFrame row per decoded record (the dict keys become the index,
# the field names become the columns) ...
df = pd.DataFrame.from_dict(data,orient='index')
# ... and save it as CSV without writing the index column.
df.to_csv('../data/FOSDIC_cd195.csv',index=False)

Some Initial Stats

Number of unique Ship Classes is: 93 Number of unique Ship Classes with >10 samples is: 76
this does have some clearly challenged values still like, -1 & J1 but represents 638678 of 638709 samples

All Ship Class Groups are shown below

# Show every distinct raw 'shipclass' string present in the decoded records.
df.groupby('shipclass').groups.keys()
dict_keys(['  ', ' 1', ' 2', ' 4', ' 5', ' 6', ' 8', ' 9', '&&', '-1', '0 ', '00', '01', '02', '03', '06', '07', '08', '09', '1 ', '1*', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '54', '55', '56', '58', '59', '60', '61', '62', '65', '68', '71', '76', '77', '78', '81', '82', '84', '85', '86', '88', '89', '9 ', '90', '91', '94', '95', '96', '97', '98', 'J1', 'Z5'])

And ship counts within each group

# For every ship class, print the class code followed by the distinct ship
# numbers recorded under it.
for class_code, members in df.groupby('shipclass'):
    print(class_code, members['shipno'].unique())
   ['   ']
 1 ['111' '145' '265']
 2 ['221']
 4 [' 76' '394' '012']
 5 ['229']
 6 ['048' '012' '106']
 8 ['131']
 9 ['   ']
&& ['&&&' ')3&']
-1 ['318' '292' '015']
0  ['010']
00 ['007']
01 ['061' '041' '040' '034' '038' '045' '046' '042' '033' '544' '044' '060'
 '056' '058' '059' '036' '035' '063' '262']
02 ['130' '027' '039' '072' '070' '136' '068' '071' '073' '028' '024' '029'
 '038' '032' '035' '031' '069']
03 ['002' '001']
06 ['052' '042' '006' '012' '004' '013' '005' '086' '103' '040' '104' '007'
 '011' '010' '066' '060' '091' '046' '047' '065' '055' '048' '080' '087'
 '051' '105' '054' '008' '050' '062' '053' '056' '041' '082' '058' '095'
 '063' '085' '009' ')04' '043']
07 ['004' '031' '006' '013' '015' '025' '002' '003' '017' '024' '021' '016'
 '009' '014' '020' '011' '012']
08 ['029' '011' '026' '025' '034' '013' '020' '031' '016']
09 ['024' '026' '025' '029' '050']
1  ['129' '242']
1* ['184']
10 ['009' '069' '011' '013' '072' '001' '031' '029' '012' '020' '023' '063'
 '071' '018' '028' '096' '103' '100' '104' '068' '090' '097' '016' '-07'
 '060' '059' '070' '085' '076' '064' '075' '099' '074' '088' '086' '058'
 '098' '082' '102' '025' '067' '0*5' '021' '20-' '057']
11 ['- 4' '015']
12 ['210' '457' '362' '458' '454' '395' '359' '462' '396' '381' '440' '432'
 '247' '248' '215' '228' '216' '211' '217' '154' '148' '149' '150' '629'
 '405' '477' '873' '710' '156' '358' '394' '647' '441' '223' '361' '382'
 '551' '741' '552' '494' '874' '570' '581' '220' '880' '463' '493' '798'
 '476' '587' '383' '442' '571' '576' '363' '709' '569' '430' '573' '876'
 '655' '588' '732' '517' '520' '666' '628' '832' '686' '706' '425' '419'
 '421' '402' '574' '806' '729' '222' '654' '724' '689' '691' '641' '360'
 '487' '357' '611' '495' '230' '497' '341' '878' '586' '662' '471' '742'
 '518' '600' '649' '575' '679' '483' '519' '479' '221' '624' '453' '439'
 '399' '356' '366' '379' '386' '388' '367' '515' '435' '365' '391' '371'
 '661' '385' '368' '491' '486' '554' '433' '460' '414' '393' '398' '465'
 '387' '445' '411' '434' '213' '350' '544' '066' '472' '377' '605' '659'
 '598' '041' '508' '353' '470' '480' '349' '660' '670' '644' '468' '407'
 '390' '481' '675' '606' '473' '580' '623' '636' '632' '246' '426' '159'
 '147' '153' '157' '428' '620' '145' '144' '422' '128' '578' '427' '239'
 '126' '807' '251' '151' '420' '152' '118' '158' '188' '572' '403' '461'
 '626' '418' '805' '862' '455' '424' '429' '511' '550' '456' '743' '831'
 '583' '610' '109' '164' '233' '563' '492' '488' '638' '602' '608' '380'
 '579' '546' '658' '526' '351' '669' '792' '585' '681' '734' '355' '417'
 '650' '630' '106' '510' '528' '446' '577' '631' '547' '423' '155' '160'
 '637' '640' '601' '808' '756' '408' '613' '604' '187' '431' '343' '509'
 '621' '437' '*7 ' '115' '116' '237' '137' '450' '514' '406' '113' '498'
 '234' '560' '558' '562' '218' '392' '231' '545' '685' '484' '680' '668'
 '529' '527' '609' '612' '634' '617' '635' '633' '489' '618' '199' '438'
 '615' '627' '490' '496' '614' '625' '142' '464' '697' '603' '242' '232'
 '557' '800' '607' '796' '553' '642' '372' '643' '645']
13 ['359' '383']
14 ['326' '174' '178' '135' '240' '182' '142' '139' '705' '181' '179' '101'
 '016' '176' '177' '100' '175' '408' '341' '681' '797' '581' '583' '200'
 '698' '795' '368' '363' '259' '683' '218' '536' '701' '231' '586' '420'
 '345' '697' '349' '708' '201' '202' '203' '796' '589' '418' '331' '330'
 '063' '168' '141' '343' '184' '449' '406' '510' '382' '152' '415' '304'
 '372' '340' '411' '419' '438' '049' '450' '022' '261' '739' '199' '680'
 '023' '041' '026' '011' '009' '017' '019' '025' '035' '032' '040' '030'
 '644' '265' '034' '262' '339' '405' '213' '070' '146' '575' '190' '693'
 '327' '329' '763' '398' '196' '682' '792' '322' '138' '246' '134' '015'
 '039' '260' '263' '020' '634' '637' '014' '167' '366' '050' '640' '029'
 '745' '166' '530' '045' '150' '148' '051' '153' '214' '799' '006' '129'
 '249' '332' '147' '684' '060' '352' '047' '362' '578' '399' '335' '694'
 '685' '665' '059' '334' '008' '642' '028' '053' '005' '137' '169' '253'
 '252' '387' '392' '131' '102' '258' '220' '385' '325' '194' '794' '400'
 '105' '242' '323' '793' '161' '197' '210' '057' '529' '065' '790' '212'
 '173' '764' '054' '215' '132' '130' '251' '151' '676' '068' '317' '393'
 '384' '394' '241' '191' '066' '133' '010' '171' '769' 'S10' '320' '33 '
 '018' 'Z18' '423' '585' '643' '696' '636' '  5' '535' '198' '007']
15 ['235' '165' '168' '477' '317' '215' '242' '229' '230' '119' '398' '394'
 '118' '405' '129' '126' '207' '395' '384' '179' '138' '299' '390' '388'
 '258' '183' '117' '366' '287' '116' '163' '186' '391' '339' '400' '479'
 '481' '243' '241' '153' '396' '251' '476' '392' '365' '246' '393' '316'
 '397' '402' '295' '381' '331' '329' '383' '361' '419' '421' '362' '332'
 '313' '423' '336' '373' '422' '244' '314' '386' '315' '245' '385' '216'
 '232' '408' '399' '410' '406' '271' '272' '192' '175' '208' '167' '334'
 '475' '199' '276' '274' '169' '337' '409' '333' '374' '309' '187' '210'
 '278' '180' '424' '171' '418' '369' '221' '387' '247' '158' '154' '236'
 '145' '203' '142' '213' '480' '312' '120' '092' '340' '184' '341' '330'
 '198' '262' '281' '282' '206' '182' '254' '181' '567' '264' '228' '308'
 '250' '238' '284' '266' '202' '200' '178' '201' '224' '304' '220' '217'
 '283' '260' '239' '306' '255' '189' '240' '218' '261' '286' '293' '225'
 '214' '305' '263' '231' '234' '172' '412' '237' '268' '269' '288' '414'
 '259' '*54' '223' '256' '310' '307' '311' '267' '401' '403' '146' '404'
 '292' '300' '188' '280' '367' '372' '265' '212' '285' '249' '325' '194'
 '328' '270' '382' '413' '389' '298' '086' '170' '454' '123' '318' '319'
 '326' '327' '222' '193' '144' '185' '191' '050' '322']
16 ['032' '001']
17 ['026' '020' '001' '002' '009' '008' '017']
18 ['009' '012' '011']
19 ['003']
20 ['018' '015' '019' '017' '021' '020' '016' '022']
21 ['018' '005' '033' '009' '011' '012' '007' '008' '016' '010' '015' '017'
 '004' '034' '014']
22 ['255' '258' '257' '076' '058' '128' '131' '217' '220' '219' '127' '031'
 '043' '062' '013' '087' '025' '022' '108' '111' '110' '317' '323' '297'
 '016' '056' '059' '268' '246' '060' '055' '322' '304' '109' '320' '324'
 '298' '300' '154' '118' '267' '119' '002' '123' '270' '107' 'S58' '136'
 '137' '274' '104' '283' '305' '152' '084' '126' '102' '117' '120' '116'
 '105' '153' '271' '024' '072' '075' '0  ' '113' '079' '103' '007' '156'
 '083' '021' '035' '082']
23 ['017']
25 ['238']
26 ['006' '004']
27 ['006' '008' '004']
28 ['018' '015' '014']
29 ['005' '002']
30 ['-01' '057']
31 ['053' '034' '024' '001' '032' '010' '012' '013' '047' '003' '005' '009'
 '038']
32 ['050' '063' '064' '070' '052' '059' '008' '058' '065' '071' '066' '087'
 '067' '089' '086' '102' '062' '101' '068' '056' '022']
33 ['005' '009' '008' '004' '010' '001' '012']
34 ['257']
35 ['012' '018' '028' '009' '023']
37 ['019' '022' '017' '028' '024' '013' '025' '021' '012' '016' '011' '010']
38 ['021' '009' '020' '018' '017' '008' '019' '003' '045']
39 ['013' '009' '004' '003' '008' '005' '006' '014' '011' '001' 'X04' '010'
 '002' '012' '007']
40 ['121' '110' '320' '460' '425' '068' '217' '182' '025' '003' '169' '168'
 '172' '067' '173' '197']
41 ['015' '021' '004' '009' '011' '003' '019' '012' '002' '014' '016' '-78'
 '0-3']
42 ['002' '005' '247' '003' '006' '008' '001']
43 ['034' '011' '042' '031' '015' '025' '030' '014' '016' '012' '010' '008'
 '007' '001' '0 0' '022' '013' '020' '009' '029' '021' '019' '026' '024']
44 ['005' '001' '004']
45 ['001' '004' '005']
46 ['030' '070' '068' '044' '032' '042' '033' '067' '046' '097' '029' '066'
 '031' '012']
47 ['055' '048' '062' '061' '053' '056']
48 ['004' '005' '008' '007' '002' '020']
49 ['001' '003']
50 ['001' '011' '013' '017' '006' '005' '009' '010']
51 ['013' '023' '136' '077' '121' '024' '018' '129' '207' '123' '133' '124'
 '113' '170' '118' '119' '025' '027' '080' '111' '100' '020' '072' '042'
 '101' '090' '103' '097' '071' '096' '095' '099' '014' '102' '016' '106'
 '110' '161' '162' '139' '125' '048' '047' '017' '112' '015' '140' '138'
 '126' '051' '022' '049' '105' '029' '046' '094' '079' '108' '074' '073'
 '053' '052']
52 ['036' '087' '014' '092' '108' '105' '106' '040' '053' '104' '011' '006'
 '008' '009' '010' '020' '090' '003' '002' '015' '089' '012' '013']
54 ['003' '004' '002' '005' '001']
55 ['004' '008' '011' '001' '003']
56 ['001' '002']
58 ['032' '034' '087' '084' '060' '028' '043' '053' '055' '005' '052' '049'
 '056' '013' '008' '027' '020']
59 ['045' '009' '036' '037' '022' '015' '065' '075' '071' '050' '033' '042'
 '073' '040' '049' '026' '038' '002' '039' '046' '079' '018' '076' '035'
 '058' '012' '048' '043' '025' '027' '024' '004' '074' '047' '021' '086'
 '016' '041' '072' '011' '019' '053' '081' '088' '084' '096' '001' '030'
 '017' '028' '029' '013']
60 ['021' '019' '050' '006' '024' '036' '022' '009' '008' '005' '013' '002'
 '012' '061' 'U61' '010' '014' '017' '018' '001']
61 ['153' '024' '020' '023' '021' '061' '131' '132' '022' '115' '136' '114'
 '035' '120' '112' '110' '125' '032' '038' '154' '113' '029' '133' '144'
 '041' '015' '145' 'S41' '427' '033' '063' '054' '008' '013' '055' '134'
 '075' '163' '138' '026' '060' '067' '170' '158' '016' '001' '102' '072'
 '076' '066' '017' '011' '010' '037']
62 ['013' '030' '033' '028' '017' '051' '005' '198' '020' '003' '018' '055'
 '200' '012' '040' '224' '086' '084' '011' '223' '019' '031' '050' '090'
 '191' '091' '049']
65 ['066' '086' '021' '001' '024' '078' '038' '019' '096' '091' '123' '081'
 '032' '124' '125' '003' '006' '010' '042' '008' '013' '009' '012' '020'
 '022' '017' '005']
68 ['003' '117' '002' '001']
71 ['109']
76 ['001' '002']
77 ['021' '011' '004' '006' '009' '010' '007']
78 ['007' '003' '001' '006' '004']
81 ['258' '105' '248' '127' '252' '153' '109' '164' '235' '137' '148' '136']
82 ['008']
84 ['007' '003' '005']
85 ['001']
86 ['023' '004' '008' '019' '001' '007']
88 ['020' '021' '015' '023' '032' '013' '036' '037' '004' '005']
89 ['011' '050' '022' '010' '003' '021' '009' '006' '029' '007' '013' '001'
 '024' '023' '005' '004' '041' '049' '008' '002']
9  [' 11']
90 ['006']
91 ['258']
94 ['013' '011' '024' '022' '026' '005' '020' '014' '016' '012' '021' '015'
 '003' '019' '018' '023' 'S05' '025']
95 ['004' '003' '008' '002' '007' '005']
96 ['097' '091' '102' '104' '148' '105' '103' '101' '096' '072' '109' '113'
 '085' '108' '066' '071' '111' '095' '092']
97 ['082' '081' '068' '067' '086' '140' '065' '064' '012' '039' '034' '083'
 '072' '076' '093' '070' '019' '063' '021' '085' '029' '095' '132' '020'
 '136' '027' '144' '143' '142' '131']
98 ['145' '024' '131' '039' '142' '034']
J1 ['015']
Z5 ['003']

Notice there are ship numbers that are encoded incorrectly, as well as ship classes encoded incorrectly (non-numeric characters). We will populate all these as “unidentifiable vessels” even though you may be able to retrieve a few entries via collocation with other entries in the initial fosdic records.

Prepare for Merging with Vessel Meta Archive

# Declare records whose ship number or ship class is non-numeric as
# unrecoverable for vessel-metadata matching.

def _parses_as_float(value):
    """Return True when float() accepts value.

    float() tolerates surrounding blanks and a sign, so ids such as ' 76',
    '33 ' or '-1' still count as numeric here.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True


# Column-wise check instead of iterrows() with one .loc write per record:
# a record is matchable only when both its ship number and ship class parse.
df['VesselMeta_Availability'] = (
    df['shipno'].map(_parses_as_float).astype(bool)
    & df['shipclass'].map(_parses_as_float).astype(bool)
)
        
# Keep only the records flagged as matchable. Take an explicit copy so that
# assigning into pdf afterwards writes to an independent frame rather than to
# a slice of df (avoids pandas' SettingWithCopy ambiguity).
pdf = df[df['VesselMeta_Availability']].copy()
# For every remaining ship class, print its distinct ship numbers.
for class_code, members in pdf.groupby('shipclass'):
    print(class_code, members['shipno'].unique())
 1 ['111' '145' '265']
 2 ['221']
 4 [' 76' '394' '012']
 5 ['229']
 6 ['048' '012' '106']
 8 ['131']
-1 ['318' '292' '015']
0  ['010']
00 ['007']
01 ['061' '041' '040' '034' '038' '045' '046' '042' '033' '544' '044' '060'
 '056' '058' '059' '036' '035' '063' '262']
02 ['130' '027' '039' '072' '070' '136' '068' '071' '073' '028' '024' '029'
 '038' '032' '035' '031' '069']
03 ['002' '001']
06 ['052' '042' '006' '012' '004' '013' '005' '086' '103' '040' '104' '007'
 '011' '010' '066' '060' '091' '046' '047' '065' '055' '048' '080' '087'
 '051' '105' '054' '008' '050' '062' '053' '056' '041' '082' '058' '095'
 '063' '085' '009' '043']
07 ['004' '031' '006' '013' '015' '025' '002' '003' '017' '024' '021' '016'
 '009' '014' '020' '011' '012']
08 ['029' '011' '026' '025' '034' '013' '020' '031' '016']
09 ['024' '026' '025' '029' '050']
1  ['129' '242']
10 ['009' '069' '011' '013' '072' '001' '031' '029' '012' '020' '023' '063'
 '071' '018' '028' '096' '103' '100' '104' '068' '090' '097' '016' '-07'
 '060' '059' '070' '085' '076' '064' '075' '099' '074' '088' '086' '058'
 '098' '082' '102' '025' '067' '021' '057']
11 ['015']
12 ['210' '457' '362' '458' '454' '395' '359' '462' '396' '381' '440' '432'
 '247' '248' '215' '228' '216' '211' '217' '154' '148' '149' '150' '629'
 '405' '477' '873' '710' '156' '358' '394' '647' '441' '223' '361' '382'
 '551' '741' '552' '494' '874' '570' '581' '220' '880' '463' '493' '798'
 '476' '587' '383' '442' '571' '576' '363' '709' '569' '430' '573' '876'
 '655' '588' '732' '517' '520' '666' '628' '832' '686' '706' '425' '419'
 '421' '402' '574' '806' '729' '222' '654' '724' '689' '691' '641' '360'
 '487' '357' '611' '495' '230' '497' '341' '878' '586' '662' '471' '742'
 '518' '600' '649' '575' '679' '483' '519' '479' '221' '624' '453' '439'
 '399' '356' '366' '379' '386' '388' '367' '515' '435' '365' '391' '371'
 '661' '385' '368' '491' '486' '554' '433' '460' '414' '393' '398' '465'
 '387' '445' '411' '434' '213' '350' '544' '066' '472' '377' '605' '659'
 '598' '041' '508' '353' '470' '480' '349' '660' '670' '644' '468' '407'
 '390' '481' '675' '606' '473' '580' '623' '636' '632' '246' '426' '159'
 '147' '153' '157' '428' '620' '145' '144' '422' '128' '578' '427' '239'
 '126' '807' '251' '151' '420' '152' '118' '158' '188' '572' '403' '461'
 '626' '418' '805' '862' '455' '424' '429' '511' '550' '456' '743' '831'
 '583' '610' '109' '164' '233' '563' '492' '488' '638' '602' '608' '380'
 '579' '546' '658' '526' '351' '669' '792' '585' '681' '734' '355' '417'
 '650' '630' '106' '510' '528' '446' '577' '631' '547' '423' '155' '160'
 '637' '640' '601' '808' '756' '408' '613' '604' '187' '431' '343' '509'
 '621' '437' '115' '116' '237' '137' '450' '514' '406' '113' '498' '234'
 '560' '558' '562' '218' '392' '231' '545' '685' '484' '680' '668' '529'
 '527' '609' '612' '634' '617' '635' '633' '489' '618' '199' '438' '615'
 '627' '490' '496' '614' '625' '142' '464' '697' '603' '242' '232' '557'
 '800' '607' '796' '553' '642' '372' '643' '645']
13 ['359' '383']
14 ['326' '174' '178' '135' '240' '182' '142' '139' '705' '181' '179' '101'
 '016' '176' '177' '100' '175' '408' '341' '681' '797' '581' '583' '200'
 '698' '795' '368' '363' '259' '683' '218' '536' '701' '231' '586' '420'
 '345' '697' '349' '708' '201' '202' '203' '796' '589' '418' '331' '330'
 '063' '168' '141' '343' '184' '449' '406' '510' '382' '152' '415' '304'
 '372' '340' '411' '419' '438' '049' '450' '022' '261' '739' '199' '680'
 '023' '041' '026' '011' '009' '017' '019' '025' '035' '032' '040' '030'
 '644' '265' '034' '262' '339' '405' '213' '070' '146' '575' '190' '693'
 '327' '329' '763' '398' '196' '682' '792' '322' '138' '246' '134' '015'
 '039' '260' '263' '020' '634' '637' '014' '167' '366' '050' '640' '029'
 '745' '166' '530' '045' '150' '148' '051' '153' '214' '799' '006' '129'
 '249' '332' '147' '684' '060' '352' '047' '362' '578' '399' '335' '694'
 '685' '665' '059' '334' '008' '642' '028' '053' '005' '137' '169' '253'
 '252' '387' '392' '131' '102' '258' '220' '385' '325' '194' '794' '400'
 '105' '242' '323' '793' '161' '197' '210' '057' '529' '065' '790' '212'
 '173' '764' '054' '215' '132' '130' '251' '151' '676' '068' '317' '393'
 '384' '394' '241' '191' '066' '133' '010' '171' '769' '320' '33 ' '018'
 '423' '585' '643' '696' '636' '  5' '535' '198' '007']
15 ['235' '165' '168' '477' '317' '215' '242' '229' '230' '119' '398' '394'
 '118' '405' '129' '126' '207' '395' '384' '179' '138' '299' '390' '388'
 '258' '183' '117' '366' '287' '116' '163' '186' '391' '339' '400' '479'
 '481' '243' '241' '153' '396' '251' '476' '392' '365' '246' '393' '316'
 '397' '402' '295' '381' '331' '329' '383' '361' '419' '421' '362' '332'
 '313' '423' '336' '373' '422' '244' '314' '386' '315' '245' '385' '216'
 '232' '408' '399' '410' '406' '271' '272' '192' '175' '208' '167' '334'
 '475' '199' '276' '274' '169' '337' '409' '333' '374' '309' '187' '210'
 '278' '180' '424' '171' '418' '369' '221' '387' '247' '158' '154' '236'
 '145' '203' '142' '213' '480' '312' '120' '092' '340' '184' '341' '330'
 '198' '262' '281' '282' '206' '182' '254' '181' '567' '264' '228' '308'
 '250' '238' '284' '266' '202' '200' '178' '201' '224' '304' '220' '217'
 '283' '260' '239' '306' '255' '189' '240' '218' '261' '286' '293' '225'
 '214' '305' '263' '231' '234' '172' '412' '237' '268' '269' '288' '414'
 '259' '223' '256' '310' '307' '311' '267' '401' '403' '146' '404' '292'
 '300' '188' '280' '367' '372' '265' '212' '285' '249' '325' '194' '328'
 '270' '382' '413' '389' '298' '086' '170' '454' '123' '318' '319' '326'
 '327' '222' '193' '144' '185' '191' '050' '322']
16 ['032' '001']
17 ['026' '020' '001' '002' '009' '008' '017']
18 ['009' '012' '011']
19 ['003']
20 ['018' '015' '019' '017' '021' '020' '016' '022']
21 ['018' '005' '033' '009' '011' '012' '007' '008' '016' '010' '015' '017'
 '004' '034' '014']
22 ['255' '258' '257' '076' '058' '128' '131' '217' '220' '219' '127' '031'
 '043' '062' '013' '087' '025' '022' '108' '111' '110' '317' '323' '297'
 '016' '056' '059' '268' '246' '060' '055' '322' '304' '109' '320' '324'
 '298' '300' '154' '118' '267' '119' '002' '123' '270' '107' '136' '137'
 '274' '104' '283' '305' '152' '084' '126' '102' '117' '120' '116' '105'
 '153' '271' '024' '072' '075' '0  ' '113' '079' '103' '007' '156' '083'
 '021' '035' '082']
23 ['017']
25 ['238']
26 ['006' '004']
27 ['006' '008' '004']
28 ['018' '015' '014']
29 ['005' '002']
30 ['-01' '057']
31 ['053' '034' '024' '001' '032' '010' '012' '013' '047' '003' '005' '009'
 '038']
32 ['050' '063' '064' '070' '052' '059' '008' '058' '065' '071' '066' '087'
 '067' '089' '086' '102' '062' '101' '068' '056' '022']
33 ['005' '009' '008' '004' '010' '001' '012']
34 ['257']
35 ['012' '018' '028' '009' '023']
37 ['019' '022' '017' '028' '024' '013' '025' '021' '012' '016' '011' '010']
38 ['021' '009' '020' '018' '017' '008' '019' '003' '045']
39 ['013' '009' '004' '003' '008' '005' '006' '014' '011' '001' '010' '002'
 '012' '007']
40 ['121' '110' '320' '460' '425' '068' '217' '182' '025' '003' '169' '168'
 '172' '067' '173' '197']
41 ['015' '021' '004' '009' '011' '003' '019' '012' '002' '014' '016' '-78']
42 ['002' '005' '247' '003' '006' '008' '001']
43 ['034' '011' '042' '031' '015' '025' '030' '014' '016' '012' '010' '008'
 '007' '001' '022' '013' '020' '009' '029' '021' '019' '026' '024']
44 ['005' '001' '004']
45 ['001' '004' '005']
46 ['030' '070' '068' '044' '032' '042' '033' '067' '046' '097' '029' '066'
 '031' '012']
47 ['055' '048' '062' '061' '053' '056']
48 ['004' '005' '008' '007' '002' '020']
49 ['001' '003']
50 ['001' '011' '013' '017' '006' '005' '009' '010']
51 ['013' '023' '136' '077' '121' '024' '018' '129' '207' '123' '133' '124'
 '113' '170' '118' '119' '025' '027' '080' '111' '100' '020' '072' '042'
 '101' '090' '103' '097' '071' '096' '095' '099' '014' '102' '016' '106'
 '110' '161' '162' '139' '125' '048' '047' '017' '112' '015' '140' '138'
 '126' '051' '022' '049' '105' '029' '046' '094' '079' '108' '074' '073'
 '053' '052']
52 ['036' '087' '014' '092' '108' '105' '106' '040' '053' '104' '011' '006'
 '008' '009' '010' '020' '090' '003' '002' '015' '089' '012' '013']
54 ['003' '004' '002' '005' '001']
55 ['004' '008' '011' '001' '003']
56 ['001' '002']
58 ['032' '034' '087' '084' '060' '028' '043' '053' '055' '005' '052' '049'
 '056' '013' '008' '027' '020']
59 ['045' '009' '036' '037' '022' '015' '065' '075' '071' '050' '033' '042'
 '073' '040' '049' '026' '038' '002' '039' '046' '079' '018' '076' '035'
 '058' '012' '048' '043' '025' '027' '024' '004' '074' '047' '021' '086'
 '016' '041' '072' '011' '019' '053' '081' '088' '084' '096' '001' '030'
 '017' '028' '029' '013']
60 ['021' '019' '050' '006' '024' '036' '022' '009' '008' '005' '013' '002'
 '012' '061' '010' '014' '017' '018' '001']
61 ['153' '024' '020' '023' '021' '061' '131' '132' '022' '115' '136' '114'
 '035' '120' '112' '110' '125' '032' '038' '154' '113' '029' '133' '144'
 '041' '015' '145' '427' '033' '063' '054' '008' '013' '055' '134' '075'
 '163' '138' '026' '060' '067' '170' '158' '016' '001' '102' '072' '076'
 '066' '017' '011' '010' '037']
62 ['013' '030' '033' '028' '017' '051' '005' '198' '020' '003' '018' '055'
 '200' '012' '040' '224' '086' '084' '011' '223' '019' '031' '050' '090'
 '191' '091' '049']
65 ['066' '086' '021' '001' '024' '078' '038' '019' '096' '091' '123' '081'
 '032' '124' '125' '003' '006' '010' '042' '008' '013' '009' '012' '020'
 '022' '017' '005']
68 ['003' '117' '002' '001']
71 ['109']
76 ['001' '002']
77 ['021' '011' '004' '006' '009' '010' '007']
78 ['007' '003' '001' '006' '004']
81 ['258' '105' '248' '127' '252' '153' '109' '164' '235' '137' '148' '136']
82 ['008']
84 ['007' '003' '005']
85 ['001']
86 ['023' '004' '008' '019' '001' '007']
88 ['020' '021' '015' '023' '032' '013' '036' '037' '004' '005']
89 ['011' '050' '022' '010' '003' '021' '009' '006' '029' '007' '013' '001'
 '024' '023' '005' '004' '041' '049' '008' '002']
9  [' 11']
90 ['006']
91 ['258']
94 ['013' '011' '024' '022' '026' '005' '020' '014' '016' '012' '021' '015'
 '003' '019' '018' '023' '025']
95 ['004' '003' '008' '002' '007' '005']
96 ['097' '091' '102' '104' '148' '105' '103' '101' '096' '072' '109' '113'
 '085' '108' '066' '071' '111' '095' '092']
97 ['082' '081' '068' '067' '086' '140' '065' '064' '012' '039' '034' '083'
 '072' '076' '093' '070' '019' '063' '021' '085' '029' '095' '132' '020'
 '136' '027' '144' '143' '142' '131']
98 ['145' '024' '131' '039' '142' '034']
# Mark every record of a ship class as unavailable when that class has five
# or fewer (non-null) ship-number entries in pdf.
for _class_code, members in pdf.groupby('shipclass'):
    if members['shipno'].count() > 5:
        continue
    pdf.loc[members.index, 'VesselMeta_Availability'] = False

This brings us to 637853 samples which is ~1k samples fewer than the entire dataset when dropping non-numeric entries. We will try to match these with known vessels from the Deck 195 MetaData reconstruction project.

There may be other ID’s that should be omitted (e.g. -1, or ‘0 ‘), and a few that need to be explored (e.g. are ‘1’,‘01’,’1 ’ all the same or are they different?). Sample counts that are very low will throw these out regardless.

Vessel Class ID’s assumed in error or not worth exploring due to low counts
ClassID / Counts:
1 3
2 1
4 3
5 1
6 3
8 1
0 1
00 1
1 2
71 1
9 1

This brings us to 637834 entries to try to match to known vessels