#!/usr/bin/env python
"""Bring the local set of FAA TPP plate PDFs up to date.

The script parses the TPP XML index and, for every <record> element,
makes sure the PDF named by its <pdf_name> exists in ``new_plates_dir``:

* if the file is already in ``new_plates_dir`` nothing is done;
* if a file with the same name (ignoring case) exists in
  ``old_plates_dir`` and the server reports the same size for the
  current plate, the old file is hard-linked into ``new_plates_dir``;
* otherwise the plate is downloaded from ``plates_NACO_host``.

The number of records seen per airport is collected in ``plate_counts``.
"""

import sys
import xml.sax
import xml.sax.saxexts
import xml.sax.saxutils
import string
import os
import shutil
import stat
import httplib

# Alternative location of the previous plates (layout of the FAA CD):
#old_plates_dir = '../raw_data/FAA/TPPs/current/d-TPP/Published_pdfs'
old_plates_dir = '../raw_data/FAA/TPPs/current/plates'
new_plates_dir = '../raw_data/FAA/TPPs/new/plates'
TPP_version = '0405'
plates_NACO_host = 'www.naco.faa.gov'
plates_NACO_path = '/d-tpp/' + TPP_version + '/'


class NACO_connection(httplib.HTTPConnection):
    """HTTP client for the plate files served by ``plates_NACO_host``.

    The base class is deliberately not initialised in ``__init__``; each
    request method re-initialises it to start from a fresh, unconnected
    state and closes the connection again once the response is consumed.
    """

    def __init__(self):
        # Intentionally empty: see the class docstring.
        pass

    def TPP_plate_size(self, version, plate_name):
        """Return the size in bytes the server reports for a plate.

        Issues a HEAD request for ``/d-tpp/<version>/<plate_name>``.

        Returns:
            0 if the response status is not 200 (callers treat this as a
            bogus plate), -1 if the server sent no Content-Length header
            (so the value never matches a real file size), otherwise the
            Content-Length as an int.
        """
        httplib.HTTPConnection.__init__(self, plates_NACO_host)
        try:
            self.request("HEAD", '/d-tpp/' + version + '/' + plate_name)
            response = self.getresponse()
            if response.status != 200:
                print('response status=%d' % (response.status,))
                return 0
            content_length = response.getheader('content-length')
        finally:
            # Release the socket; the next request re-initialises the
            # connection and would otherwise drop it without closing.
            self.close()
        if content_length is None:
            return -1
        return int(content_length)

    def TPP_get_plate(self, version, plate_name, plate_pathname):
        """Download a plate and store it at ``plate_pathname``.

        The body is first written to ``temp.pdf`` in ``new_plates_dir``
        and then moved into place, so a partially written file never
        appears under the final name.

        Returns:
            0 on success, 1 if the server answered 404 or the downloaded
            file could not be moved into place.

        Exits the process with status 1 on any other non-200 response.
        """
        print('TPP_get_plate(%s, %s, %s)'
              % (version, plate_name, plate_pathname))
        httplib.HTTPConnection.__init__(self, plates_NACO_host)
        try:
            self.request("GET", '/d-tpp/' + version + '/' + plate_name)
            response = self.getresponse()
            if response.status == 404:
                print('%s is bogus.' % (plate_name,))
                return 1
            if response.status != 200:
                print('response status=%d' % (response.status,))
                sys.exit(1)
            plate_data = response.read()
        finally:
            self.close()

        temp_pathname = '%s/temp.pdf' % (new_plates_dir,)
        # PDFs are binary: text mode would corrupt them on platforms that
        # translate line endings.
        temp_file = open(temp_pathname, 'wb')
        try:
            temp_file.write(plate_data)
        finally:
            temp_file.close()

        # Move without going through a shell so that unusual characters
        # in the file name cannot be interpreted as shell syntax.
        try:
            shutil.move(temp_pathname, plate_pathname)
        except (IOError, OSError):
            sys.stderr.write('could not move %s to %s: %s\n' % (
                temp_pathname, plate_pathname, sys.exc_info()[1]))
            return 1
        return 0


class my_handler(xml.sax.saxutils.ESISDocHandler):
    """SAX document handler that synchronises the plate of every record.

    Tracks the current state/city/airport from element attributes,
    collects the text of the chart elements listed in ``_TEXT_FIELDS``
    and, at the end of each <record>, links or downloads the plate it
    names. ``plate_counts`` maps airport identifier to the number of
    records seen for that airport.
    """

    # Element name -> attribute that receives the element's text.
    _TEXT_FIELDS = {
        'chart_name': 'chart_name',
        'pdf_name': 'pdf_name',
        'chartseq': 'chart_seq',
        'chart_code': 'chart_code',
    }

    def __init__(self, out, NACO):
        """Create the handler.

        Args:
            out: output stream handed to ESISDocHandler.
            NACO: object providing TPP_plate_size() and TPP_get_plate().
        """
        self.depth = 0
        self.plate_counts = {}
        self.iap_names = []
        self.NACO = NACO
        self.element_name = None
        self.airport_ident = None
        self.plate_count = 0
        for field in self._TEXT_FIELDS.values():
            setattr(self, field, '')
        xml.sax.saxutils.ESISDocHandler.__init__(self, out)
        # The PDF files on the FAA's CD use random case. Create a map
        # from the upper-cased name to the name actually on disk.
        self.old_pdf_filenames_map = {}
        for filename in os.listdir(old_plates_dir):
            self.old_pdf_filenames_map[filename.upper()] = filename

    def characters(self, data, foo, data_len):
        """Append character data to the field of the open element.

        ``foo`` is taken to be the start offset and ``data_len`` the
        length of the text inside ``data`` -- this is the SAX1
        characters(ch, start, length) convention; TODO confirm for the
        parser driver in use. Text may arrive in several pieces, so it
        is accumulated rather than overwritten.
        """
        field = self._TEXT_FIELDS.get(self.element_name)
        if field is None:
            return
        text = data[foo:foo + data_len]
        setattr(self, field, getattr(self, field) + text)

    def startElement(self, name, attribute_map):
        """Record the element being opened and any location attributes."""
        self.depth += 1
        self.element_name = name
        field = self._TEXT_FIELDS.get(name)
        if field is not None:
            # Start collecting this element's text from scratch.
            setattr(self, field, '')
        if name == 'state_name':
            self.state_name = attribute_map['ID']
        elif name == 'city_name':
            self.city_name = attribute_map['ID']
        elif name == 'airport_name':
            self.airport_name = attribute_map['ID']
            self.airport_ident = attribute_map['apt_ident']
            self.plate_count = 0

    def endElement(self, name):
        """Process a finished record or store a finished airport's count."""
        self.depth -= 1
        # Text that follows a closing tag (e.g. whitespace between
        # elements) must not be credited to the element just closed.
        self.element_name = None
        if name == 'record':
            self.plate_count += 1
            self._sync_plate()
        elif name == 'airport_name':
            self.plate_counts[self.airport_ident] = self.plate_count

    def _sync_plate(self):
        """Make sure the current record's PDF is in ``new_plates_dir``.

        NOTE(review): every record is processed whatever its chart_code;
        the original contained a no-op check for 'IAP' -- confirm that
        no filtering on the chart code was intended.
        """
        pdf_name = self.pdf_name
        if not pdf_name:
            # Nothing to fetch for a record without a PDF name.
            return
        new_plate_pathname = new_plates_dir + '/' + pdf_name

        # Do we already have it?
        if os.path.exists(new_plate_pathname):
            return

        # Is it in the old directory?  The map is keyed by upper-cased
        # name, so the lookup has to be upper-cased as well.
        old_filename = self.old_pdf_filenames_map.get(pdf_name.upper())
        if old_filename is None:
            print('need to download %s' % (pdf_name,))
            self.NACO.TPP_get_plate(
                version=TPP_version,
                plate_name=pdf_name,
                plate_pathname=new_plate_pathname,
            )
            return

        old_plate_pathname = '%s/%s' % (old_plates_dir, old_filename)
        old_plate_size = os.stat(old_plate_pathname)[stat.ST_SIZE]
        new_plate_size = self.NACO.TPP_plate_size(
            version=TPP_version,
            plate_name=pdf_name,
        )
        if new_plate_size == 0:
            # It's a bogus plate.
            print('%s is bogus.' % (pdf_name,))
        elif old_plate_size == new_plate_size:
            # If the old and new files are the same size, assume that
            # they're identical and share the data through a hard link.
            self._link_plate(old_plate_pathname, new_plate_pathname)
        else:
            print('%s changed' % (pdf_name,))
            self.NACO.TPP_get_plate(
                version=TPP_version,
                plate_name=pdf_name,
                plate_pathname=new_plate_pathname,
            )

    def _link_plate(self, old_plate_pathname, new_plate_pathname):
        """Hard-link an unchanged plate; report failure and carry on."""
        try:
            os.link(old_plate_pathname, new_plate_pathname)
        except OSError:
            sys.stderr.write('could not link %s to %s: %s\n' % (
                old_plate_pathname, new_plate_pathname, sys.exc_info()[1]))


# Parse the TPP index; the handler links/downloads plates as a side
# effect of the parse.
TPP_parser = xml.sax.saxexts.make_parser()
TPP_parser.setErrorHandler(xml.sax.saxutils.ErrorPrinter())
xml_input_filename = '../raw_data/FAA/TPPs/new/digTPP_hier_All_Watt.xml'
out = sys.stdout
TPP_handler = my_handler(
    out=sys.stdout,
    NACO=NACO_connection()
)
TPP_parser.setDocumentHandler(TPP_handler)
xml_input_file = open(xml_input_filename)
try:
    TPP_parser.parseFile(xml_input_file)
finally:
    xml_input_file.close()
plate_counts = TPP_handler.plate_counts