#!/usr/bin/env python import gadfly import sys import FAA_ATA100 import xml.sax import xml.sax.saxutils import string import os gadfly_db = gadfly.gadfly() gadfly_db.startup("FAA", "./Gadfly_databases/FAA.1") gadfly_cursor = gadfly_db.cursor() class my_handler(xml.sax.saxutils.ESISDocHandler): def __init__(self, out, gadfly_cursor): self.depth = 0 self.plate_counts = {} self.iap_names = [] self.gadfly_cursor = gadfly_cursor xml.sax.saxutils.ESISDocHandler.__init__(self, out) # The PDF files use random case. Create a map. self.pdf_filenames_map = {} for filename in os.listdir('../raw_data/FAA/approaches/current/d-TPP/Published_pdfs'): self.pdf_filenames_map[string.upper(filename)] = filename def characters(self, data, foo, data_len): if self.element_name == 'chart_name': self.chart_name = data elif self.element_name == 'pdf_name': self.pdf_name = data elif self.element_name == 'chartseq': self.chart_seq = data elif self.element_name == 'chart_code': self.chart_code = data #print '\t' * self.depth, '**%s**' % (data) def startElement(self, name, attribute_map): self.depth += 1 self.element_name = name if name == 'state_name': self.state_name = attribute_map['ID'] elif name == 'city_name': self.city_name = attribute_map['ID'] elif name == 'airport_name': self.airport_name = attribute_map['ID'] self.airport_ident = attribute_map['apt_ident'] #print '\t' * self.depth, '__%s__' % (name) #for key in attribute_map.keys(): # print '\t' * self.depth, '%s="%s"' % (key, attribute_map[key]) #print dir(attribute_map) #sys.exit(0) if name == 'airport_name': self.plate_count = 0 def endElement(self, name): self.depth -= 1 if name == 'record': if self.pdf_filenames_map.has_key(self.pdf_name): real_pdf_filename = self.pdf_filenames_map[self.pdf_name] else: real_pdf_filename = '' print 'no file for %s' % (self.pdf_name) record_identifier = '%s-%s' % (self.airport_ident, self.pdf_name.split('.')[0]) db_record = ( record_identifier, self.airport_ident, self.chart_seq, self.chart_code, self.chart_name, real_pdf_filename ) # print db_record self.gadfly_cursor.execute(self.airport_insert, db_record) self.plate_count += 1 if self.chart_code == 'IAP': None elif name == 'airport_name': #print self.airport_ident self.plate_counts[self.airport_ident] = self.plate_count #print '%s: %s' % (self.airport_ident, self.plate_count) airport_insert = """ insert into approaches( record_identifier, location_identifier, chart_sequence, chart_code, chart_name, pdf_name ) values (?, ?, ?, ?, ?, ?) """ if 1: gadfly_cursor.execute(""" create table approaches ( record_identifier varchar, location_identifier varchar, chart_sequence varchar, chart_code varchar, chart_name varchar, pdf_name varchar ) """) approaches_parser=xml.sax.saxexts.make_parser() approaches_parser.setErrorHandler(xml.sax.saxutils.ErrorPrinter()) xml_input_filename = '../raw_data/FAA/approaches/current/d-TPP/xml/digTPP_hier_All_Watt.xml' out=sys.stdout approaches_handler = my_handler(out=sys.stdout, gadfly_cursor=gadfly_cursor) approaches_parser.setDocumentHandler(approaches_handler) approaches_parser.parseFile(open(xml_input_filename)) plate_counts = approaches_handler.plate_counts if 1: gadfly_cursor.execute(""" create table airports ( location_identifier varchar, facility_name varchar, facility_type varchar, facility_use varchar, associated_city_name varchar, associated_state_post_office_code varchar, reference_point_longitude float, reference_point_latitude float, plates_count integer ) """) airport_insert = """ insert into airports ( location_identifier, facility_name, facility_type, facility_use, associated_city_name, associated_state_post_office_code, reference_point_longitude, reference_point_latitude, plates_count ) values (?, ?, ?, ?, ?, ?, ?, ?, ?) """ FAA_ATA100_filename = '../raw_data/FAA/ATA-100/current.zip' FAA_ATA100_data = FAA_ATA100.ATA100zip(FAA_ATA100_filename) airports_parsed = FAA_ATA100_data.parse('APT') for record in airports_parsed: rti = record['RTI'] if rti == 'APT': location_identifier = record['LOCATION IDENTIFIER'] if plate_counts.has_key(location_identifier): plate_count = plate_counts[location_identifier] else: plate_count = 0 db_record = ( location_identifier, record['OFFICIAL FACILITY NAME'], record['LANDING FACILITY TYPE'], record['FACILITY USE'], record['ASSOCIATED CITY NAME'], record['ASSOCIATED STATE POST OFFICE CODE'], FAA_ATA100.seconds_to_degrees(record['AIRPORT REFERENCE POINT LONGITUDE (SECONDS)']), FAA_ATA100.seconds_to_degrees(record['AIRPORT REFERENCE POINT LATITUDE (SECONDS)']), plate_count ) # print db_record gadfly_cursor.execute(airport_insert, db_record) gadfly_db.commit() gadfly_db.commit()