#!/usr/bin/env python
"""Fetch the FAA TFR list page and print the sentences of each NOTAM found.

This is a Python 2 script: it relies on ``urllib.urlopen``, ``htmllib`` and
``formatter``.  Running it downloads the list page, extracts the NOTAM ids
from the detail-page links, fetches every detail page and prints the text
that follows the ``FDC <n>/<nnnn>`` marker one sentence per line.
"""

import urllib
import sgmllib
import htmllib
import formatter
import string
import re

# NOTE(review): looks like the intended location for cached downloads, but
# nothing in this file writes to it -- confirm before relying on or removing.
cache_dir = '../raw_data/FAA/TFRs'

_LIST_URL = 'http://tfr.faa.gov/tfr/jsp/list.jsp'


def get_list():
    """Debug helper: dump the TFR list page to stdout line by line.

    Prints the attributes of the response object and its URL first, then
    every line of the body.  Returns None.
    """
    # urlopen's second positional argument is POST data, not a cache file,
    # so only the URL is passed and the page is fetched with a plain GET.
    TFR_list = urllib.urlopen(_LIST_URL)
    try:
        print(dir(TFR_list))
        print(TFR_list.url)
        # readline() returns '' once the body is exhausted; stop there
        # instead of looping forever.
        for line in iter(TFR_list.readline, ''):
            print(line)
    finally:
        TFR_list.close()


def tuples_to_dict(t):
    """Return a dict built from an iterable of (key, value) pairs.

    Later pairs override earlier ones that share the same key.
    """
    return dict(t)


class mywriter(formatter.NullWriter):
    """Formatter writer that prints each run of flowing text it receives."""

    def send_flowing_data(self, str):
        print(str)


class myparser(htmllib.HTMLParser):
    """HTML parser that records the href of every anchor it encounters.

    The collected values are available, in document order, in ``self.hrefs``.
    """

    def __init__(self, fmt, limit=-1):
        # `limit` is accepted for backward compatibility; it is not used.
        self.hrefs = []
        # Initialise through HTMLParser so the formatter is stored by the
        # base class rather than handed to SGMLParser as its `verbose` flag.
        htmllib.HTMLParser.__init__(self, fmt)

    def anchor_bgn(self, href, name, type):
        # Only the link target is of interest; the base-class anchor
        # bookkeeping is deliberately not invoked.
        self.hrefs.append(href)


def get_hrefs(url):
    """Download `url` and return the list of anchor hrefs found in it."""
    fmt = formatter.AbstractFormatter(formatter.NullWriter())
    parser = myparser(fmt=fmt)
    # Plain GET: a second positional argument would be sent as POST data.
    page = urllib.urlopen(url)
    try:
        parser.feed(page.read())
    finally:
        page.close()
    parser.close()
    return parser.hrefs


def get_notam_ids():
    """Return the unique NOTAM ids linked from the TFR list page.

    Each id is a tuple of two strings (one digit, four digits) taken from
    hrefs ending in ``_<d>_<dddd>.html``.  Ids are returned in the order in
    which they first appear on the page.
    """
    detail_re = re.compile(r'_([0-9])_([0-9]{4})\.html$')
    notam_ids = []
    for href in get_hrefs(_LIST_URL):
        m = detail_re.search(href)
        if not m:
            continue
        notam_id = m.groups()
        if notam_id not in notam_ids:
            notam_ids.append(notam_id)
    return notam_ids


def parse_notam(notam):
    """Print each '. '-separated sentence of `notam` prefixed by its index."""
    for (count, sentence) in enumerate(notam.split('. ')):
        print('%s %s' % (count, sentence))


for notam_id in get_notam_ids():
    print(notam_id)
    page = urllib.urlopen(
        'http://tfr.faa.gov/tfr/jsp/save_pages/detail_%s_%s.html' % notam_id)
    try:
        body = page.read()
    finally:
        page.close()
    # NOTE(review): the pattern ends with two newlines as it appears in the
    # source text; the original terminator may have been lost when the file
    # was reformatted -- confirm against a real detail page.
    m = re.search('FDC %s/%s (.*)\n\n' % notam_id, body)
    if m:
        parse_notam(m.group(1))