#!/usr/bin/env python
"""Load Census 2000 place coordinates into the ``./locations`` dbm database.

Every record of the fixed-width ``places2k.txt`` file becomes one entry:

    key:   '|<lower-cased place name>|<lower-cased state abbreviation>'
    value: '<longitude>|<latitude>'

Place-type suffixes such as "city", "town" or "CDP" are removed from the
name before the key is built.  Longitude and latitude are stored exactly as
they appear in their fixed-width fields (including any leading blank).
"""

import re
import string
import sys

try:
    import anydbm
except ImportError:
    # The module is called ``dbm`` on Python 3; alias it so the rest of the
    # script can keep using the original name.
    import dbm as anydbm

LOCATIONS_DB = './locations'
PLACES_FILE = '/var/www/aviationtoolbox/raw_data/Census/TIGER_2000/places2k.txt'

# Record layout of places2k.txt (1-based, inclusive columns):
#
# Columns 1-2:     United States Postal Service State Abbreviation
# Columns 3-4:     State Federal Information Processing Standard (FIPS) code
# Columns 5-9:     Place FIPS Code
# Columns 10-73:   Name
# Columns 74-82:   Total Population (2000)
# Columns 83-91:   Total Housing Units (2000)
# Columns 92-105:  Land Area (square meters) - Created for statistical purposes only.
# Columns 106-119: Water Area (square meters) - Created for statistical purposes only.
# Columns 120-131: Land Area (square miles) - Created for statistical purposes only.
# Columns 132-143: Water Area (square miles) - Created for statistical purposes only.
# Columns 144-153: Latitude (decimal degrees). First character is blank or "-"
#                  denoting North or South latitude respectively.
# Columns 154-164: Longitude (decimal degrees). First character is blank or "-"
#                  denoting East or West longitude respectively.

# Matches a name that ends in one or more place-type suffixes; group 1 is the
# name with the last suffix run (and any trailing blanks) removed.
city_with_suffix_re = re.compile(
    r'^(.*[^ ])( (city|town|borough|CDP|village|comunidad|urbana|\(balance\)|municipality))+ *$')


def strip_place_suffixes(name):
    """Return *name* without trailing blanks and place-type suffixes.

    The regex is greedy, so a single match only removes the last suffix
    (e.g. "Indianapolis city (balance)" -> "Indianapolis city"); it is
    therefore applied repeatedly until nothing more can be removed.
    """
    # Names without a recognised suffix never match the regex, so the
    # fixed-width padding has to be removed explicitly.
    name = name.rstrip(' ')
    while True:
        match = city_with_suffix_re.match(name)
        if not match:
            return name
        name = match.group(1)


def parse_place_line(line):
    """Convert one places2k.txt record into a ``(key, value)`` pair.

    *line* is a raw record, with or without its line terminator.  The key is
    '|city|state' in lower case and the value is 'longitude|latitude'.
    """
    # Drop the terminator so the last field can never pick up a newline.
    line = line.rstrip('\r\n')
    state = line[:2]
    city = strip_place_suffixes(line[9:73])
    latitude = line[143:153]
    # Columns 154-164 are 11 characters wide; slicing to 163 lost the last digit.
    longitude = line[153:164]
    key = '|'.join(['', city.lower(), state.lower()])
    return key, '%s|%s' % (longitude, latitude)


def load_places(lines, locations):
    """Store every record in *lines* into the mapping *locations*.

    Blank lines are skipped.  Returns the number of records stored.
    """
    stored = 0
    for line in lines:
        if not line.strip():
            continue
        key, value = parse_place_line(line)
        locations[key] = value
        stored += 1
    return stored


def main():
    """Open the database and the places file, and load all records."""
    locations = anydbm.open(LOCATIONS_DB, 'c')
    try:
        places = open(PLACES_FILE, 'r')
        try:
            # Iterate the file directly instead of reading it all at once.
            load_places(places, locations)
        finally:
            places.close()
    finally:
        # Closing the database makes sure pending writes reach the disk.
        locations.close()


if __name__ == '__main__':
    main()