This repository was archived by the owner on Oct 3, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmerge.py
146 lines (140 loc) · 5.67 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Quick utilities for cleaning up the source data.
"""
import csv
import json
import calculate
from pprint import pprint
class PalmeroFTW(object):
    """
    Collapse the per-list results into one row per precinct and join
    them onto the voting-location GeoJSON.

    ``transform()`` writes ``outcsv_path``; ``merge()`` reads that file
    back, so ``transform()`` has to run first. Results and locations are
    matched on a "fake id" of the form ``"<mesa_desde>-<mesa_hasta>"``.
    """
    primary_csv_path = "./input/votos_establecimiento_caba_paso.csv"
    general_csv_path = "./input/votos_establecimiento_caba_octubre.csv"
    location_json_path = "./input/locales_caba_paso2013.geojson"
    outheaders = [
        'fake_id',
        '187_total',  # Partido Autodeterminacion y Libertad (Dark blue)
        '501_total',  # Alianza Frente para la Victoria (Light Blue)
        '502_total',  # Alianza UNEN (Green)
        '503_total',  # Alianza Union Pro (Yellow)
        '505_total',  # Alianza Fet. de Izq.y de los Trabajadores (Red)
        '506_total',  # Alianza Camino Popular (Gray)
        'overall_total',
        'leader',
        'leader_total',
        'margin_of_victory',
    ]
    outcsv_path = "output/merged_totals.csv"
    outjson_path = "output/merged_totals.geojson"

    def _list_codes(self):
        """
        Return the list codes that own a ``<code>_total`` column,
        in the order they appear in ``outheaders``.
        """
        suffix = '_total'
        # These two end in "_total" as well but are summary columns
        summary_columns = ('overall_total', 'leader_total')
        return [
            header[:-len(suffix)]
            for header in self.outheaders
            if header.endswith(suffix) and header not in summary_columns
        ]

    def merge(self):
        """
        Merge the transformed csv file with the geojson with all
        the voting locations.

        Reads ``location_json_path`` and ``outcsv_path`` and writes the
        merged feature collection to ``outjson_path``.

        Raises:
            KeyError: a feature's fake id has no row in the csv.
            ValueError: a numeric csv column does not hold an integer.
        """
        # Read the raw bytes and let json work out the encoding
        with open(self.location_json_path, "rb") as location_file:
            json_data = json.loads(location_file.read())

        # Load the results, keyed by fake id
        with open(self.outcsv_path, 'r') as results_file:
            results_by_id = dict(
                (result['fake_id'], result)
                for result in csv.DictReader(results_file)
            )

        # Properties copied over from the location untouched
        copied_keys = ('direccion', 'establecim', 'seccion', 'circuito')
        # Result columns that get cast to int on the way through
        total_columns = ['%s_total' % code for code in self._list_codes()]
        total_columns.append('overall_total')

        merged_features = []
        for feature in json_data['features']:
            source = feature['properties']
            fake_id = "%s-%s" % (source['mesa_desde'], source['mesa_hasta'])
            results = results_by_id[fake_id]
            # Filter it down to the data we want to keep
            properties = dict((key, source[key]) for key in copied_keys)
            for column in total_columns:
                properties[column] = int(results[column])
            properties['fake_id'] = results['fake_id']
            properties['leader'] = int(results['leader'])
            properties['leader_total'] = int(results['leader_total'])
            properties['margin_of_victory'] = int(results['margin_of_victory'])
            merged_features.append({
                'geometry': feature['geometry'],
                'id': feature['id'],
                'properties': properties,
            })

        new_json = {
            'type': json_data['type'],
            'features': merged_features,
        }
        # Text mode: json.dumps returns str, which a "wb" handle
        # rejects on Python 3
        with open(self.outjson_path, "w") as outjson:
            outjson.write(json.dumps(new_json))

    def transform(self):
        """
        Transform the results file so that there is only one row
        for each precinct, with some summary stats calculated.

        Reads ``general_csv_path`` and writes ``outcsv_path`` with the
        columns named in ``outheaders``.

        Raises:
            ValueError: a ``total`` value is not an integer.
        """
        # Regroup the rows so each fake id is keyed to all of the
        # list totals for that precinct
        grouped_by_precinct = {}
        with open(self.general_csv_path, 'r') as general_file:
            for row in csv.DictReader(general_file):
                fake_id = "%s-%s" % (row['mesa_desde'], row['mesa_hasta'])
                totals = grouped_by_precinct.setdefault(fake_id, {})
                totals[row['vot_parcodigo']] = int(row['total'])

        list_codes = self._list_codes()
        outrows = []
        for fake_id, totals in grouped_by_precinct.items():
            # Figure out the overall total of votes
            overall_total = sum(totals.values())
            # Rank the lists from highest to lowest votes
            ranking = sorted(
                totals.items(),
                key=lambda item: item[1],
                reverse=True
            )
            # Pull out the leader and their vote total
            leader, leader_total = ranking[0]
            # Calculate their margin of victory over the second place list
            margin_of_victory = calculate.margin_of_victory(
                [votes for _, votes in ranking]
            )
            outrow = [fake_id]
            # One cell per header column, so a precinct that lacks a
            # list still lines up with outheaders (missing lists count 0)
            outrow.extend(totals.get(code, 0) for code in list_codes)
            outrow.extend([
                overall_total,
                leader,
                leader_total,
                int(margin_of_victory),
            ])
            outrows.append(outrow)

        # Close the file before returning so merge() sees every row
        with open(self.outcsv_path, 'w') as outcsv_file:
            outcsv = csv.writer(outcsv_file)
            outcsv.writerow(self.outheaders)
            outcsv.writerows(outrows)
if __name__ == '__main__':
    # Run the two steps in order: transform() first, then merge()
    palmero = PalmeroFTW()
    palmero.transform()
    palmero.merge()