-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmeetup_csv_gen.py
183 lines (141 loc) · 7.17 KB
/
meetup_csv_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import requests
import pandas as pd
import json
import datetime as dt
import time
#=========================================================================================
# Automatic CSV File Generator for Meetup.com API Data
# Created by: Andrew Graves
# Date: Jan 17, 2018
#=========================================================================================
"""Prompts user for API key, zipcode, and search radius (0 - 100 miles).
Stores the API key in api_key.txt for later use.
Exports a csv file containing rows with the following meetup group information:
- name of group
- group url (used as the main group identifier for the meetup API)
- city
- latitude of meetup location
- longitude of meetup location
- meetup category (Social, Tech, Arts, etc.)
- datetime of group creation
- status (active, grace)
- # of current members
- join mode (open, approval)
- # of previous events held
- datetime of most recently past event
- # of 'yes' rsvps for most recently past event
"""
# Meetup.com api instructions can be found here: https://www.meetup.com/meetup_api/docs/find/groups/
def get_api_key():
    """Return the user's Meetup API key, prompting for it on first use.

    Reads the key from ``api_key.txt`` in the current working directory.
    If that file does not exist, or holds only whitespace, the user is
    prompted for a key and the answer is written to ``api_key.txt`` so
    later runs can reuse it.

    Returns:
        str: The API key with surrounding whitespace removed.
    """
    try:
        with open("api_key.txt", "r") as f:
            # Strip so a trailing newline left by an editor does not end up
            # inside the key that is sent to the API.
            key = f.read().strip()
    except FileNotFoundError:
        key = ""

    if key:
        print("***User API key is on file.***")
        print("")
        return key

    print("**(You can find your API key for meetup.com at https://secure.meetup.com/meetup_api/key/)")
    print(" ")
    key = input("API key: ").strip()
    # Create the file only after the prompt has been answered; an interrupted
    # prompt must not leave an empty api_key.txt behind.
    with open("api_key.txt", "w") as f:
        f.write(key)
    return key
def query_api():
    """Query the meetup.com ``find/groups`` endpoint and collect every batch.

    Reads the module-level globals ``key``, ``zip`` and ``radius`` to build
    the request.  Results are requested in batches of 200 entries, with a
    short pause between requests so the API rate limit is not exceeded.  A
    batch with fewer than 200 entries means everything has been collected.

    Returns:
        list: One element per successful request; each element is the decoded
        JSON body of that response.  If a request comes back with a status
        other than 200 (and other than 401), paging stops and the batches
        collected so far are returned; the error body is never included.

    Raises:
        SystemExit: If the server answers with status 401.
    """
    page_size = 200  # entries requested per batch
    batch = 0  # zero-based batch number, sent to the API as ``offset``
    raw_data = []

    while True:
        parameters = {
            "key": key,
            "sign": "true",
            "page": str(page_size),
            "offset": batch,
            "zip": zip,
            "radius": radius,
            "only": "category,created,urlname,city,join_mode,last_event,members,name,past_event_count,status,lat,lon",
            "fields": "last_event,past_event_count",
        }
        response = requests.get("https://api.meetup.com/find/groups", params=parameters)
        status = response.status_code
        print("Batch number: {}".format(batch + 1))

        if status == 401:
            print("***Bad server request!***")
            # SystemExit is what exit() raises, but unlike exit() it does not
            # depend on the ``site`` module having injected the helper.
            raise SystemExit(1)

        if status != 200:
            # Do not decode or store the error body: it is not a list of
            # groups and would be mistaken for one further down the line.
            print("Server request: Status Code {}".format(status))
            print(" ")
            break

        print("Server request: OK")
        data = response.json()
        print("Number of groups returned: " + str(len(data)))
        print(" ")
        raw_data.append(data)

        if len(data) < page_size:
            # A short batch is the last one.
            break

        # Pause before asking for the next batch.
        time.sleep(0.25)
        batch += 1

    print(" ")
    print("***Finished!***")
    return raw_data
def convert_to_df(raw_data):
    """Clean each batch of group dictionaries and combine them in one frame.

    For every batch, a UTC offset is taken from the groups that carry a
    ``last_event`` entry (the last such group in the batch wins) and is added
    to the ``created`` and ``last_event`` timestamps of every group in that
    batch.  When no group in the batch has a ``last_event``, an offset of 0
    is used.

    Per group, the cleaning steps are:
    - ``category`` is replaced by the category's ``name``.
    - ``created`` gets the UTC offset added.
    - ``past_event_count`` is converted to ``int``.
    - ``last_event`` is replaced by its offset-adjusted ``time`` and its
      ``yes_rsvp_count`` is stored under the new key ``last_rsvp``.

    The input dictionaries are not modified; cleaning is done on copies.

    Args:
        raw_data: A list of batches, each batch being a list of group
            dictionaries.

    Returns:
        pandas.DataFrame: The concatenation of one data frame per batch, or
        an empty data frame when ``raw_data`` is empty.
    """
    all_dfs = []
    for batch in raw_data:
        # Find the offset used to shift UTC timestamps for this batch.  The
        # default of 0 keeps a batch without any past event from hanging.
        utc_offset = 0
        for group in batch:
            last = group.get("last_event")
            if last is not None and "utc_offset" in last:
                utc_offset = int(last["utc_offset"])

        cleaned = []
        for group in batch:
            # Work on a shallow copy so the caller's data stays untouched and
            # the function can safely be called twice on the same input.
            entry = dict(group)
            if "category" in entry:
                entry["category"] = entry["category"]["name"]
            if "created" in entry:
                entry["created"] = entry["created"] + utc_offset
            if "past_event_count" in entry:
                entry["past_event_count"] = int(entry["past_event_count"])
            if "last_event" in entry:
                last = entry["last_event"]
                entry["last_rsvp"] = int(last["yes_rsvp_count"])
                entry["last_event"] = int(last["time"] + utc_offset)
            cleaned.append(entry)

        all_dfs.append(pd.DataFrame(cleaned))

    if not all_dfs:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame()
    return pd.concat(all_dfs)
def convert_to_dt(row):
    """Convert a millisecond epoch timestamp to a naive ``datetime``.

    Args:
        row: A value convertible with ``int()`` that holds milliseconds
            since 1970-01-01 00:00:00.

    Returns:
        datetime.datetime: The corresponding naive datetime, or ``row``
        itself, unchanged, when it cannot be converted (for example ``None``,
        NaN, non-numeric text, or a value outside the datetime range).
    """
    try:
        millis = int(row)
        # Integer timedelta arithmetic avoids the float rounding of
        # ``row / 1000`` and the deprecated ``datetime.utcfromtimestamp``.
        return dt.datetime(1970, 1, 1) + dt.timedelta(milliseconds=millis)
    except (TypeError, ValueError, OverflowError):
        # Leave troublesome values as they are rather than failing the run.
        return row
# Script body: prompt for the search settings, download the group data,
# tidy it up and export it to meetup_groups.csv.

#initialize
banner = """
#=======================================================
# Automatic CSV File Generator for Meetup.com API Data
# Created by: Andrew Graves
# Date: Jan 17, 2018
#=======================================================
"""
print(banner)

#establish our api key
key = get_api_key()

#prompt user for a zip and search radius
# NOTE: query_api() reads the globals ``key``, ``zip`` and ``radius`` by name,
# so these names must stay as they are even though ``zip`` shadows the builtin.
zip = input("Zipcode to search: ")
radius = input("Search radius in miles (0.0 - 100.0): ")

#get our raw data
raw_data = query_api()

#convert our raw data to a single pandas df
df = convert_to_df(raw_data)

#Reorder the columns
cols = [
    "name", "urlname", "city", "lat", "lon", "category", "created",
    "status", "members", "join_mode", "past_event_count", "last_event",
    "last_rsvp",
]
# reindex (rather than df[cols]) so a column that no returned group supplied,
# e.g. "last_event" when nobody has held an event yet, is created empty
# instead of raising KeyError.
df = df.reindex(columns=cols)

#convert the timestamp values in "created" and "last_event" columns to datetime objects
for stamp_col in ("created", "last_event"):
    df[stamp_col] = df[stamp_col].apply(convert_to_dt)

#convert past_event_count and last_rsvp count to integers, setting any missing values to 0
for count_col in ("past_event_count", "last_rsvp"):
    df[count_col] = df[count_col].fillna(0).astype(int, errors="raise")

#export our df and call it a day!
df.to_csv("meetup_groups.csv", index=False)
print("***Successfully exported file as 'meetup_groups.csv'***")