|
| 1 | +import os |
| 2 | +import json |
| 3 | +import datetime |
| 4 | +from pprint import pprint |
| 5 | +from textwrap import dedent |
| 6 | +from collections import defaultdict |
| 7 | +from contextlib import contextmanager |
| 8 | + |
| 9 | +from .. import config, iterate |
| 10 | +from ..common import create_unique_path, parse_date_str |
| 11 | + |
| 12 | + |
def gtfs_escape(val):
    """Sanitize a value for embedding as a field in a GTFS CSV line.

    The generated files are plain comma-separated lines with no quoting, so
    commas are removed entirely and line breaks are collapsed to spaces.
    Fix: also neutralize '\r' — a bare carriage return (or the CR half of a
    CRLF pair) would otherwise survive and corrupt the single-line record.

    :param val: the raw field text (str)
    :return: the sanitized text, safe to place between commas on one line
    """
    # One C-level pass instead of chained .replace() calls.
    return val.translate(str.maketrans({',': None, '\n': ' ', '\r': ' '}))
| 15 | + |
| 16 | + |
def create_calendar(target_path, date: datetime.date):
    """Write a minimal GTFS calendar.txt covering a single day.

    One service row is emitted, active on every weekday, with both
    start_date and end_date set to *date*.

    :param target_path: directory in which calendar.txt is created
    :param date: the single service date
    :return: the service id used in the file (the constant '1')
    """
    service_id = '1'
    day = date.strftime("%Y%m%d")
    header = 'service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date\n'
    row = f'{service_id},1,1,1,1,1,1,1,{day},{day}\n'
    with open(os.path.join(target_path, 'calendar.txt'), 'w') as out:
        out.write(header + row)
    return service_id
| 25 | + |
| 26 | + |
@contextmanager
def open_files(target_path):
    """Open the four GTFS output files for writing and yield them together.

    Yields a 4-tuple of open text-mode file objects:
    (stops.txt, routes.txt, trips.txt, stop_times.txt), all created under
    *target_path*. All four are closed when the context exits, including on
    error.

    Fix (idiom): a single multi-manager ``with`` replaces four nested
    ``with`` blocks — same acquisition order, same cleanup semantics,
    three levels less nesting.
    """
    with open(os.path.join(target_path, 'stops.txt'), 'w') as f_stops, \
            open(os.path.join(target_path, 'routes.txt'), 'w') as f_routes, \
            open(os.path.join(target_path, 'trips.txt'), 'w') as f_trips, \
            open(os.path.join(target_path, 'stop_times.txt'), 'w') as f_stop_times:
        yield f_stops, f_routes, f_trips, f_stop_times
| 34 | + |
| 35 | + |
def create_data(stats, target_path, service_id, date, start_hour, end_hour, min_lon, min_lat, max_lon, max_lat):
    """Stream SIRI ride-stop records and write GTFS stops/routes/trips/stop_times files.

    Records are fetched page-by-page via the project's ``iterate`` helper,
    filtered to the given date, hour window and bounding box. Each GTFS
    entity (stop, route, trip) is written at most once; every record adds
    one stop_times row.

    :param stats: mutable mapping (e.g. defaultdict(int)) updated in place
        with counts under 'stops', 'routes', 'trips', 'stop_times'
    :param target_path: directory receiving the four GTFS .txt files
    :param service_id: service id to reference in trips.txt (from calendar.txt)
    :param date: the service date (datetime.date)
    :param start_hour: first hour of the window (inclusive)
    :param end_hour: last hour of the window (inclusive — window ends at HH:59:59)
    :param min_lon, min_lat, max_lon, max_lat: bounding box for stop locations
    """
    # De-duplication sets — each entity id is written to its file only once.
    added_stop_ids = set()
    added_route_ids = set()
    added_trip_ids = set()
    with open_files(target_path) as (f_stops, f_routes, f_trips, f_stop_times):
        # CSV headers for the four GTFS files.
        f_stops.write('stop_id,stop_name,stop_lat,stop_lon,location_type\n',)
        f_routes.write('route_id,route_short_name,route_type\n')
        f_trips.write('route_id,service_id,trip_id\n')
        f_stop_times.write('trip_id,arrival_time,departure_time,stop_id,stop_sequence\n')
        # Timezone-aware (UTC) bounds for the vehicle-location time filter:
        # [start_hour:00:00, end_hour:59:59] on the requested date.
        recorded_at_time_from = datetime.datetime.combine(date, datetime.time(start_hour), datetime.timezone.utc)
        recorded_at_time_to = datetime.datetime.combine(date, datetime.time(end_hour, 59, 59), datetime.timezone.utc)
        for item in iterate('/siri_ride_stops/list', {
            'gtfs_stop__lat__greater_or_equal': min_lat,
            'gtfs_stop__lat__lower_or_equal': max_lat,
            'gtfs_stop__lon__greater_or_equal': min_lon,
            'gtfs_stop__lon__lower_or_equal': max_lon,
            'gtfs_date_from': date,
            'gtfs_date_to': date,
            'siri_vehicle_location__recorded_at_time_from': recorded_at_time_from,
            'siri_vehicle_location__recorded_at_time_to': recorded_at_time_to,
            # Scheduled-start window padded 10h each side — presumably to
            # include rides that cross the recorded-time window; TODO confirm.
            'siri_ride__scheduled_start_time_from': recorded_at_time_from - datetime.timedelta(hours=10),
            'siri_ride__scheduled_start_time_to': recorded_at_time_to + datetime.timedelta(hours=10),
            # NOTE(review): 'limit': -1 here plus limit=None below presumably
            # disables server- and client-side paging limits — verify against
            # the iterate API.
            'limit': -1,
        }, limit=None):
            # GTFS times are HH:MM:SS; both arrival and departure use the
            # nearest recorded vehicle-location time.
            svl_recorded_at_time = item['nearest_siri_vehicle_location__recorded_at_time'].strftime("%H:%M:%S")
            gs_name = gtfs_escape(f'{item["gtfs_stop__city"]}: {item["gtfs_stop__name"]}')
            gs_id = item['gtfs_stop_id']
            if gs_id not in added_stop_ids:
                added_stop_ids.add(gs_id)
                # location_type 0 = regular stop.
                f_stops.write(f'{gs_id},{gs_name},{item["gtfs_stop__lat"]},{item["gtfs_stop__lon"]},0\n')
                stats['stops'] += 1
            grt_id = item['gtfs_ride__gtfs_route_id']
            if grt_id not in added_route_ids:
                added_route_ids.add(grt_id)
                # route_type 3 = bus.
                f_routes.write(f'{grt_id},{gtfs_escape(item["gtfs_route__route_short_name"])},3\n')
                stats['routes'] += 1
            # The GTFS ride id doubles as the GTFS trip_id.
            gr_id = item['siri_ride__gtfs_ride_id']
            if gr_id not in added_trip_ids:
                added_trip_ids.add(gr_id)
                f_trips.write(f'{grt_id},{service_id},{gr_id}\n')
                stats['trips'] += 1
            f_stop_times.write(f'{gr_id},{svl_recorded_at_time},{svl_recorded_at_time},{gs_id},{item["order"]}\n')
            stats['stop_times'] += 1
            # Progress log every 1000 stop_times rows.
            if stats["stop_times"] > 1 and stats["stop_times"] % 1000 == 0:
                print(f'saved {stats["stop_times"]} stop times...')
| 81 | + |
| 82 | + |
def main(date, start_hour, end_hour, bbox, target_path=None):
    """Create a fake GTFS feed from SIRI data for one date and hour window.

    Builds a 'siri_feed' directory (calendar, stops, routes, trips,
    stop_times) under *target_path* plus a metadata.json alongside it.

    :param date: service date, as a string parseable by parse_date_str
    :param start_hour: first hour of the window (int-like)
    :param end_hour: last hour of the window (int-like)
    :param bbox: 'min_lon,min_lat,max_lon,max_lat' comma-separated string
    :param target_path: output directory; a fresh unique path is created
        under the configured urbanaccess data path when falsy
    :return: the output directory path
    """
    target_path = target_path or create_unique_path(os.path.join(config.URBANACCESS_DATA_PATH, 'fake_gtfs'))
    feed_path = os.path.join(target_path, 'siri_feed')
    os.makedirs(feed_path)
    # Normalize the string-typed CLI arguments.
    date = parse_date_str(date)
    start_hour, end_hour = int(start_hour), int(end_hour)
    min_lon, min_lat, max_lon, max_lat = (float(part.strip()) for part in bbox.split(','))
    print(dedent(f'''
        creating fake gtfs data
        target_path={target_path}
        date: {date}
        hours: {start_hour} - {end_hour}
        bbox: {min_lon},{min_lat} - {max_lon},{max_lat}
    '''))
    stats = defaultdict(int)
    service_id = create_calendar(feed_path, date)
    create_data(stats, feed_path, service_id, date, start_hour, end_hour, min_lon, min_lat, max_lon, max_lat)
    metadata = {
        'start_hour': start_hour,
        'end_hour': end_hour,
        'bbox': [min_lon, min_lat, max_lon, max_lat]
    }
    with open(os.path.join(target_path, 'metadata.json'), 'w') as f:
        json.dump(metadata, f)
    pprint(dict(stats))
    print(f'Fake gtfs data successfully stored at "{target_path}"')
    return target_path
0 commit comments