15
15
# Words that, going by the constant's name, mark a mentioned time as approximate.
APPROX_WORDS = [
    'about',
    'around',
    'approximately',
]
# Words that, going by the constant's name, are expected directly before a time expression.
WORDS_PRECEDING_TIME = [
    'at',
    'between',
    'to',
    'before',
    'after',
]
17
17
18
+
18
19
def pm_map_match_to_query_value (match : str ):
19
20
if len (match .rstrip ('pm' )) < 3 : # This will match something like ``5pm``.
20
21
if match .startswith ('12' ):
@@ -82,12 +83,13 @@ def get_date_from_utterance(tokenized_utterance: List[Token],
82
83
it is 1993 so we do the same here. If there is no mention of the month or day then
83
84
we do not return any dates from the utterance.
84
85
"""
86
+
85
87
dates = []
88
+
86
89
utterance = ' ' .join ([token .text for token in tokenized_utterance ])
87
90
year_result = re .findall (r'199[0-4]' , utterance )
88
91
if year_result :
89
92
year = int (year_result [0 ])
90
-
91
93
trigrams = ngrams ([token .text for token in tokenized_utterance ], 3 )
92
94
for month , tens , digit in trigrams :
93
95
# This will match something like ``september twenty first``.
@@ -107,6 +109,20 @@ def get_date_from_utterance(tokenized_utterance: List[Token],
107
109
except ValueError :
108
110
print ('invalid month day' )
109
111
112
+ fivegrams = ngrams ([token .text for token in tokenized_utterance ], 5 )
113
+ for tens , digit , _ , year_match , month in fivegrams :
114
+ # This will match something like ``twenty first of 1993 july``.
115
+ day = ' ' .join ([tens , digit ])
116
+ if month in MONTH_NUMBERS and day in DAY_NUMBERS and year_match .isdigit ():
117
+ try :
118
+ dates .append (datetime (int (year_match ), MONTH_NUMBERS [month ], DAY_NUMBERS [day ]))
119
+ except ValueError :
120
+ print ('invalid month day' )
121
+ if month in MONTH_NUMBERS and digit in DAY_NUMBERS and year_match .isdigit ():
122
+ try :
123
+ dates .append (datetime (int (year_match ), MONTH_NUMBERS [month ], DAY_NUMBERS [digit ]))
124
+ except ValueError :
125
+ print ('invalid month day' )
110
126
return dates
111
127
112
128
def get_numbers_from_utterance (utterance : str , tokenized_utterance : List [Token ]) -> Dict [str , List [int ]]:
@@ -189,6 +205,35 @@ def get_time_range_end_from_utterance(utterance: str, # pylint: disable=unused-a
189
205
190
206
return time_range_end_linking_dict
191
207
208
def get_costs_from_utterance(utterance: str, # pylint: disable=unused-argument
                             tokenized_utterance: List[Token]) -> Dict[str, List[int]]:
    """
    Links the dollar amounts mentioned in an utterance to their token positions.

    A token is treated as a cost when its text consists only of digits and the token
    immediately after it is ``dollar`` or ``dollars``. ``utterance`` is accepted but
    not read.

    Returns
    -------
    A dictionary from each matched number string to the list of token indices at
    which it appears as a cost.
    """
    costs_linking_dict: Dict[str, List[int]] = defaultdict(list)
    # Walk over (token, next token) pairs; the final token has no successor and
    # therefore can never be followed by a currency word.
    token_pairs = zip(tokenized_utterance, tokenized_utterance[1:])
    for position, (current, following) in enumerate(token_pairs):
        if following.text in ('dollars', 'dollar') and current.text.isdigit():
            costs_linking_dict[current.text].append(position)
    return costs_linking_dict
218
+
219
def get_flight_numbers_from_utterance(utterance: str, # pylint: disable=unused-argument
                                      tokenized_utterance: List[Token]) -> Dict[str, List[int]]:
    """
    Links the flight numbers mentioned in an utterance to their token positions.

    An all-digit token is treated as a flight number when either

    - the token before it is ``flight`` or ``number``, or its upper-cased text is in
      ``AIRLINE_CODE_LIST``, or its lower-cased text is a key of ``AIRLINE_CODES``; or
    - the token after it is ``flight``.

    ``utterance`` is accepted but not read.

    Returns
    -------
    A dictionary from each matched number string to the list of token indices at
    which it appears. Each index is recorded once, even when both conditions above
    hold for the same token.
    """
    indices_words_preceding_flight_number = {index for index, token in enumerate(tokenized_utterance)
                                             if token.text in {'flight', 'number'}
                                             or token.text.upper() in AIRLINE_CODE_LIST
                                             or token.text.lower() in AIRLINE_CODES}

    indices_words_succeeding_flight_number = {index for index, token in enumerate(tokenized_utterance)
                                              if token.text == 'flight'}

    flight_numbers_linking_dict: Dict[str, List[int]] = defaultdict(list)
    for token_index, token in enumerate(tokenized_utterance):
        if not token.text.isdigit():
            continue
        # A single ``or`` rather than two separate ``if`` statements, so that a number
        # such as the one in ``flight 100 flight`` is not appended twice.
        if (token_index - 1 in indices_words_preceding_flight_number
                or token_index + 1 in indices_words_succeeding_flight_number):
            flight_numbers_linking_dict[token.text].append(token_index)
    return flight_numbers_linking_dict
192
237
193
238
def digit_to_query_time (digit : str ) -> List [int ]:
194
239
"""
@@ -303,6 +348,7 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
303
348
'mgm' : ['MG' ],
304
349
'midwest' : ['YX' ],
305
350
'nation' : ['NX' ],
351
+ 'nationair' : ['NX' ],
306
352
'northeast' : ['2V' ],
307
353
'northwest' : ['NW' ],
308
354
'ontario' : ['GX' ],
@@ -384,11 +430,14 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
384
430
# Lower-case trigger words mapped to upper-case ground-transport strings
# (presumably values of the ``transport_type`` column -- confirm against the database).
GROUND_SERVICE = {
    'air taxi': ['AIR TAXI OPERATION'],
    'car': ['RENTAL CAR'],
    'limo': ['LIMOUSINE'],
    'limousine': ['LIMOUSINE'],
    'rapid': ['RAPID TRANSIT'],
    'rental': ['RENTAL CAR'],
    'taxi': ['TAXI'],
}

# Miscellaneous phrases mapped to the upper-case strings they should be linked to.
MISC_STR = {
    "every day": ["DAILY"],
    "saint petersburg": ["ST. PETERSBURG"],
    "saint louis": ["ST. LOUIS"],
}
392
441
393
442
DAY_NUMBERS = {'first' : 1 ,
394
443
'second' : 2 ,
@@ -424,18 +473,27 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
424
473
425
474
426
475
# Time-of-day phrases mapped to the time strings they trigger
# (the values look like 24-hour HHMM clock times -- confirm against the callers).
MISC_TIME_TRIGGERS = {
    'lunch': ['1400'],
    'noon': ['1200'],
    'early evening': ['1800', '2000'],
    'morning': ['0', '1200'],
    'night': ['1800', '2400'],
}
428
480
429
481
# Time-of-day phrases mapped to the time string at which that period begins
# (the values look like 24-hour HHMM clock times -- confirm against the callers).
# Plural and split spellings ('mornings', 'afternoons', 'after noon') share the
# start of their singular form; 'mornings' previously started at '1200', which
# disagreed with 'morning' and matched the afternoon rows instead.
TIME_RANGE_START_DICT = {'morning': ['0'],
                         'mornings': ['0'],
                         'afternoon': ['1200'],
                         'afternoons': ['1200'],
                         'after noon': ['1200'],
                         'late afternoon': ['1600'],
                         'evening': ['1800'],
                         'late evening': ['2000']}
434
489
435
490
# Time-of-day phrases mapped to the time string(s) at which that period may end
# (the values look like 24-hour HHMM clock times -- confirm against the callers).
TIME_RANGE_END_DICT = {
    'early morning': ['800'],
    'morning': ['1200', '800'],
    'mornings': ['1200', '800'],
    'early afternoon': ['1400'],
    'afternoon': ['1800'],
    'afternoons': ['1800'],
    'after noon': ['1800'],
    'evening': ['2200'],
}
440
498
441
499
ALL_TABLES = {'aircraft' : ['aircraft_code' , 'aircraft_description' , 'capacity' ,
@@ -477,18 +535,18 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
477
535
478
536
# Table name -> column names. Judging by the constant's name these are the columns
# holding string values; confirm against the database schema.
TABLES_WITH_STRINGS = {
    'airline': ['airline_code', 'airline_name'],
    'city': ['city_name', 'state_code', 'city_code'],
    'fare': ['round_trip_required', 'fare_basis_code', 'restriction_code'],
    'flight': ['airline_code', 'flight_days'],
    'flight_stop': ['stop_airport'],
    'airport': ['airport_code', 'airport_name'],
    'state': ['state_name', 'state_code'],
    'fare_basis': ['fare_basis_code', 'class_type', 'economy', 'booking_class'],
    'class_of_service': ['booking_class', 'class_description'],
    'aircraft': ['basic_type', 'manufacturer', 'aircraft_code', 'propulsion'],
    'restriction': ['restriction_code'],
    'ground_service': ['transport_type'],
    'days': ['day_name', 'days_code'],
    'food_service': ['meal_description', 'compartment'],
}
492
550
493
551
# Upper-case day names, Monday first; list position doubles as the day's index.
DAY_OF_WEEK = [
    'MONDAY',
    'TUESDAY',
    'WEDNESDAY',
    'THURSDAY',
    'FRIDAY',
    'SATURDAY',
    'SUNDAY',
]
494
552
@@ -518,7 +576,10 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
518
576
'charlotte' : ['CLT' ],
519
577
'dallas' : ['DFW' ],
520
578
'detroit' : ['DTW' ],
579
+ 'houston' : ['IAH' ],
521
580
'la guardia' : ['LGA' ],
581
+ 'love field' : ['DAL' ],
582
+ 'los angeles' : ['LAX' ],
522
583
'oakland' : ['OAK' ],
523
584
'philadelphia' : ['PHL' ],
524
585
'pittsburgh' : ['PIT' ],
@@ -537,7 +598,7 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
537
598
'OK' , 'DL' , '9E' , 'QD' , 'LH' , 'XJ' , 'MG' ,
538
599
'YX' , 'NX' , '2V' , 'NW' , 'RP' , 'AT' , 'SN' ,
539
600
'OO' , 'WN' , 'TG' , 'FF' , '9N' , 'TW' , 'RZ' ,
540
- 'UA' , 'US' , 'OE' ]
601
+ 'UA' , 'US' , 'OE' , 'EA' ]
541
602
CITIES = ['NASHVILLE' , 'BOSTON' , 'BURBANK' , 'BALTIMORE' , 'CHICAGO' , 'CLEVELAND' ,
542
603
'CHARLOTTE' , 'COLUMBUS' , 'CINCINNATI' , 'DENVER' , 'DALLAS' , 'DETROIT' ,
543
604
'FORT WORTH' , 'HOUSTON' , 'WESTCHESTER COUNTY' , 'INDIANAPOLIS' , 'NEWARK' ,
@@ -551,7 +612,12 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
551
612
'MATL' , 'MMEM' , 'MMIA' , 'MMKC' , 'MMKE' , 'MMSP' , 'NNYC' , 'OOAK' , 'OONT' , 'OORL' ,
552
613
'PPHL' , 'PPHX' , 'PPIT' , 'SMSP' , 'SSAN' , 'SSEA' , 'SSFO' , 'SSJC' , 'SSLC' , 'SSTL' ,
553
614
'STPA' , 'TSEA' , 'TTPA' , 'WWAS' , 'YYMQ' , 'YYTO' ]
554
# Upper-case class strings (presumably fare/service classes -- confirm against the database).
CLASS = [
    'COACH',
    'BUSINESS',
    'FIRST',
    'THRIFT',
    'STANDARD',
    'SHUTTLE',
]

# Upper-case aircraft manufacturer names.
AIRCRAFT_MANUFACTURERS = [
    'BOEING',
    'MCDONNELL DOUGLAS',
    'FOKKER',
]

# Aircraft type codes (presumably values of the ``basic_type`` column -- confirm).
AIRCRAFT_BASIC_CODES = [
    'DC9',
    '737',
    '767',
    '747',
    'DC10',
    '757',
    'MD80',
]
555
621
556
622
# Position of each day in DAY_OF_WEEK -> single-element list holding that day's name.
DAY_OF_WEEK_INDEX = dict(enumerate([day] for day in DAY_OF_WEEK))
557
623
@@ -560,7 +626,10 @@ def convert_to_string_list_value_dict(trigger_dict: Dict[str, int]) -> Dict[str,
560
626
FARE_BASIS_CODE , CLASS ,
561
627
AIRLINE_CODE_LIST , DAY_OF_WEEK ,
562
628
CITY_CODE_LIST , MEALS ,
563
- RESTRICT_CODES ]
629
+ RESTRICT_CODES ,
630
+ AIRCRAFT_MANUFACTURERS ,
631
+ AIRCRAFT_BASIC_CODES ]
632
+
564
633
TRIGGER_DICTS = [CITY_AIRPORT_CODES ,
565
634
AIRLINE_CODES ,
566
635
CITY_CODES ,
0 commit comments