@@ -73,26 +73,91 @@ def test_multiple_timeseries_forecasting_model(random_model_id: str) -> None:
73
73
from bigframes .ml import forecasting
74
74
import bigframes .pandas as bpd
75
75
76
+ model = forecasting .ARIMAPlus (
77
+ # To reduce the query runtime with the compromise of a potential slight
78
+ # drop in model quality, you could decrease the value of the
79
+ # auto_arima_max_order. This shrinks the search space of hyperparameter
80
+ # tuning in the auto.ARIMA algorithm.
81
+ auto_arima_max_order = 5 ,
82
+ )
83
+
76
84
df = bpd .read_gbq ("bigquery-public-data.new_york.citibike_trips" )
77
85
86
+ # This row filter keeps twelve Citi Bike start stations, so twelve time
87
+ # series models are created, one for each station. If you remove this row
88
+ # filter, there would be 600+ time series to forecast.
89
+ df = df [df ["start_station_name" ].str .contains ("Central Park" )]
90
+
78
91
features = bpd .DataFrame (
79
92
{
80
- "num_trips" : df .starttime ,
93
+ "start_station_name" : df ["start_station_name" ],
94
+ "num_trips" : df ["starttime" ],
81
95
"date" : df ["starttime" ].dt .date ,
82
96
}
83
97
)
84
- num_trips = features .groupby (["date" ], as_index = False ).count ()
85
- model = forecasting .ARIMAPlus ()
98
+ num_trips = features .groupby (
99
+ ["start_station_name" , "date" ],
100
+ as_index = False ,
101
+ ).count ()
86
102
87
103
X = num_trips ["date" ].to_frame ()
88
104
y = num_trips ["num_trips" ].to_frame ()
89
105
90
- model .fit (X , y )
106
+ model .fit (
107
+ X ,
108
+ y ,
109
+ # The input data that you want to get forecasts for,
110
+ # in this case the Citi Bike station, as represented by the
111
+ # start_station_name column.
112
+ id_col = num_trips ["start_station_name" ].to_frame (),
113
+ )
114
+
91
115
# The model.fit() call above created a temporary model.
92
116
# Use the to_gbq() method to write to a permanent location.
93
-
94
117
model .to_gbq (
95
118
your_model_id , # For example: "bqml_tutorial.nyc_citibike_arima_model",
96
119
replace = True ,
97
120
)
98
121
# [END bigquery_dataframes_bqml_arima_multiple_step_3_fit]
122
+
123
+ # [START bigquery_dataframes_bqml_arima_multiple_step_4_evaluate]
124
+ # Evaluate the time series models by using the summary() method. The summary()
125
+ # method shows you the evaluation metrics of all the candidate models evaluated
126
+ # during the process of automatic hyperparameter tuning.
127
+ summary = model .summary ()
128
+ print (summary .peek ())
129
+
130
+ # Expected output:
131
+ # start_station_name non_seasonal_p non_seasonal_d non_seasonal_q has_drift log_likelihood AIC variance ...
132
+ # 1 Central Park West & W 72 St 0 1 5 False -1966.449243 3944.898487 1215.689281 ...
133
+ # 8 Central Park W & W 96 St 0 0 5 False -274.459923 562.919847 655.776577 ...
134
+ # 9 Central Park West & W 102 St 0 0 0 False -226.639918 457.279835 258.83582 ...
135
+ # 11 Central Park West & W 76 St 1 1 2 False -1700.456924 3408.913848 383.254161 ...
136
+ # 4 Grand Army Plaza & Central Park S 0 1 5 False -5507.553498 11027.106996 624.138741 ...
137
+ # [END bigquery_dataframes_bqml_arima_multiple_step_4_evaluate]
138
+
139
+ # [START bigquery_dataframes_bqml_arima_multiple_step_5_coefficients]
140
+ coef = model .coef_
141
+ print (coef .peek ())
142
+
143
+ # Expected output:
144
+ # start_station_name ar_coefficients ma_coefficients intercept_or_drift
145
+ # 5 Central Park West & W 68 St [] [-0.41014089 0.21979212 -0.59854213 -0.251438... 0.0
146
+ # 6 Central Park S & 6 Ave [] [-0.71488957 -0.36835772 0.61008532 0.183290... 0.0
147
+ # 0 Central Park West & W 85 St [] [-0.39270166 -0.74494638 0.76432596 0.489146... 0.0
148
+ # 3 W 82 St & Central Park West [-0.50219511 -0.64820817] [-0.20665325 0.67683137 -0.68108631] 0.0
149
+ # 11 W 106 St & Central Park West [-0.70442887 -0.66885553 -0.25030325 -0.34160669] [] 0.0
150
+ # [END bigquery_dataframes_bqml_arima_multiple_step_5_coefficients]
151
+
152
+ # [START bigquery_dataframes_bqml_arima_multiple_step_6_forecast]
153
+ prediction = model .predict (horizon = 3 , confidence_level = 0.9 )
154
+
155
+ print (prediction .peek ())
156
+ # Expected output:
157
+ # forecast_timestamp start_station_name forecast_value standard_error confidence_level ...
158
+ # 4 2016-10-01 00:00:00+00:00 Central Park S & 6 Ave 302.377201 32.572948 0.9 ...
159
+ # 14 2016-10-02 00:00:00+00:00 Central Park North & Adam Clayton Powell Blvd 263.917567 45.284082 0.9 ...
160
+ # 1 2016-09-25 00:00:00+00:00 Central Park West & W 85 St 189.574706 39.874856 0.9 ...
161
+ # 20 2016-10-02 00:00:00+00:00 Central Park West & W 72 St 175.474862 40.940794 0.9 ...
162
+ # 12 2016-10-01 00:00:00+00:00 W 106 St & Central Park West 63.88163 18.088868 0.9 ...
163
+ # [END bigquery_dataframes_bqml_arima_multiple_step_6_forecast]
0 commit comments