Skip to content

Commit 1e4cd9c

Browse files
docs: add samples for ARIMAPlus time_series_id_col feature (#1577)
* docs: add samples for ARIMAPlus `time_series_id_col` feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * include demo.ipynb in gitignore * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent fb20e5e commit 1e4cd9c

File tree

3 files changed

+78
-5
lines changed

3 files changed

+78
-5
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,6 @@ coverage.xml
6060
system_tests/local_test_setup
6161

6262
# Make sure a generated file isn't accidentally committed.
63+
demo.ipynb
6364
pylintrc
6465
pylintrc.test

owlbot.py

+7
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@
6464
# Fixup files
6565
# ----------------------------------------------------------------------------
6666

67+
# Add scratch space for experimentation to .gitignore.
68+
assert 1 == s.replace(
69+
[".gitignore"],
70+
re.escape("# Make sure a generated file isn't accidentally committed.\n"),
71+
"# Make sure a generated file isn't accidentally committed.\ndemo.ipynb\n",
72+
)
73+
6774
# Encourage sharing all relevant versions in bug reports.
6875
assert 1 == s.replace( # bug_report.md
6976
[".github/ISSUE_TEMPLATE/bug_report.md"],

samples/snippets/create_multiple_timeseries_forecasting_model_test.py

+70-5
Original file line numberDiff line numberDiff line change
@@ -73,26 +73,91 @@ def test_multiple_timeseries_forecasting_model(random_model_id: str) -> None:
7373
from bigframes.ml import forecasting
7474
import bigframes.pandas as bpd
7575

76+
model = forecasting.ARIMAPlus(
77+
# To reduce the query runtime with the compromise of a potential slight
78+
# drop in model quality, you could decrease the value of the
79+
# auto_arima_max_order. This shrinks the search space of hyperparameter
80+
# tuning in the auto.ARIMA algorithm.
81+
auto_arima_max_order=5,
82+
)
83+
7684
df = bpd.read_gbq("bigquery-public-data.new_york.citibike_trips")
7785

86+
# This query creates twelve time series models, one for each of the twelve
87+
# Citi Bike start stations in the input data. If you remove this row
88+
# filter, there would be 600+ time series to forecast.
89+
df = df[df["start_station_name"].str.contains("Central Park")]
90+
7891
features = bpd.DataFrame(
7992
{
80-
"num_trips": df.starttime,
93+
"start_station_name": df["start_station_name"],
94+
"num_trips": df["starttime"],
8195
"date": df["starttime"].dt.date,
8296
}
8397
)
84-
num_trips = features.groupby(["date"], as_index=False).count()
85-
model = forecasting.ARIMAPlus()
98+
num_trips = features.groupby(
99+
["start_station_name", "date"],
100+
as_index=False,
101+
).count()
86102

87103
X = num_trips["date"].to_frame()
88104
y = num_trips["num_trips"].to_frame()
89105

90-
model.fit(X, y)
106+
model.fit(
107+
X,
108+
y,
109+
# The input data that you want to get forecasts for,
110+
# in this case the Citi Bike station, as represented by the
111+
# start_station_name column.
112+
id_col=num_trips["start_station_name"].to_frame(),
113+
)
114+
91115
# The model.fit() call above created a temporary model.
92116
# Use the to_gbq() method to write to a permanent location.
93-
94117
model.to_gbq(
95118
your_model_id, # For example: "bqml_tutorial.nyc_citibike_arima_model",
96119
replace=True,
97120
)
98121
# [END bigquery_dataframes_bqml_arima_multiple_step_3_fit]
122+
123+
# [START bigquery_dataframes_bqml_arima_multiple_step_4_evaluate]
124+
# Evaluate the time series models by using the summary() function. The summary()
125+
# function shows you the evaluation metrics of all the candidate models evaluated
126+
# during the process of automatic hyperparameter tuning.
127+
summary = model.summary()
128+
print(summary.peek())
129+
130+
# Expected output:
131+
# start_station_name non_seasonal_p non_seasonal_d non_seasonal_q has_drift log_likelihood AIC variance ...
132+
# 1 Central Park West & W 72 St 0 1 5 False -1966.449243 3944.898487 1215.689281 ...
133+
# 8 Central Park W & W 96 St 0 0 5 False -274.459923 562.919847 655.776577 ...
134+
# 9 Central Park West & W 102 St 0 0 0 False -226.639918 457.279835 258.83582 ...
135+
# 11 Central Park West & W 76 St 1 1 2 False -1700.456924 3408.913848 383.254161 ...
136+
# 4 Grand Army Plaza & Central Park S 0 1 5 False -5507.553498 11027.106996 624.138741 ...
137+
# [END bigquery_dataframes_bqml_arima_multiple_step_4_evaluate]
138+
139+
# [START bigquery_dataframes_bqml_arima_multiple_step_5_coefficients]
140+
coef = model.coef_
141+
print(coef.peek())
142+
143+
# Expected output:
144+
# start_station_name ar_coefficients ma_coefficients intercept_or_drift
145+
# 5 Central Park West & W 68 St [] [-0.41014089 0.21979212 -0.59854213 -0.251438... 0.0
146+
# 6 Central Park S & 6 Ave [] [-0.71488957 -0.36835772 0.61008532 0.183290... 0.0
147+
# 0 Central Park West & W 85 St [] [-0.39270166 -0.74494638 0.76432596 0.489146... 0.0
148+
# 3 W 82 St & Central Park West [-0.50219511 -0.64820817] [-0.20665325 0.67683137 -0.68108631] 0.0
149+
# 11 W 106 St & Central Park West [-0.70442887 -0.66885553 -0.25030325 -0.34160669] [] 0.0
150+
# [END bigquery_dataframes_bqml_arima_multiple_step_5_coefficients]
151+
152+
# [START bigquery_dataframes_bqml_arima_multiple_step_6_forecast]
153+
prediction = model.predict(horizon=3, confidence_level=0.9)
154+
155+
print(prediction.peek())
156+
# Expected output:
157+
# forecast_timestamp start_station_name forecast_value standard_error confidence_level ...
158+
# 4 2016-10-01 00:00:00+00:00 Central Park S & 6 Ave 302.377201 32.572948 0.9 ...
159+
# 14 2016-10-02 00:00:00+00:00 Central Park North & Adam Clayton Powell Blvd 263.917567 45.284082 0.9 ...
160+
# 1 2016-09-25 00:00:00+00:00 Central Park West & W 85 St 189.574706 39.874856 0.9 ...
161+
# 20 2016-10-02 00:00:00+00:00 Central Park West & W 72 St 175.474862 40.940794 0.9 ...
162+
# 12 2016-10-01 00:00:00+00:00 W 106 St & Central Park West 63.88163 18.088868 0.9 ...
163+
# [END bigquery_dataframes_bqml_arima_multiple_step_6_forecast]

0 commit comments

Comments
 (0)