docs: add samples for ArimiaPlus time_series_id_col feature (#1577)

tswast · gcf-owl-bot[bot] · web-flow · commit 1e4cd9cf69f9 · 2025-04-01T13:24:03.000-05:00
* docs: add samples for ArimiaPlus `time_series_id_col` feature * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * include demo.ipynb in gitignore * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
diff --git a/.gitignore b/.gitignore
@@ -60,5 +60,6 @@ coverage.xml
 system_tests/local_test_setup
 
 # Make sure a generated file isn't accidentally committed.
+demo.ipynb
 pylintrc
 pylintrc.test
diff --git a/owlbot.py b/owlbot.py
@@ -64,6 +64,13 @@
 # Fixup files
 # ----------------------------------------------------------------------------
 
+# Add scratch space for experimentation to .gitignore.
+assert 1 == s.replace(
+    [".gitignore"],
+    re.escape("# Make sure a generated file isn't accidentally committed.\n"),
+    "# Make sure a generated file isn't accidentally committed.\ndemo.ipynb\n",
+)
+
 # Encourage sharring all relevant versions in bug reports.
 assert 1 == s.replace(  # bug_report.md
     [".github/ISSUE_TEMPLATE/bug_report.md"],
diff --git a/samples/snippets/create_multiple_timeseries_forecasting_model_test.py b/samples/snippets/create_multiple_timeseries_forecasting_model_test.py
@@ -73,26 +73,91 @@ def test_multiple_timeseries_forecasting_model(random_model_id: str) -> None:
     from bigframes.ml import forecasting
     import bigframes.pandas as bpd
 
+    model = forecasting.ARIMAPlus(
+        # To reduce the query runtime with the compromise of a potential slight
+        # drop in model quality, you could decrease the value of the
+        # auto_arima_max_order. This shrinks the search space of hyperparameter
+        # tuning in the auto.ARIMA algorithm.
+        auto_arima_max_order=5,
+    )
+
     df = bpd.read_gbq("bigquery-public-data.new_york.citibike_trips")
 
+    # This query creates twelve time series models, one for each of the twelve
+    # Citi Bike start stations in the input data. If you remove this row
+    # filter, there would be 600+ time series to forecast.
+    df = df[df["start_station_name"].str.contains("Central Park")]
+
     features = bpd.DataFrame(
         {
-            "num_trips": df.starttime,
+            "start_station_name": df["start_station_name"],
+            "num_trips": df["starttime"],
             "date": df["starttime"].dt.date,
         }
     )
-    num_trips = features.groupby(["date"], as_index=False).count()
-    model = forecasting.ARIMAPlus()
+    num_trips = features.groupby(
+        ["start_station_name", "date"],
+        as_index=False,
+    ).count()
 
     X = num_trips["date"].to_frame()
     y = num_trips["num_trips"].to_frame()
 
-    model.fit(X, y)
+    model.fit(
+        X,
+        y,
+        # The input data that you want to get forecasts for,
+        # in this case the Citi Bike station, as represented by the
+        # start_station_name column.
+        id_col=num_trips["start_station_name"].to_frame(),
+    )
+
     # The model.fit() call above created a temporary model.
     # Use the to_gbq() method to write to a permanent location.
-
     model.to_gbq(
         your_model_id,  # For example: "bqml_tutorial.nyc_citibike_arima_model",
         replace=True,
     )
     # [END bigquery_dataframes_bqml_arima_multiple_step_3_fit]
+
+    # [START bigquery_dataframes_bqml_arima_multiple_step_4_evaluate]
+    # Evaluate the time series models by using the summary() function. The summary()
+    # function shows you the evaluation metrics of all the candidate models evaluated
+    # during the process of automatic hyperparameter tuning.
+    summary = model.summary()
+    print(summary.peek())
+
+    # Expected output:
+    #    start_station_name                  non_seasonal_p  non_seasonal_d   non_seasonal_q  has_drift  log_likelihood           AIC     variance ...
+    # 1         Central Park West & W 72 St               0               1                5      False    -1966.449243   3944.898487  1215.689281 ...
+    # 8            Central Park W & W 96 St               0               0                5      False     -274.459923    562.919847   655.776577 ...
+    # 9        Central Park West & W 102 St               0               0                0      False     -226.639918    457.279835    258.83582 ...
+    # 11        Central Park West & W 76 St               1               1                2      False    -1700.456924   3408.913848   383.254161 ...
+    # 4   Grand Army Plaza & Central Park S               0               1                5      False    -5507.553498  11027.106996   624.138741 ...
+    # [END bigquery_dataframes_bqml_arima_multiple_step_4_evaluate]
+
+    # [START bigquery_dataframes_bqml_arima_multiple_step_5_coefficients]
+    coef = model.coef_
+    print(coef.peek())
+
+    # Expected output:
+    #    start_station_name                                              ar_coefficients                                   ma_coefficients intercept_or_drift
+    # 5    Central Park West & W 68 St                                                [] [-0.41014089  0.21979212 -0.59854213 -0.251438...                0.0
+    # 6         Central Park S & 6 Ave                                                [] [-0.71488957 -0.36835772  0.61008532  0.183290...                0.0
+    # 0    Central Park West & W 85 St                                                [] [-0.39270166 -0.74494638  0.76432596  0.489146...                0.0
+    # 3    W 82 St & Central Park West                         [-0.50219511 -0.64820817]             [-0.20665325  0.67683137 -0.68108631]                0.0
+    # 11  W 106 St & Central Park West [-0.70442887 -0.66885553 -0.25030325 -0.34160669]                                                []                0.0
+    # [END bigquery_dataframes_bqml_arima_multiple_step_5_coefficients]
+
+    # [START bigquery_dataframes_bqml_arima_multiple_step_6_forecast]
+    prediction = model.predict(horizon=3, confidence_level=0.9)
+
+    print(prediction.peek())
+    # Expected output:
+    #            forecast_timestamp                             start_station_name  forecast_value  standard_error  confidence_level ...
+    # 4   2016-10-01 00:00:00+00:00                         Central Park S & 6 Ave      302.377201       32.572948               0.9 ...
+    # 14  2016-10-02 00:00:00+00:00  Central Park North & Adam Clayton Powell Blvd      263.917567       45.284082               0.9 ...
+    # 1   2016-09-25 00:00:00+00:00                    Central Park West & W 85 St      189.574706       39.874856               0.9 ...
+    # 20  2016-10-02 00:00:00+00:00                    Central Park West & W 72 St      175.474862       40.940794               0.9 ...
+    # 12  2016-10-01 00:00:00+00:00                   W 106 St & Central Park West        63.88163       18.088868               0.9 ...
+    # [END bigquery_dataframes_bqml_arima_multiple_step_6_forecast]