From 815276288783c1ff7067405d4b2ad24fc307e5e5 Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 14 Nov 2024 23:27:27 +0000 Subject: [PATCH 1/4] docs: add snippet for predicting classifications using a boosted tree model --- .../classification_boosted_tree_model_test.py | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py index fbc9369dde..81bb752d9d 100644 --- a/samples/snippets/classification_boosted_tree_model_test.py +++ b/samples/snippets/classification_boosted_tree_model_test.py @@ -14,7 +14,7 @@ def test_boosted_tree_model(random_model_id: str) -> None: - # your_model_id = random_model_id + your_model_id = random_model_id # [START bigquery_dataframes_bqml_boosted_tree_prepare] import bigframes.pandas as bpd @@ -39,4 +39,29 @@ def test_boosted_tree_model(random_model_id: str) -> None: ) del input_data["functional_weight"] # [END bigquery_dataframes_bqml_boosted_tree_prepare] + # [START bigquery_dataframes_bqml_boosted_tree_predict] + # Select model you'll use for predictions. `read_gbq_model` loads model + # data from BigQuery, but you could also use the `tree_model` object + # from previous steps. + tree_model = bpd.read_gbq_model( + your_model_id, # For example: "your-project.bqml_tutorial.tree_model" + ) + + # input_data is defined in an earlier step. 
+ prediction_data = input_data[input_data["dataframe"] == "prediction"] + + predictions = tree_model.predict(prediction_data) + predictions.peek() + # Output: + # predicted_income_bracket predicted_income_bracket_probs.label predicted_income_bracket_probs.prob + # <=50K >50K 0.05183430016040802 | + # <50K 0.94816571474075317 + # <=50K >50K 0.00365859130397439 + # <50K 0.99634140729904175 + # <=50K >50K 0.037775970995426178 + # <50K 0.96222406625747681 + # [END bigquery_dataframes_bqml_boosted_tree_predict] assert input_data is not None + assert tree_model is not None + assert predictions is not None + assert prediction_data is not None From 3ea3eb694a80f290a626924a4538fe7119e6b996 Mon Sep 17 00:00:00 2001 From: Daniela Date: Wed, 20 Nov 2024 16:02:21 +0000 Subject: [PATCH 2/4] merge and rename bigquery_dataframes_bqml_boosted_tree_explain to bigquery_dataframes_bqml_boosted_tree_evaluate --- samples/snippets/classification_boosted_tree_model_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py index d15bd09ea8..649afd8773 100644 --- a/samples/snippets/classification_boosted_tree_model_test.py +++ b/samples/snippets/classification_boosted_tree_model_test.py @@ -62,7 +62,7 @@ def test_boosted_tree_model(random_model_id: str) -> None: replace=True, ) # [END bigquery_dataframes_bqml_boosted_tree_create] - # [START bigquery_dataframes_bqml_boosted_tree_explain] + # [START bigquery_dataframes_bqml_boosted_tree_evaluate] # Select model you'll use for predictions. `read_gbq_model` loads model # data from BigQuery, but you could also use the `tree_model` object # from the previous step. 
@@ -82,7 +82,7 @@ def test_boosted_tree_model(random_model_id: str) -> None: # Output: # precision recall accuracy f1_score log_loss roc_auc # 0 0.671924 0.578804 0.839429 0.621897 0.344054 0.887335 - # [END bigquery_dataframes_bqml_boosted_tree_explain] + # [END bigquery_dataframes_bqml_boosted_tree_evaluate] assert tree_model is not None assert evaluation_data is not None assert score is not None From 619b6c4d0e34b5c05b685a834429ecdca696c705 Mon Sep 17 00:00:00 2001 From: Daniela Date: Wed, 20 Nov 2024 16:04:10 +0000 Subject: [PATCH 3/4] remove trailing | --- samples/snippets/classification_boosted_tree_model_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py index 649afd8773..bfafc1bcec 100644 --- a/samples/snippets/classification_boosted_tree_model_test.py +++ b/samples/snippets/classification_boosted_tree_model_test.py @@ -101,7 +101,7 @@ def test_boosted_tree_model(random_model_id: str) -> None: predictions.peek() # Output: # predicted_income_bracket predicted_income_bracket_probs.label predicted_income_bracket_probs.prob - # <=50K >50K 0.05183430016040802 | + # <=50K >50K 0.05183430016040802 # <50K 0.94816571474075317 # <=50K >50K 0.00365859130397439 # <50K 0.99634140729904175 From 759a24bfac4d45f2421ca99e395fa792b1d99ced Mon Sep 17 00:00:00 2001 From: Daniela Date: Wed, 20 Nov 2024 16:08:11 +0000 Subject: [PATCH 4/4] clean up asserts --- .../snippets/classification_boosted_tree_model_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/snippets/classification_boosted_tree_model_test.py b/samples/snippets/classification_boosted_tree_model_test.py index bfafc1bcec..c79706a020 100644 --- a/samples/snippets/classification_boosted_tree_model_test.py +++ b/samples/snippets/classification_boosted_tree_model_test.py @@ -83,9 +83,6 @@ def test_boosted_tree_model(random_model_id: str) -> 
None: # precision recall accuracy f1_score log_loss roc_auc # 0 0.671924 0.578804 0.839429 0.621897 0.344054 0.887335 # [END bigquery_dataframes_bqml_boosted_tree_evaluate] - assert tree_model is not None - assert evaluation_data is not None - assert score is not None # [START bigquery_dataframes_bqml_boosted_tree_predict] # Select model you'll use for predictions. `read_gbq_model` loads model # data from BigQuery, but you could also use the `tree_model` object @@ -109,6 +106,9 @@ def test_boosted_tree_model(random_model_id: str) -> None: # <50K 0.96222406625747681 # [END bigquery_dataframes_bqml_boosted_tree_predict] assert input_data is not None + assert training_data is not None assert tree_model is not None - assert predictions is not None + assert evaluation_data is not None + assert score is not None assert prediction_data is not None + assert predictions is not None