diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py
index 550b4a8178..5662e54d6d 100644
--- a/bigframes/ml/base.py
+++ b/bigframes/ml/base.py
@@ -164,6 +164,40 @@ def fit(
         return self._fit(X, y)
 
 
+class TrainableWithEvaluationPredictor(TrainablePredictor):
+    """A BigQuery DataFrames ML Model base class that can be used to fit and predict outputs.
+
+    Evaluation data (X_eval, y_eval) is optional and can be passed to _fit() to measure the model in the fit phase."""
+
+    @abc.abstractmethod
+    def _fit(self, X, y, transforms=None, X_eval=None, y_eval=None):
+        pass  # abstract: subclasses implement it; X_eval/y_eval default to None
+
+    @abc.abstractmethod
+    def score(self, X, y):
+        pass  # abstract: subclasses implement it
+
+
+class SupervisedTrainableWithEvaluationPredictor(TrainableWithEvaluationPredictor):
+    """A BigQuery DataFrames ML Supervised Model base class that can be used to fit and predict outputs.
+
+    X and y are required in supervised tasks. X_eval and y_eval are optional, must be
+    provided together, and are used to measure the model in the fit phase."""
+
+    _T = TypeVar("_T", bound="SupervisedTrainableWithEvaluationPredictor")
+
+    def fit(
+        self: _T,
+        X: utils.ArrayType,
+        y: utils.ArrayType,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
+    ) -> _T:
+        if (X_eval is None) != (y_eval is None):  # a lone half would be ignored by _fit
+            raise ValueError("X_eval and y_eval must be provided together.")
+        return self._fit(X, y, X_eval=X_eval, y_eval=y_eval)
+
+
 class UnsupervisedTrainablePredictor(TrainablePredictor):
     """A BigQuery DataFrames ML Unsupervised Model base class that can be used to fit and predict outputs.
 
diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py
index 91c14e4336..253ef7c5c1 100644
--- a/bigframes/ml/ensemble.py
+++ b/bigframes/ml/ensemble.py
@@ -52,7 +52,7 @@
 
 @log_adapter.class_logger
 class XGBRegressor(
-    base.SupervisedTrainablePredictor,
+    base.SupervisedTrainableWithEvaluationPredictor,
     bigframes_vendored.xgboost.sklearn.XGBRegressor,
 ):
     __doc__ = bigframes_vendored.xgboost.sklearn.XGBRegressor.__doc__
@@ -145,14 +145,24 @@ def _fit(
         X: utils.ArrayType,
         y: utils.ArrayType,
         transforms: Optional[List[str]] = None,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
     ) -> XGBRegressor:
         X, y = utils.convert_to_dataframe(X, y)
 
+        bqml_options = self._bqml_options
+
+        if X_eval is not None and y_eval is not None:
+            X_eval, y_eval = utils.convert_to_dataframe(X_eval, y_eval)
+            X, y, bqml_options = utils.combine_training_and_evaluation_data(
+                X, y, X_eval, y_eval, bqml_options
+            )
+
         self._bqml_model = self._bqml_model_factory.create_model(
             X,
             y,
             transforms=transforms,
-            options=self._bqml_options,
+            options=bqml_options,
         )
         return self
 
@@ -200,7 +210,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor:
 
 @log_adapter.class_logger
 class XGBClassifier(
-    base.SupervisedTrainablePredictor,
+    base.SupervisedTrainableWithEvaluationPredictor,
     bigframes_vendored.xgboost.sklearn.XGBClassifier,
 ):
 
@@ -294,14 +304,24 @@ def _fit(
         X: utils.ArrayType,
         y: utils.ArrayType,
         transforms: Optional[List[str]] = None,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
     ) -> XGBClassifier:
         X, y = utils.convert_to_dataframe(X, y)
 
+        bqml_options = self._bqml_options
+
+        if X_eval is not None and y_eval is not None:
+            X_eval, y_eval = utils.convert_to_dataframe(X_eval, y_eval)
+            X, y, bqml_options = utils.combine_training_and_evaluation_data(
+                X, y, X_eval, y_eval, bqml_options
+            )
+
         self._bqml_model = self._bqml_model_factory.create_model(
             X,
             y,
             transforms=transforms,
-            options=self._bqml_options,
+            options=bqml_options,
         )
         return self
 
@@ -347,7 +367,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier:
 
 @log_adapter.class_logger
 class RandomForestRegressor(
-    base.SupervisedTrainablePredictor,
+    base.SupervisedTrainableWithEvaluationPredictor,
     bigframes_vendored.sklearn.ensemble._forest.RandomForestRegressor,
 ):
 
@@ -430,14 +450,24 @@ def _fit(
         X: utils.ArrayType,
         y: utils.ArrayType,
         transforms: Optional[List[str]] = None,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
     ) -> RandomForestRegressor:
         X, y = utils.convert_to_dataframe(X, y)
 
+        bqml_options = self._bqml_options
+
+        if X_eval is not None and y_eval is not None:
+            X_eval, y_eval = utils.convert_to_dataframe(X_eval, y_eval)
+            X, y, bqml_options = utils.combine_training_and_evaluation_data(
+                X, y, X_eval, y_eval, bqml_options
+            )
+
         self._bqml_model = self._bqml_model_factory.create_model(
             X,
             y,
             transforms=transforms,
-            options=self._bqml_options,
+            options=bqml_options,
         )
         return self
 
@@ -503,7 +533,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso
 
 @log_adapter.class_logger
 class RandomForestClassifier(
-    base.SupervisedTrainablePredictor,
+    base.SupervisedTrainableWithEvaluationPredictor,
     bigframes_vendored.sklearn.ensemble._forest.RandomForestClassifier,
 ):
 
@@ -586,14 +616,24 @@ def _fit(
         X: utils.ArrayType,
         y: utils.ArrayType,
         transforms: Optional[List[str]] = None,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
     ) -> RandomForestClassifier:
         X, y = utils.convert_to_dataframe(X, y)
 
+        bqml_options = self._bqml_options
+
+        if X_eval is not None and y_eval is not None:
+            X_eval, y_eval = utils.convert_to_dataframe(X_eval, y_eval)
+            X, y, bqml_options = utils.combine_training_and_evaluation_data(
+                X, y, X_eval, y_eval, bqml_options
+            )
+
         self._bqml_model = self._bqml_model_factory.create_model(
             X,
             y,
             transforms=transforms,
-            options=self._bqml_options,
+            options=bqml_options,
         )
         return self
 
diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py
index 5665507286..85be54e596 100644
--- a/bigframes/ml/linear_model.py
+++ b/bigframes/ml/linear_model.py
@@ -47,7 +47,7 @@
 
 @log_adapter.class_logger
 class LinearRegression(
-    base.SupervisedTrainablePredictor,
+    base.SupervisedTrainableWithEvaluationPredictor,
     bigframes_vendored.sklearn.linear_model._base.LinearRegression,
 ):
     __doc__ = bigframes_vendored.sklearn.linear_model._base.LinearRegression.__doc__
@@ -131,14 +131,24 @@ def _fit(
         X: utils.ArrayType,
         y: utils.ArrayType,
         transforms: Optional[List[str]] = None,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
     ) -> LinearRegression:
         X, y = utils.convert_to_dataframe(X, y)
 
+        bqml_options = self._bqml_options
+
+        if X_eval is not None and y_eval is not None:
+            X_eval, y_eval = utils.convert_to_dataframe(X_eval, y_eval)
+            X, y, bqml_options = utils.combine_training_and_evaluation_data(
+                X, y, X_eval, y_eval, bqml_options
+            )
+
         self._bqml_model = self._bqml_model_factory.create_model(
             X,
             y,
             transforms=transforms,
-            options=self._bqml_options,
+            options=bqml_options,
         )
         return self
 
@@ -183,7 +193,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> LinearRegression:
 
 @log_adapter.class_logger
 class LogisticRegression(
-    base.SupervisedTrainablePredictor,
+    base.SupervisedTrainableWithEvaluationPredictor,
     bigframes_vendored.sklearn.linear_model._logistic.LogisticRegression,
 ):
     __doc__ = (
@@ -283,15 +293,24 @@ def _fit(
         X: utils.ArrayType,
         y: utils.ArrayType,
         transforms: Optional[List[str]] = None,
+        X_eval: Optional[utils.ArrayType] = None,
+        y_eval: Optional[utils.ArrayType] = None,
     ) -> LogisticRegression:
-        """Fit model with transforms."""
         X, y = utils.convert_to_dataframe(X, y)
 
+        bqml_options = self._bqml_options
+
+        if X_eval is not None and y_eval is not None:
+            X_eval, y_eval = utils.convert_to_dataframe(X_eval, y_eval)
+            X, y, bqml_options = utils.combine_training_and_evaluation_data(
+                X, y, X_eval, y_eval, bqml_options
+            )
+
         self._bqml_model = self._bqml_model_factory.create_model(
             X,
             y,
             transforms=transforms,
-            options=self._bqml_options,
+            options=bqml_options,
         )
         return self
 
diff --git a/bigframes/ml/utils.py b/bigframes/ml/utils.py
index bdca45e457..8daed169da 100644
--- a/bigframes/ml/utils.py
+++ b/bigframes/ml/utils.py
@@ -13,13 +13,13 @@
 # limitations under the License.
 
 import typing
-from typing import Any, Generator, Literal, Mapping, Optional, Union
+from typing import Any, Generator, Literal, Mapping, Optional, Tuple, Union
 
 import bigframes_vendored.constants as constants
 from google.cloud import bigquery
 import pandas as pd
 
-from bigframes.core import blocks
+from bigframes.core import blocks, guid
 import bigframes.pandas as bpd
 from bigframes.session import Session
 
@@ -155,3 +155,63 @@ def retrieve_params_from_bq_model(
             kwargs[bf_param] = bf_param_type(last_fitting[bqml_param])
 
     return kwargs
+
+
+def combine_training_and_evaluation_data(
+    X_train: bpd.DataFrame,
+    y_train: bpd.DataFrame,
+    X_eval: bpd.DataFrame,
+    y_eval: bpd.DataFrame,
+    bqml_options: dict,
+) -> Tuple[bpd.DataFrame, bpd.DataFrame, dict]:
+    """
+    Combine training data and labels with evaluation data and labels, and keep
+    them differentiated through a split column in the combined data and labels.
+
+    The inputs are not modified: the split column is added to new DataFrames
+    and the options are returned as an updated copy.
+
+    Args:
+        X_train: Training features.
+        y_train: Training labels.
+        X_eval: Evaluation features; must have the same columns as X_train.
+        y_eval: Evaluation labels; must have the same columns as y_train.
+        bqml_options: BQML options to extend with the custom split settings.
+
+    Returns:
+        A tuple (X, y, bqml_options): X is the concatenation of X_train and
+        X_eval plus a split column that is False for training rows and True
+        for evaluation rows, y is the concatenation of y_train and y_eval,
+        and bqml_options is a copy of the input with "data_split_method" set
+        to "CUSTOM" and "data_split_col" set to the split column name.
+
+    Raises:
+        ValueError: If the training and evaluation columns differ.
+    """
+
+    # Raise instead of assert: these frames come from the caller, and asserts
+    # are skipped when Python runs with -O.
+    if not X_train.columns.equals(X_eval.columns):
+        raise ValueError("X_train and X_eval must have the same columns.")
+    if not y_train.columns.equals(y_eval.columns):
+        raise ValueError("y_train and y_eval must have the same columns.")
+
+    # create a custom split column for BQML and supply the evaluation
+    # data along with the training data in a combined single table
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-dnn-models#data_split_col.
+    split_col = guid.generate_guid()
+    assert split_col not in X_train.columns
+
+    # assign() returns new DataFrames, so the caller's X_train and X_eval do
+    # not gain the split column as a side effect.
+    X_train = X_train.assign(**{split_col: False})
+    X_eval = X_eval.assign(**{split_col: True})
+    X = bpd.concat([X_train, X_eval])
+    y = bpd.concat([y_train, y_eval])
+
+    # create options copy to not mutate the incoming one
+    bqml_options = bqml_options.copy()
+    bqml_options["data_split_method"] = "CUSTOM"
+    bqml_options["data_split_col"] = split_col
+
+    return X, y, bqml_options
diff --git a/tests/system/large/ml/test_linear_model.py b/tests/system/large/ml/test_linear_model.py
index 273da97bc5..f6ca26e7e4 100644
--- a/tests/system/large/ml/test_linear_model.py
+++ b/tests/system/large/ml/test_linear_model.py
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import pandas as pd
+
+from bigframes.ml import model_selection
 import bigframes.ml.linear_model
 from tests.system import utils
 
@@ -58,6 +61,85 @@ def test_linear_regression_configure_fit_score(penguins_df_default_index, datase
     assert reloaded_model.tol == 0.01
 
 
+def test_linear_regression_configure_fit_with_eval_score(
+    penguins_df_default_index, dataset_id
+):
+    model = bigframes.ml.linear_model.LinearRegression()
+
+    df = penguins_df_default_index.dropna()
+    X = df[
+        [
+            "species",
+            "island",
+            "culmen_length_mm",
+            "culmen_depth_mm",
+            "flipper_length_mm",
+            "sex",
+        ]
+    ]
+    y = df[["body_mass_g"]]
+
+    X_train, X_eval, y_train, y_eval = model_selection.train_test_split(X, y)
+
+    model.fit(X_train, y_train, X_eval=X_eval, y_eval=y_eval)
+
+    # Score on the same evaluation data passed to fit() to ensure the model was fitted
+    result = model.score(X_eval, y_eval).to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_REGRESSION_METRICS, index=1
+    )
+
+    # Save the model with to_gbq(), then check the returned model kept the configuration
+    bq_model_name = f"{dataset_id}.temp_configured_model"
+    reloaded_model = model.to_gbq(bq_model_name, replace=True)
+    assert reloaded_model._bqml_model is not None
+    assert (
+        f"{dataset_id}.temp_configured_model" in reloaded_model._bqml_model.model_name
+    )
+    assert reloaded_model.optimize_strategy == "NORMAL_EQUATION"
+    assert reloaded_model.fit_intercept is True
+    assert reloaded_model.calculate_p_values is False
+    assert reloaded_model.enable_global_explain is False
+    assert reloaded_model.l1_reg is None
+    assert reloaded_model.l2_reg == 0.0
+    assert reloaded_model.learning_rate is None
+    assert reloaded_model.learning_rate_strategy == "line_search"
+    assert reloaded_model.ls_init_learning_rate is None
+    assert reloaded_model.max_iterations == 20
+    assert reloaded_model.tol == 0.01
+
+    # Make sure the BQML model was trained with a CUSTOM data split on a split column
+    bq_model = penguins_df_default_index._session.bqclient.get_model(bq_model_name)
+    last_fitting = bq_model.training_runs[-1]["trainingOptions"]
+    assert last_fitting["dataSplitMethod"] == "CUSTOM"
+    assert "dataSplitColumn" in last_fitting
+
+    # Make sure the evaluation metrics attached to the BQML model match the
+    # first five metrics returned by model.score()
+    bq_model_expected_eval_metrics = result[utils.ML_REGRESSION_METRICS[:5]]
+    bq_model_eval_metrics = bq_model.training_runs[-1]["evaluationMetrics"][
+        "regressionMetrics"
+    ]
+    bq_model_eval_metrics = pd.DataFrame(
+        [
+            [
+                bq_model_eval_metrics["meanAbsoluteError"],
+                bq_model_eval_metrics["meanSquaredError"],
+                bq_model_eval_metrics["meanSquaredLogError"],
+                bq_model_eval_metrics["medianAbsoluteError"],
+                bq_model_eval_metrics["rSquared"],
+            ]
+        ],
+        columns=utils.ML_REGRESSION_METRICS[:5],
+    )
+    pd.testing.assert_frame_equal(
+        bq_model_expected_eval_metrics,
+        bq_model_eval_metrics,
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
 def test_linear_regression_customized_params_fit_score(
     penguins_df_default_index, dataset_id
 ):
@@ -216,6 +298,80 @@ def test_logistic_regression_configure_fit_score(penguins_df_default_index, data
     assert reloaded_model.class_weight is None
 
 
+def test_logistic_regression_configure_fit_with_eval_score(
+    penguins_df_default_index, dataset_id
+):
+    model = bigframes.ml.linear_model.LogisticRegression()
+
+    df = penguins_df_default_index.dropna()
+    df = df[df["sex"].isin(["MALE", "FEMALE"])]  # keep only two classes (binary task)
+
+    X = df[
+        [
+            "species",
+            "island",
+            "culmen_length_mm",
+            "culmen_depth_mm",
+            "flipper_length_mm",
+            "body_mass_g",
+        ]
+    ]
+    y = df[["sex"]]
+
+    X_train, X_eval, y_train, y_eval = model_selection.train_test_split(X, y)
+
+    model.fit(X_train, y_train, X_eval=X_eval, y_eval=y_eval)
+
+    # Score on the same evaluation data passed to fit() to ensure the model was fitted
+    result = model.score(X_eval, y_eval).to_pandas()
+    utils.check_pandas_df_schema_and_index(
+        result, columns=utils.ML_CLASSFICATION_METRICS, index=1
+    )
+
+    # Save the model with to_gbq(), then check the returned model kept the configuration
+    bq_model_name = f"{dataset_id}.temp_configured_logistic_reg_model"
+    reloaded_model = model.to_gbq(bq_model_name, replace=True)
+    assert reloaded_model._bqml_model is not None
+    assert (
+        f"{dataset_id}.temp_configured_logistic_reg_model"
+        in reloaded_model._bqml_model.model_name
+    )
+    assert reloaded_model.fit_intercept is True
+    assert reloaded_model.class_weight is None
+
+    # Make sure the BQML model was trained with a CUSTOM data split on a split column
+    bq_model = penguins_df_default_index._session.bqclient.get_model(bq_model_name)
+    last_fitting = bq_model.training_runs[-1]["trainingOptions"]
+    assert last_fitting["dataSplitMethod"] == "CUSTOM"
+    assert "dataSplitColumn" in last_fitting
+
+    # Make sure the evaluation metrics attached to the BQML model match the
+    # ones returned by model.score()
+    bq_model_expected_eval_metrics = result
+    bq_model_eval_metrics = bq_model.training_runs[-1]["evaluationMetrics"][
+        "binaryClassificationMetrics"
+    ]["aggregateClassificationMetrics"]
+    bq_model_eval_metrics = pd.DataFrame(
+        [
+            [
+                bq_model_eval_metrics["precision"],
+                bq_model_eval_metrics["recall"],
+                bq_model_eval_metrics["accuracy"],
+                bq_model_eval_metrics["f1Score"],
+                bq_model_eval_metrics["logLoss"],
+                bq_model_eval_metrics["rocAuc"],
+            ]
+        ],
+        columns=utils.ML_CLASSFICATION_METRICS,
+    )
+    pd.testing.assert_frame_equal(
+        bq_model_expected_eval_metrics,
+        bq_model_eval_metrics,
+        check_dtype=False,
+        check_index_type=False,
+    )
+
+
 def test_logistic_regression_customized_params_fit_score(
     penguins_df_default_index, dataset_id
 ):
diff --git a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
index 1f6284c146..fb81bd6684 100644
--- a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
+++ b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
@@ -54,6 +54,13 @@ def fit(self, X, y):
                 Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
                 Target values. Will be cast to X's dtype if necessary.
 
+            X_eval (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or DataFrame of shape (n_samples, n_features). Evaluation data.
+
+            y_eval (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
+                Evaluation target values. Will be cast to X_eval's dtype if necessary.
+
 
         Returns:
             ForestModel: Fitted estimator.
diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py
index fa8f28a656..d6b8a473bd 100644
--- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py
+++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py
@@ -108,6 +108,13 @@ def fit(
                 Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
                 Target values. Will be cast to X's dtype if necessary.
 
+            X_eval (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or DataFrame of shape (n_samples, n_features). Evaluation data.
+
+            y_eval (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
+                Evaluation target values. Will be cast to X_eval's dtype if necessary.
+
         Returns:
             LinearRegression: Fitted estimator.
         """
diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py
index f3419ba8a9..479be19596 100644
--- a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py
+++ b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py
@@ -79,6 +79,14 @@ def fit(
             y (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
                 DataFrame of shape (n_samples,). Target vector relative to X.
 
+            X_eval (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                Series or DataFrame of shape (n_samples, n_features). Evaluation vector,
+                where `n_samples` is the number of samples and `n_features` is
+                the number of features.
+
+            y_eval (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
+                DataFrame of shape (n_samples,). Target vector relative to X_eval.
+
 
         Returns:
             LogisticRegression: Fitted estimator.
diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py
index da1396af02..60a22e83d0 100644
--- a/third_party/bigframes_vendored/xgboost/sklearn.py
+++ b/third_party/bigframes_vendored/xgboost/sklearn.py
@@ -37,6 +37,13 @@ def fit(self, X, y):
                 DataFrame of shape (n_samples,) or (n_samples, n_targets).
                 Target values. Will be cast to X's dtype if necessary.
 
+            X_eval (bigframes.dataframe.DataFrame or bigframes.series.Series):
+                Series or DataFrame of shape (n_samples, n_features). Evaluation data.
+
+            y_eval (bigframes.dataframe.DataFrame or bigframes.series.Series):
+                DataFrame of shape (n_samples,) or (n_samples, n_targets).
+                Evaluation target values. Will be cast to X_eval's dtype if necessary.
+
         Returns:
             XGBModel: Fitted estimator.
         """