diff --git a/CHANGES.md b/CHANGES.md
index cd493f25f..e7da259bf 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 ### Changed
+
+- All neural net classes now inherit from sklearn's [`BaseEstimator`](https://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html). This change ensures compatibility with sklearn 1.6.0 and above. Classification models additionally inherit from [`ClassifierMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.ClassifierMixin.html), and regressors from [`RegressorMixin`](https://scikit-learn.org/stable/modules/generated/sklearn.base.RegressorMixin.html).
+
 ### Fixed
 
 - Fix an issue with using `NeuralNetBinaryClassifier` with `torch.compile` (#1058)
diff --git a/skorch/callbacks/base.py b/skorch/callbacks/base.py
index ecd3eed59..c78d6a754 100644
--- a/skorch/callbacks/base.py
+++ b/skorch/callbacks/base.py
@@ -1,9 +1,6 @@
 """ Basic callback definition. """
 
-import warnings
-
 from sklearn.base import BaseEstimator
-from skorch.exceptions import SkorchWarning
 
 
 __all__ = ['Callback']
diff --git a/skorch/classifier.py b/skorch/classifier.py
index 82707367b..2b36cac46 100644
--- a/skorch/classifier.py
+++ b/skorch/classifier.py
@@ -51,7 +51,7 @@ def get_neural_net_clf_doc(doc):
 
 
 # pylint: disable=missing-docstring
-class NeuralNetClassifier(NeuralNet, ClassifierMixin):
+class NeuralNetClassifier(ClassifierMixin, NeuralNet):
     __doc__ = get_neural_net_clf_doc(NeuralNet.__doc__)
 
     def __init__(
@@ -258,7 +258,7 @@ def get_neural_net_binary_clf_doc(doc):
     return doc
 
 
-class NeuralNetBinaryClassifier(NeuralNet, ClassifierMixin):
+class NeuralNetBinaryClassifier(ClassifierMixin, NeuralNet):
     # pylint: disable=missing-docstring
     __doc__ = get_neural_net_binary_clf_doc(NeuralNet.__doc__)
 
diff --git a/skorch/hf.py b/skorch/hf.py
index c370af122..d63dd159d 100644
--- a/skorch/hf.py
+++ b/skorch/hf.py
@@ -24,7 +24,7 @@
 from skorch.utils import check_is_fitted, params_for
 
 
-class _HuggingfaceTokenizerBase(BaseEstimator, TransformerMixin):
+class _HuggingfaceTokenizerBase(TransformerMixin, BaseEstimator):
     """Base class for yet to train and pretrained tokenizers
 
     Implements the ``vocabulary_`` attribute and the methods
diff --git a/skorch/llm/classifier.py b/skorch/llm/classifier.py
index ecf5c54f1..0131712e2 100644
--- a/skorch/llm/classifier.py
+++ b/skorch/llm/classifier.py
@@ -276,7 +276,7 @@ def generate_logits(self, *, label_id, **kwargs):
         return recorded_logits + recorder.recorded_scores[:]
 
 
-class _LlmBase(BaseEstimator, ClassifierMixin):
+class _LlmBase(ClassifierMixin, BaseEstimator):
     """Base class for LLM models
 
     This class handles a few of the checks, as well as the whole prediction
diff --git a/skorch/net.py b/skorch/net.py
index 6b7748be9..6ea319a2b 100644
--- a/skorch/net.py
+++ b/skorch/net.py
@@ -50,7 +50,7 @@
 
 
 # pylint: disable=too-many-instance-attributes
-class NeuralNet:
+class NeuralNet(BaseEstimator):
     # pylint: disable=anomalous-backslash-in-string
     """NeuralNet base class.
 
@@ -1992,7 +1992,7 @@ def _get_params_callbacks(self, deep=True):
         return params
 
     def get_params(self, deep=True, **kwargs):
-        params = BaseEstimator.get_params(self, deep=deep, **kwargs)
+        params = super().get_params(deep=deep, **kwargs)
         # Callback parameters are not returned by .get_params, needs
         # special treatment.
         params_cb = self._get_params_callbacks(deep=deep)
@@ -2111,7 +2111,7 @@ def set_params(self, **kwargs):
                 normal_params[key] = val
 
         self._apply_virtual_params(virtual_params)
-        BaseEstimator.set_params(self, **normal_params)
+        super().set_params(**normal_params)
 
         for key, val in special_params.items():
             if key.endswith('_'):
diff --git a/skorch/probabilistic.py b/skorch/probabilistic.py
index d972d377f..854ca1257 100644
--- a/skorch/probabilistic.py
+++ b/skorch/probabilistic.py
@@ -12,6 +12,7 @@
 import gpytorch
 import numpy as np
 import torch
+from sklearn.base import ClassifierMixin, RegressorMixin
 
 from skorch.net import NeuralNet
 from skorch.dataset import ValidSplit
@@ -391,7 +392,7 @@ def __getstate__(self):
             raise pickle.PicklingError(msg) from exc
 
 
-class _GPRegressorPredictMixin:
+class _GPRegressorPredictMixin(RegressorMixin):
     """Mixin class that provides a predict method for GP regressors."""
     def predict(self, X, return_std=False, return_cov=False):
         """Returns the predicted mean and optionally standard deviation.
@@ -778,7 +779,7 @@ def get_gp_binary_clf_doc(doc):
     return doc
 
 
-class GPBinaryClassifier(GPBase):
+class GPBinaryClassifier(ClassifierMixin, GPBase):
     __doc__ = get_gp_binary_clf_doc(NeuralNet.__doc__)
 
     def __init__(
diff --git a/skorch/regressor.py b/skorch/regressor.py
index 3e2c40ce9..a77cd2763 100644
--- a/skorch/regressor.py
+++ b/skorch/regressor.py
@@ -33,7 +33,7 @@ def get_neural_net_reg_doc(doc):
 
 
 # pylint: disable=missing-docstring
-class NeuralNetRegressor(NeuralNet, RegressorMixin):
+class NeuralNetRegressor(RegressorMixin, NeuralNet):
     __doc__ = get_neural_net_reg_doc(NeuralNet.__doc__)
 
     def __init__(
diff --git a/skorch/tests/test_helper.py b/skorch/tests/test_helper.py
index f9ddb8528..d7d553d58 100644
--- a/skorch/tests/test_helper.py
+++ b/skorch/tests/test_helper.py
@@ -437,6 +437,7 @@ def test_grid_search_with_slds_works(
             self, slds, y, classifier_module):
         from sklearn.model_selection import GridSearchCV
         from skorch import NeuralNetClassifier
+        from skorch.utils import to_numpy
 
         net = NeuralNetClassifier(
             classifier_module,
@@ -450,12 +451,16 @@ def test_grid_search_with_slds_works(
         gs = GridSearchCV(
             net, params, refit=False, cv=3, scoring='accuracy', error_score='raise'
         )
-        gs.fit(slds, y)  # does not raise
+        # TODO: once a sklearn version newer than 1.6 is released, the to_numpy call
+        # should no longer be required and can be removed, see:
+        # https://github.com/skorch-dev/skorch/pull/1078#discussion_r1887197261
+        gs.fit(slds, to_numpy(y))  # does not raise
 
     def test_grid_search_with_slds_and_internal_split_works(
             self, slds, y, classifier_module):
         from sklearn.model_selection import GridSearchCV
         from skorch import NeuralNetClassifier
+        from skorch.utils import to_numpy
 
         net = NeuralNetClassifier(classifier_module)
         params = {
@@ -465,7 +470,10 @@ def test_grid_search_with_slds_and_internal_split_works(
         gs = GridSearchCV(
             net, params, refit=True, cv=3, scoring='accuracy', error_score='raise'
         )
-        gs.fit(slds, y)  # does not raise
+        # TODO: once a sklearn version newer than 1.6 is released, the to_numpy call
+        # should no longer be required and can be removed, see:
+        # https://github.com/skorch-dev/skorch/pull/1078#discussion_r1887197261
+        gs.fit(slds, to_numpy(y))  # does not raise
 
     def test_grid_search_with_slds_X_and_slds_y(
             self, slds, slds_y, classifier_module):