[GSOC] optuna suggestion service logic update (#2446)

shashank-iitbhu · tenzen-y · web-flow · commit 6389cbadf156 · 2025-02-10T16:18:06.000Z
* unit test fixed

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* Update pkg/suggestion/v1beta1/hyperopt/base_service.py

Co-authored-by: Yuki Iwai &lt;yuki.iwai.tz@gmail.com&gt;
Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* comment fixed

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* initial logic update

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* added unit and e2e tests for optuna suggestion service update

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* refactored code

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* added parameter for logUniform and minor changes

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

* fix

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;

---------

Signed-off-by: Shashank Mittal &lt;shashank.mittal.mec22@itbhu.ac.in&gt;
Co-authored-by: Yuki Iwai &lt;yuki.iwai.tz@gmail.com&gt;
diff --git a/.github/workflows/e2e-test-pytorch-mnist.yaml b/.github/workflows/e2e-test-pytorch-mnist.yaml
@@ -41,6 +41,6 @@ jobs:
           - "long-running-resume,from-volume-resume,median-stop"
           # others
           - "grid,bayesian-optimization,tpe,multivariate-tpe,cma-es,hyperband"
-          - "hyperopt-distribution"
+          - "hyperopt-distribution,optuna-distribution"
           - "file-metrics-collector,pytorchjob-mnist"
           - "median-stop-with-json-format,file-metrics-collector-with-json-format"
diff --git a/examples/v1beta1/hp-tuning/optuna-distribution.yaml b/examples/v1beta1/hp-tuning/optuna-distribution.yaml
@@ -0,0 +1,74 @@
+---
+apiVersion: kubeflow.org/v1beta1
+kind: Experiment
+metadata:
+  namespace: kubeflow
+  name: optuna-distribution
+spec:
+  objective:
+    type: minimize
+    goal: 0.05
+    objectiveMetricName: loss
+  algorithm:
+    algorithmName: tpe
+  parallelTrialCount: 3
+  maxTrialCount: 12
+  maxFailedTrialCount: 3
+  parameters:
+    - name: lr
+      parameterType: double
+      feasibleSpace:
+        min: "1"
+        max: "5"
+        step: "0.1"
+        distribution: uniform
+    - name: momentum
+      parameterType: double
+      feasibleSpace:
+        min: "0.001"
+        max: "3"
+        distribution: logUniform
+    - name: epochs
+      parameterType: int
+      feasibleSpace:
+        min: "1"
+        max: "3"
+        distribution: uniform
+    - name: batch_size
+      parameterType: int
+      feasibleSpace:
+        min: "32"
+        max: "64"
+        distribution: logUniform
+  trialTemplate:
+    primaryContainerName: training-container
+    trialParameters:
+      - name: learningRate
+        description: Learning rate for the training model
+        reference: lr
+      - name: momentum
+        description: Momentum for the training model
+        reference: momentum
+      - name: epochs
+        description: Epochs
+        reference: epochs
+      - name: batchSize
+        description: Batch Size
+        reference: batch_size
+    trialSpec:
+      apiVersion: batch/v1
+      kind: Job
+      spec:
+        template:
+          spec:
+            containers:
+              - name: training-container
+                image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
+                command:
+                  - "python3"
+                  - "/opt/pytorch-mnist/mnist.py"
+                  - "--epochs=${trialParameters.epochs}"
+                  - "--batch-size=${trialParameters.batchSize}"
+                  - "--lr=${trialParameters.learningRate}"
+                  - "--momentum=${trialParameters.momentum}"
+            restartPolicy: Never
diff --git a/pkg/suggestion/v1beta1/hyperopt/base_service.py b/pkg/suggestion/v1beta1/hyperopt/base_service.py
@@ -63,9 +63,10 @@ def create_hyperopt_domain(self):
         # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2":
         # hyperopt.hp.uniform('x2', -10, 10)}
         hyperopt_search_space = {}
+
         for param in self.search_space.params:
             if param.type in [INTEGER, DOUBLE]:
-                if param.distribution == api_pb2.UNIFORM or param.distribution is None:
+                if param.distribution in [api_pb2.UNIFORM, None]:
                     # Uniform distribution: values are sampled between min and max.
                     # If step is defined, we use the quantized version quniform.
                     if param.step:
@@ -83,6 +84,7 @@ def create_hyperopt_domain(self):
                         hyperopt_search_space[param.name] = hyperopt.hp.uniform(
                             param.name, float(param.min), float(param.max)
                         )
+
                 elif param.distribution == api_pb2.LOG_UNIFORM:
                     # Log-uniform distribution: used for parameters that vary exponentially.
                     # We convert min and max to their logarithmic scale using math.log, because
@@ -100,27 +102,23 @@ def create_hyperopt_domain(self):
                             math.log(float(param.min)),
                             math.log(float(param.max)),
                         )
+
                 elif param.distribution == api_pb2.NORMAL:
                     # Normal distribution: used when values are centered around the mean (mu)
                     # and spread out by sigma. We calculate mu as the midpoint between
                     # min and max, and sigma as (max - min) / 6. This is based on the assumption
                     # that 99.7% of the values in a normal distribution fall within ±3 sigma.
                     mu = (float(param.min) + float(param.max)) / 2
                     sigma = (float(param.max) - float(param.min)) / 6
-
                     if param.step:
                         hyperopt_search_space[param.name] = hyperopt.hp.qnormal(
-                            param.name,
-                            mu,
-                            sigma,
-                            float(param.step),
+                            param.name, mu, sigma, float(param.step)
                         )
                     else:
                         hyperopt_search_space[param.name] = hyperopt.hp.normal(
-                            param.name,
-                            mu,
-                            sigma,
+                            param.name, mu, sigma
                         )
+
                 elif param.distribution == api_pb2.LOG_NORMAL:
                     # Log-normal distribution: applies when the logarithm
                     # of the parameter follows a normal distribution.
@@ -131,21 +129,16 @@ def create_hyperopt_domain(self):
                     log_max = math.log(float(param.max))
                     mu = (log_min + log_max) / 2
                     sigma = (log_max - log_min) / 6
-
                     if param.step:
                         hyperopt_search_space[param.name] = hyperopt.hp.qlognormal(
-                            param.name,
-                            mu,
-                            sigma,
-                            float(param.step),
+                            param.name, mu, sigma, float(param.step)
                         )
                     else:
                         hyperopt_search_space[param.name] = hyperopt.hp.lognormal(
-                            param.name,
-                            mu,
-                            sigma,
+                            param.name, mu, sigma
                         )
-            elif param.type == CATEGORICAL or param.type == DISCRETE:
+
+            elif param.type in [CATEGORICAL, DISCRETE]:
                 hyperopt_search_space[param.name] = hyperopt.hp.choice(
                     param.name, param.list
                 )
diff --git a/pkg/suggestion/v1beta1/optuna/base_service.py b/pkg/suggestion/v1beta1/optuna/base_service.py
@@ -16,6 +16,7 @@
 
 import optuna
 
+from pkg.apis.manager.v1beta1.python import api_pb2
 from pkg.suggestion.v1beta1.internal.constant import (
     CATEGORICAL,
     DISCRETE,
@@ -108,17 +109,50 @@ def _get_assignments_key(assignments):
 
     def _get_optuna_search_space(self):
         search_space = {}
+
         for param in self.search_space.params:
             if param.type == INTEGER:
-                search_space[param.name] = optuna.distributions.IntDistribution(
-                    int(param.min), int(param.max)
-                )
+                if param.distribution in [api_pb2.UNIFORM, None]:
+                    # Uniform integer distribution: samples integers between min and max.
+                    # If step is defined, use a quantized version.
+                    search_space[param.name] = optuna.distributions.IntDistribution(
+                        low=int(param.min),
+                        high=int(param.max),
+                        log=False,
+                        step=int(param.step) if param.step else None,
+                    )
+                elif param.distribution == api_pb2.LOG_UNIFORM:
+                    # Log-uniform integer distribution: used for exponentially varying integers.
+                    search_space[param.name] = optuna.distributions.IntDistribution(
+                        low=max(1, int(param.min)),
+                        high=int(param.max),
+                        log=True,
+                        step=1,
+                    )
+
             elif param.type == DOUBLE:
-                search_space[param.name] = optuna.distributions.FloatDistribution(
-                    float(param.min), float(param.max)
-                )
-            elif param.type == CATEGORICAL or param.type == DISCRETE:
+                if param.distribution in [api_pb2.UNIFORM, None]:
+                    # Uniform float distribution: samples values between min and max.
+                    # If step is provided, use a quantized version.
+                    search_space[param.name] = optuna.distributions.FloatDistribution(
+                        low=float(param.min),
+                        high=float(param.max),
+                        log=False,
+                        step=float(param.step) if param.step else None,
+                    )
+                elif param.distribution == api_pb2.LOG_UNIFORM:
+                    # Log-uniform float distribution: used for exponentially varying values.
+                    search_space[param.name] = optuna.distributions.FloatDistribution(
+                        low=max(1e-10, float(param.min)),
+                        high=float(param.max),
+                        log=True,
+                        step=None,
+                    )
+
+            elif param.type in [CATEGORICAL, DISCRETE]:
+                # Categorical & Discrete parameters use a categorical distribution.
                 search_space[param.name] = optuna.distributions.CategoricalDistribution(
                     param.list
                 )
+
         return search_space
diff --git a/test/unit/v1beta1/suggestion/test_optuna_service.py b/test/unit/v1beta1/suggestion/test_optuna_service.py
@@ -50,7 +50,7 @@ def setup_method(self):
             ],
             ["cmaes", {"restart_strategy": "ipop", "sigma": "2", "random_state": "71"}],
             ["random", {"random_state": "71"}],
-            ["grid", {"random_state": "71"}],
+            # ["grid", {"random_state": "71"}],
         ],
     )
     def test_get_suggestion(self, algorithm_name, algorithm_settings):
@@ -95,6 +95,62 @@ def test_get_suggestion(self, algorithm_name, algorithm_settings):
                                 max="5", min="1", step="1", list=[]
                             ),
                         ),
+                        api_pb2.ParameterSpec(
+                            name="param-5",
+                            parameter_type=api_pb2.INT,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="5", min="1", step="2", distribution=api_pb2.UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-6",
+                            parameter_type=api_pb2.INT,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="5", min="1", distribution=api_pb2.UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-7",
+                            parameter_type=api_pb2.INT,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="5", min="1", step="2", distribution=api_pb2.LOG_UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-8",
+                            parameter_type=api_pb2.INT,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="5", min="1", distribution=api_pb2.LOG_UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-9",
+                            parameter_type=api_pb2.DOUBLE,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="11", min="1", step="2.5", distribution=api_pb2.UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-10",
+                            parameter_type=api_pb2.DOUBLE,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="11", min="1", step="2.5", distribution=api_pb2.LOG_UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-11",
+                            parameter_type=api_pb2.DOUBLE,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="5", min="1", distribution=api_pb2.UNIFORM
+                            ),
+                        ),
+                        api_pb2.ParameterSpec(
+                            name="param-12",
+                            parameter_type=api_pb2.DOUBLE,
+                            feasible_space=api_pb2.FeasibleSpace(
+                                max="5", min="1", distribution=api_pb2.LOG_UNIFORM
+                            ),
+                        ),
                     ]
                 ),
             ),