Add mujoco reward lib (#1569)

oleflb · web-flow · commit 381d5533fa33 · 2025-01-11T16:51:54.000Z
* add reward lib and format tomls

* add taplo config for pyproject toml

* add rewards lib

* remove signs from rewards

* rename things
diff --git a/.taplo.toml b/.taplo.toml
@@ -20,6 +20,21 @@ keys = ["tool.uv"]
 
 [rule.formatting]
 array_auto_collapse = false
+reorder_arrays = true
+
+[[rule]]
+include = ["**/pyproject.toml"]
+keys = ["tool.uv.sources"]
+
+[rule.formatting]
+reorder_keys = true
+
+[[rule]]
+include = ["**/pyproject.toml"]
+keys = ["project"]
+
+[rule.formatting]
+reorder_arrays = true
 
 [[rule]]
 include = ["**/Cargo.toml"]
diff --git a/tools/machine-learning/mujoco/packages/kinematics/pyproject.toml b/tools/machine-learning/mujoco/packages/kinematics/pyproject.toml
@@ -3,7 +3,7 @@ name = "kinematics"
 version = "0.1.0"
 description = "Add your description here"
 requires-python = ">=3.10"
-dependencies = ["transforms", "numpy", "robot-dimensions"]
+dependencies = ["numpy", "robot-dimensions", "transforms"]
 
 [build-system]
 requires = ["hatchling"]
diff --git a/tools/machine-learning/mujoco/packages/nao_env/pyproject.toml b/tools/machine-learning/mujoco/packages/nao_env/pyproject.toml
@@ -3,7 +3,7 @@ name = "nao-env"
 version = "0.1.0"
 description = "Add your description here"
 requires-python = ">=3.10"
-dependencies = ["gymnasium", "numpy"]
+dependencies = ["gymnasium", "nao-interface", "numpy", "rewards"]
 
 [build-system]
 requires = ["hatchling"]
diff --git a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py
@@ -2,9 +2,11 @@
 from typing import ClassVar
 
 import numpy as np
+import rewards
 from gymnasium import utils
 from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
 from gymnasium.spaces import Box
+from nao_interface import Nao
 
 DEFAULT_CAMERA_CONFIG = {
     "trackbodyid": 1,
@@ -58,16 +60,18 @@ def _get_obs(self) -> np.ndarray:
 
     def step(self, action):
         self.do_simulation(action, self.frame_skip)
-        data = self.data
+        nao = Nao(self.model, self.data)
 
-        head_center_id = self.model.site("head_center").id
-        head_center_z = data.site_xpos[head_center_id][2]
-        uph_cost = (head_center_z - 0) / self.model.opt.timestep
+        head_elevation_reward = rewards.head_height(nao)
+        control_amplitude_penalty = 0.1 * rewards.ctrl_amplitude(nao)
+        impact_penalty = min(0.5e-6 * rewards.impact_forces(nao), 10)
 
-        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
-        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
-        quad_impact_cost = min(quad_impact_cost, 10)
-        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
+        reward = (
+            head_elevation_reward
+            - control_amplitude_penalty
+            - impact_penalty
+            + 1
+        )
 
         if self.render_mode == "human":
             self.render()
@@ -78,9 +82,9 @@ def step(self, action):
             False,
             False,
             {
-                "reward_linup": uph_cost,
-                "reward_quadctrl": -quad_ctrl_cost,
-                "reward_impact": -quad_impact_cost,
+                "head_elevation_reward": head_elevation_reward,
+                "control_amplitude_penalty": control_amplitude_penalty,
+                "impact_penalty": impact_penalty,
             },
         )
 
diff --git a/tools/machine-learning/mujoco/packages/rewards/pyproject.toml b/tools/machine-learning/mujoco/packages/rewards/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+version = "0.1.0"
+name = "rewards"
+description = "Add your description here"
+requires-python = ">=3.10"
+dependencies = ["nao-interface", "numpy"]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/tools/machine-learning/mujoco/packages/rewards/src/rewards/__init__.py b/tools/machine-learning/mujoco/packages/rewards/src/rewards/__init__.py
@@ -0,0 +1,49 @@
+"""Reusable reward and penalty terms computed from a NAO MuJoCo simulation.
+
+Each function returns a raw, unweighted scalar; callers apply
+their own weights, signs and clipping.
+"""
+
+import numpy as np
+from nao_interface import Nao
+
+
+def ctrl_amplitude(nao: Nao) -> float:
+    """Return the sum of squared entries of ``nao.data.ctrl``.
+
+    Args:
+        nao: Robot wrapper exposing the simulation ``data``.
+
+    Returns:
+        Sum of the element-wise squares of the control vector.
+    """
+    return np.square(nao.data.ctrl).sum()
+
+
+def impact_forces(nao: Nao) -> float:
+    """Return the sum of squared entries of ``nao.data.cfrc_ext``.
+
+    Args:
+        nao: Robot wrapper exposing the simulation ``data``.
+
+    Returns:
+        Sum of the element-wise squares over all entries of ``cfrc_ext``.
+    """
+    return np.square(nao.data.cfrc_ext).sum()
+
+
+def head_height(nao: Nao) -> float:
+    """Return the z coordinate of the ``head_center`` site position.
+
+    Args:
+        nao: Robot wrapper exposing the simulation ``model`` and ``data``.
+
+    Returns:
+        Third component of the ``head_center`` entry in ``data.site_xpos``.
+    """
+    # Named accessor, the same lookup the environment used before this
+    # helper existed. NOTE(review): assumes ``nao.model`` is the model object
+    # handed to ``Nao(...)`` - confirm; ``site_name2id`` is the older
+    # mujoco-py spelling and is not offered by the official bindings.
+    head_center_id = nao.model.site("head_center").id
+    return nao.data.site_xpos[head_center_id][2]
diff --git a/tools/machine-learning/mujoco/packages/throwing/pyproject.toml b/tools/machine-learning/mujoco/packages/throwing/pyproject.toml
@@ -3,7 +3,7 @@ name = "throwing"
 version = "0.1.0"
 description = "Add your description here"
 requires-python = ">=3.10"
-dependencies = ["numpy", "mujoco"]
+dependencies = ["mujoco", "numpy"]
 
 [build-system]
 requires = ["hatchling"]
diff --git a/tools/machine-learning/mujoco/pyproject.toml b/tools/machine-learning/mujoco/pyproject.toml
@@ -1,4 +1,9 @@
 [project]
+name = "mujoco-env"
+description = "Add your description here"
+version = "0.1.0"
+requires-python = ">=3.10"
+
 dependencies = [
   "gymnasium[classic-control]",
   "ipykernel>=6.29.5",
@@ -9,28 +14,27 @@ dependencies = [
   "scipy>=1.14.1",
   "stable-baselines3>=2.3.2",
   "tensorboard>=2.18.0",
+  "wandb>=0.18.5",
+  # Project dependencies
   "kinematics",
-  "nao-interface",
   "nao-env",
-  "transforms",
-  "walking-engine",
+  "nao-interface",
+  "rewards",
   "robot-dimensions",
   "throwing",
-  "wandb>=0.18.5",
+  "transforms",
+  "walking-engine",
 ]
-name = "mujoco-env"
-version = "0.1.0"
-description = "Add your description here"
-requires-python = ">=3.10"
 
 [tool.uv.sources]
 kinematics = { workspace = true }
-nao_interface = { workspace = true }
 nao_env = { workspace = true }
-transforms = { workspace = true }
-walking_engine = { workspace = true }
+nao_interface = { workspace = true }
+rewards = { workspace = true }
 robot_dimensions = { workspace = true }
 throwing = { workspace = true }
+transforms = { workspace = true }
+walking_engine = { workspace = true }
 
 [tool.uv.workspace]
 members = ["packages/*"]
diff --git a/tools/machine-learning/mujoco/uv.lock b/tools/machine-learning/mujoco/uv.lock