Skip to content

Commit 381d553

Browse files
authored
Add mujoco reward lib (#1569)
* add reward lib and format tomls
* add taplo config for pyproject toml
* add rewards lib
* remove signs from rewards
* rename things
1 parent c7a9144 commit 381d553

File tree

9 files changed

+95
-25
lines changed

9 files changed

+95
-25
lines changed

.taplo.toml

+15
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,21 @@ keys = ["tool.uv"]
2020

2121
[rule.formatting]
2222
array_auto_collapse = false
23+
reorder_arrays = true
24+
25+
[[rule]]
26+
include = ["**/pyproject.toml"]
27+
keys = ["tool.uv.sources"]
28+
29+
[rule.formatting]
30+
reorder_keys = true
31+
32+
[[rule]]
33+
include = ["**/pyproject.toml"]
34+
keys = ["project"]
35+
36+
[rule.formatting]
37+
reorder_arrays = true
2338

2439
[[rule]]
2540
include = ["**/Cargo.toml"]

tools/machine-learning/mujoco/packages/kinematics/pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ name = "kinematics"
33
version = "0.1.0"
44
description = "Add your description here"
55
requires-python = ">=3.10"
6-
dependencies = ["transforms", "numpy", "robot-dimensions"]
6+
dependencies = ["numpy", "robot-dimensions", "transforms"]
77

88
[build-system]
99
requires = ["hatchling"]

tools/machine-learning/mujoco/packages/nao_env/pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ name = "nao-env"
33
version = "0.1.0"
44
description = "Add your description here"
55
requires-python = ">=3.10"
6-
dependencies = ["gymnasium", "numpy"]
6+
dependencies = ["gymnasium", "nao-interface", "numpy", "rewards"]
77

88
[build-system]
99
requires = ["hatchling"]

tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py

+15-11
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,11 @@
22
from typing import ClassVar
33

44
import numpy as np
5+
import rewards
56
from gymnasium import utils
67
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
78
from gymnasium.spaces import Box
9+
from nao_interface import Nao
810

911
DEFAULT_CAMERA_CONFIG = {
1012
"trackbodyid": 1,
@@ -58,16 +60,18 @@ def _get_obs(self) -> np.ndarray:
5860

5961
def step(self, action):
6062
self.do_simulation(action, self.frame_skip)
61-
data = self.data
63+
nao = Nao(self.model, self.data)
6264

63-
head_center_id = self.model.site("head_center").id
64-
head_center_z = data.site_xpos[head_center_id][2]
65-
uph_cost = (head_center_z - 0) / self.model.opt.timestep
65+
head_elevation_reward = rewards.head_height(nao)
66+
control_amplitude_penalty = 0.1 * rewards.ctrl_amplitude(nao)
67+
impact_penalty = min(0.5e-6 * rewards.impact_forces(nao), 10)
6668

67-
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
68-
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
69-
quad_impact_cost = min(quad_impact_cost, 10)
70-
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
69+
reward = (
70+
head_elevation_reward
71+
- control_amplitude_penalty
72+
- impact_penalty
73+
+ 1
74+
)
7175

7276
if self.render_mode == "human":
7377
self.render()
@@ -78,9 +82,9 @@ def step(self, action):
7882
False,
7983
False,
8084
{
81-
"reward_linup": uph_cost,
82-
"reward_quadctrl": -quad_ctrl_cost,
83-
"reward_impact": -quad_impact_cost,
85+
"head_elevation_reward": head_elevation_reward,
86+
"control_amplitude_penalty": control_amplitude_penalty,
87+
"impact_penalty": impact_penalty,
8488
},
8589
)
8690

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
[project]
2+
version = "0.1.0"
3+
name = "rewards"
4+
description = "Add your description here"
5+
requires-python = ">=3.10"
6+
dependencies = ["nao-interface", "numpy"]
7+
8+
[build-system]
9+
requires = ["hatchling"]
10+
build-backend = "hatchling.build"
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
import numpy as np
2+
from nao_interface import Nao
3+
4+
5+
def ctrl_amplitude(nao: Nao) -> float:
6+
return np.square(nao.data.ctrl).sum()
7+
8+
9+
def impact_forces(nao: Nao) -> float:
10+
return np.square(nao.data.cfrc_ext).sum()
11+
12+
13+
def head_height(nao: Nao) -> float:
14+
head_center_id = nao.model.site_name2id("head_center")
15+
return nao.data.site_xpos[head_center_id][2]

tools/machine-learning/mujoco/packages/throwing/pyproject.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ name = "throwing"
33
version = "0.1.0"
44
description = "Add your description here"
55
requires-python = ">=3.10"
6-
dependencies = ["numpy", "mujoco"]
6+
dependencies = ["mujoco", "numpy"]
77

88
[build-system]
99
requires = ["hatchling"]

tools/machine-learning/mujoco/pyproject.toml

+15-11
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,9 @@
11
[project]
2+
name = "mujoco-env"
3+
description = "Add your description here"
4+
version = "0.1.0"
5+
requires-python = ">=3.10"
6+
27
dependencies = [
38
"gymnasium[classic-control]",
49
"ipykernel>=6.29.5",
@@ -9,28 +14,27 @@ dependencies = [
914
"scipy>=1.14.1",
1015
"stable-baselines3>=2.3.2",
1116
"tensorboard>=2.18.0",
17+
"wandb>=0.18.5",
18+
# Project dependencies
1219
"kinematics",
13-
"nao-interface",
1420
"nao-env",
15-
"transforms",
16-
"walking-engine",
21+
"nao-interface",
22+
"rewards",
1723
"robot-dimensions",
1824
"throwing",
19-
"wandb>=0.18.5",
25+
"transforms",
26+
"walking-engine",
2027
]
21-
name = "mujoco-env"
22-
version = "0.1.0"
23-
description = "Add your description here"
24-
requires-python = ">=3.10"
2528

2629
[tool.uv.sources]
2730
kinematics = { workspace = true }
28-
nao_interface = { workspace = true }
2931
nao_env = { workspace = true }
30-
transforms = { workspace = true }
31-
walking_engine = { workspace = true }
32+
nao_interface = { workspace = true }
33+
rewards = { workspace = true }
3234
robot_dimensions = { workspace = true }
3335
throwing = { workspace = true }
36+
transforms = { workspace = true }
37+
walking_engine = { workspace = true }
3438

3539
[tool.uv.workspace]
3640
members = ["packages/*"]

tools/machine-learning/mujoco/uv.lock

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)