2
2
from typing import ClassVar
3
3
4
4
import numpy as np
5
+ import rewards
5
6
from gymnasium import utils
6
7
from gymnasium .envs .mujoco .mujoco_env import MujocoEnv
7
8
from gymnasium .spaces import Box
9
+ from nao_interface import Nao
8
10
9
11
DEFAULT_CAMERA_CONFIG = {
10
12
"trackbodyid" : 1 ,
@@ -58,16 +60,18 @@ def _get_obs(self) -> np.ndarray:
58
60
59
61
def step (self , action ):
60
62
self .do_simulation (action , self .frame_skip )
61
- data = self .data
63
+ nao = Nao ( self .model , self . data )
62
64
63
- head_center_id = self . model . site ( "head_center" ). id
64
- head_center_z = data . site_xpos [ head_center_id ][ 2 ]
65
- uph_cost = ( head_center_z - 0 ) / self . model . opt . timestep
65
+ head_elevation_reward = rewards . head_height ( nao )
66
+ control_amplitude_penalty = 0.1 * rewards . ctrl_amplitude ( nao )
67
+ impact_penalty = min ( 0.5e-6 * rewards . impact_forces ( nao ), 10 )
66
68
67
- quad_ctrl_cost = 0.1 * np .square (data .ctrl ).sum ()
68
- quad_impact_cost = 0.5e-6 * np .square (data .cfrc_ext ).sum ()
69
- quad_impact_cost = min (quad_impact_cost , 10 )
70
- reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
69
+ reward = (
70
+ head_elevation_reward
71
+ - control_amplitude_penalty
72
+ - impact_penalty
73
+ + 1
74
+ )
71
75
72
76
if self .render_mode == "human" :
73
77
self .render ()
@@ -78,9 +82,9 @@ def step(self, action):
78
82
False ,
79
83
False ,
80
84
{
81
- "reward_linup " : uph_cost ,
82
- "reward_quadctrl " : - quad_ctrl_cost ,
83
- "reward_impact " : - quad_impact_cost ,
85
+ "head_elevation_reward " : head_elevation_reward ,
86
+ "control_amplitude_penalty " : control_amplitude_penalty ,
87
+ "impact_penalty " : impact_penalty ,
84
88
},
85
89
)
86
90
0 commit comments