+from timeit import default_timer as timer
 import torch
 import numpy as np
 from all.environments import State
@@ -26,6 +27,8 @@ def __init__(
         self._returns = []
         self._frame = 1
         self._episode = 1
+        self._episode_start_times = [] * self._n_envs
+        self._episode_start_frames = [] * self._n_envs
 
         # test state
         self._test_episodes = 100
@@ -66,6 +69,8 @@ def _reset(self):
             device=self._envs[0].device
         )
         self._returns = rewards
+        self._episode_start_times = [timer()] * self._n_envs
+        self._episode_start_frames = [self._frame] * self._n_envs
 
     def _step(self):
        states = self._aggregate_states()
@@ -80,10 +85,12 @@ def _step_envs(self, actions):
         for i, env in enumerate(self._envs):
             if env.done:
                 self._returns[i] += env.reward
-                self._log_training_episode(self._returns[i].item(), 0)
+                self._log_training_episode(self._returns[i].item(), self._fps(i))
                 env.reset()
                 self._returns[i] = 0
                 self._episode += 1
+                self._episode_start_times[i] = timer()
+                self._episode_start_frames[i] = self._frame
             else:
                 action = actions[i]
                 if action is not None:
@@ -134,5 +141,9 @@ def _aggregate_rewards(self):
             device=self._envs[0].device
         )
 
+    def _fps(self, i):
+        end_time = timer()
+        return (self._frame - self._episode_start_frames[i]) / (end_time - self._episode_start_times[i])
+
     def _make_writer(self, agent_name, env_name, write_loss):
         return ExperimentWriter(self, agent_name, env_name, loss=write_loss)
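
The FPS value logged per episode above is the number of frames advanced since that environment's episode began, divided by the wall-clock time elapsed over the same span, with each parallel environment keeping its own start time and start frame. A minimal standalone sketch of the same bookkeeping follows; the ParallelFpsTracker class and its method names are illustrative, not part of the library, and the shared frame counter is assumed to advance once per call to step(), mirroring self._frame in the experiment.

    from timeit import default_timer as timer

    class ParallelFpsTracker:
        # Illustrative helper, not library API: per-environment FPS measured as
        # (frames since episode start) / (wall-clock seconds since episode start).
        def __init__(self, n_envs):
            self._frame = 1
            self._episode_start_times = [timer()] * n_envs
            self._episode_start_frames = [self._frame] * n_envs

        def step(self):
            # advance the shared frame counter once per environment step
            self._frame += 1

        def episode_done(self, i):
            # FPS for env i's just-finished episode, then reset its counters
            elapsed = timer() - self._episode_start_times[i]
            fps = (self._frame - self._episode_start_frames[i]) / elapsed
            self._episode_start_times[i] = timer()
            self._episode_start_frames[i] = self._frame
            return fps

One design point worth noting: because the frame counter is shared across all environments rather than kept per environment, the quotient reflects the aggregate throughput of the parallel runner over that episode's lifetime, not the step rate of a single environment in isolation.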