
Commit 411c2f9

Merge pull request #243 from cpnota/release/0.7.0
Release/0.7.0
2 parents 9c44d41 + f72b0e4 commit 411c2f9

File tree

173 files changed (+4980, -2049 lines)


.github/workflows/python-package.yml

Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Python package
+
+on:
+  push:
+    branches: [ master, develop ]
+  pull_request:
+    branches: [ master, develop ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        sudo apt-get install swig
+        sudo apt-get install unrar
+        pip install torch==1.8.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+        make install
+        AutoROM -v
+    - name: Lint code
+      run: |
+        make lint
+    - name: Run tests
+      run: |
+        make test

.github/workflows/python-publish.yml

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*

.gitignore

Lines changed: 5 additions & 0 deletions
@@ -10,9 +10,14 @@ autonomous_learning_library.egg-info
 # editor
 .vscode
 .idea
+*.code-workspace
 
 # non-committed code
 local
 legacy
 /runs
 /out
+
+# notebooks
+*.ipynb
+*.ipynb_checkpoints

.travis.yml

Lines changed: 0 additions & 17 deletions
This file was deleted.

CONTRIBUTING.md

Lines changed: 6 additions & 0 deletions
@@ -26,6 +26,12 @@ The unit tests may be run using:
 make test
 ```
 
+You can automatically format your code to match our code style using:
+
+```
+make format
+```
+
 Finally, you rebuild the documentation using:
 
 ```

Makefile

Lines changed: 11 additions & 3 deletions
@@ -1,12 +1,20 @@
 install:
 	pip install -e .[dev]
 
-lint:
-	pylint all --rcfile=.pylintrc
+test: unit-test integration-test
 
-test:
+unit-test:
 	python -m unittest discover -s all -p "*test.py"
 
+integration-test:
+	python -m unittest discover -s integration -p "*test.py"
+
+lint:
+	flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all
+
+format:
+	autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all
+
 tensorboard:
 	tensorboard --logdir runs
 
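
The new test target simply chains the two discovery runs: unit-test collects every module matching *test.py under all/, and integration-test does the same under the integration/ directory. As a rough sketch of what that discovery pattern picks up, a minimal test module would look like the following (the filename and class are hypothetical, not part of this commit):

```
# all/example_test.py -- hypothetical module matching the "*test.py" discovery pattern
import unittest


class ExampleTest(unittest.TestCase):
    def test_smoke(self):
        # real tests in the library exercise agents, approximators, presets, etc.
        self.assertEqual(1 + 1, 2)


if __name__ == "__main__":
    unittest.main()
```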

all/.DS_Store

10 KB
Binary file not shown.

all/__init__.py

Lines changed: 23 additions & 1 deletion
@@ -1,4 +1,26 @@
+import all.agents
+import all.approximation
+import all.core
+import all.environments
+import all.logging
+import all.memory
 import all.nn
+import all.optim
+import all.policies
+import all.presets
 from all.core import State, StateArray
 
-__all__ = ['nn', 'State', 'StateArray']
+__all__ = [
+    'agents',
+    'approximation',
+    'core',
+    'environments',
+    'logging',
+    'memory',
+    'nn',
+    'optim',
+    'policies',
+    'presets',
+    'State',
+    'StateArray'
+]
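
Importing the subpackages at the top level means a single import of the package now exposes every subpackage as an attribute, alongside the re-exported State and StateArray classes. A minimal sketch of what this enables:

```
# minimal sketch: one top-level import now exposes the subpackages listed in __all__
import all

print(all.__all__)        # names exported by the package
state_cls = all.State     # re-exported from all.core
agents = all.agents       # subpackage available without a separate import
```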

all/agents/__init__.py

Lines changed: 33 additions & 12 deletions
@@ -1,29 +1,50 @@
 from ._agent import Agent
-from .a2c import A2C
-from .c51 import C51
-from .ddpg import DDPG
-from .ddqn import DDQN
-from .dqn import DQN
-from .ppo import PPO
-from .rainbow import Rainbow
-from .sac import SAC
-from .vac import VAC
-from .vpg import VPG
-from .vqn import VQN
-from .vsarsa import VSarsa
+from ._multiagent import Multiagent
+from ._parallel_agent import ParallelAgent
+from .a2c import A2C, A2CTestAgent
+from .c51 import C51, C51TestAgent
+from .ddpg import DDPG, DDPGTestAgent
+from .ddqn import DDQN, DDQNTestAgent
+from .dqn import DQN, DQNTestAgent
+from .independent import IndependentMultiagent
+from .ppo import PPO, PPOTestAgent
+from .rainbow import Rainbow, RainbowTestAgent
+from .sac import SAC, SACTestAgent
+from .vac import VAC, VACTestAgent
+from .vpg import VPG, VPGTestAgent
+from .vqn import VQN, VQNTestAgent
+from .vsarsa import VSarsa, VSarsaTestAgent
+
 
 __all__ = [
+    # Agent interfaces
     "Agent",
+    "Multiagent",
+    "ParallelAgent",
+    # Agent implementations
     "A2C",
+    "A2CTestAgent",
     "C51",
+    "C51TestAgent",
     "DDPG",
+    "DDPGTestAgent",
     "DDQN",
+    "DDQNTestAgent",
     "DQN",
+    "DQNTestAgent",
     "PPO",
+    "PPOTestAgent",
     "Rainbow",
+    "RainbowTestAgent",
     "SAC",
+    "SACTestAgent",
     "VAC",
+    "VACTestAgent",
     "VPG",
+    "VPGTestAgent",
     "VQN",
+    "VQNTestAgent",
     "VSarsa",
+    "VSarsaTestAgent",
+    "IndependentMultiagent",
 ]
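
Each algorithm now ships a paired *TestAgent alongside its training agent, and the new multiagent and parallel-agent interfaces are exported from the same package. A short import sketch mirroring the updated __all__ (illustrative only):

```
# these imports mirror the updated __all__ above
from all.agents import Agent, ParallelAgent, Multiagent    # agent interfaces
from all.agents import DQN, DQNTestAgent                   # training agent and its test-time counterpart
from all.agents import IndependentMultiagent               # multiagent wrapper built from independent agents
```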

all/agents/_agent.py

Lines changed: 4 additions & 20 deletions
@@ -1,15 +1,16 @@
 from abc import ABC, abstractmethod
 from all.optim import Schedulable
 
+
 class Agent(ABC, Schedulable):
     """
     A reinforcement learning agent.
 
     In reinforcement learning, an Agent learns by interacting with an Environment.
-    Usually, an agent tries to maximize a reward signal.
+    Usually, an Agent tries to maximize a reward signal.
     It does this by observing environment "states", taking "actions", receiving "rewards",
-    and in doing so, learning which state-action pairs correlate with high rewards.
-    An Agent implementation should encapsulate some particular reinforcement learning algorihthm.
+    and learning which state-action pairs correlate with high rewards.
+    An Agent implementation should encapsulate some particular reinforcement learning algorithm.
     """
 
     @abstractmethod
@@ -31,20 +32,3 @@ def act(self, state):
         Returns:
             torch.Tensor: The action to take at the current timestep.
         """
-
-    @abstractmethod
-    def eval(self, state):
-        """
-        Select an action for the current timestep in evaluation mode.
-
-        Unlike act, this method should NOT update the internal parameters of the agent.
-        Most of the time, this method should return the greedy action according to the current policy.
-        This method is useful when using evaluation methodologies that distinguish between the performance
-        of the agent during training and the performance of the resulting policy.
-
-        Args:
-            state (all.environment.State): The environment state at the current timestep.
-
-        Returns:
-            torch.Tensor: The action to take at the current timestep.
-        """

all/agents/_multiagent.py

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+from abc import ABC, abstractmethod
+from all.optim import Schedulable
+
+
+class Multiagent(ABC, Schedulable):
+    """
+    A multiagent RL agent. Differs from standard agents in that it accepts a multiagent state.
+
+    In reinforcement learning, an Agent learns by interacting with an Environment.
+    Usually, an agent tries to maximize a reward signal.
+    It does this by observing environment "states", taking "actions", receiving "rewards",
+    and learning which state-action pairs correlate with high rewards.
+    An Agent implementation should encapsulate some particular reinforcement learning algorithm.
+    """
+
+    @abstractmethod
+    def act(self, multiagent_state):
+        """
+        Select an action for the current timestep and update internal parameters.
+
+        In general, a reinforcement learning agent does several things during a timestep:
+        1. Choose an action,
+        2. Compute the TD error from the previous time step
+        3. Update the value function and/or policy
+        The order of these steps differs depending on the agent.
+        This method allows the agent to do whatever is necessary for itself on a given timestep.
+        However, the agent must ultimately return an action.
+
+        Args:
+            multiagent_state (all.core.MultiagentState): The environment state at the current timestep.
+
+        Returns:
+            torch.Tensor: The action for the current agent to take at the current timestep.
+        """

all/agents/_parallel_agent.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+from abc import ABC, abstractmethod
+from all.optim import Schedulable
+
+
+class ParallelAgent(ABC, Schedulable):
+    """
+    A reinforcement learning agent that chooses actions for multiple states simultaneously.
+    Differs from SingleAgent in that it accepts a StateArray instead of a State to process
+    input from multiple environments in parallel.
+
+    In reinforcement learning, an Agent learns by interacting with an Environment.
+    Usually, an Agent tries to maximize a reward signal.
+    It does this by observing environment "states", taking "actions", receiving "rewards",
+    and learning which state-action pairs correlate with high rewards.
+    An Agent implementation should encapsulate some particular reinforcement learning algorithm.
+    """
+
+    @abstractmethod
+    def act(self, state_array):
+        """
+        Select an action for the current timestep and update internal parameters.
+
+        In general, a reinforcement learning agent does several things during a timestep:
+        1. Choose an action,
+        2. Compute the TD error from the previous time step
+        3. Update the value function and/or policy
+        The order of these steps differs depending on the agent.
+        This method allows the agent to do whatever is necessary for itself on a given timestep.
+        However, the agent must ultimately return an action.
+
+        Args:
+            state_array (all.environment.StateArray): An array of states for each parallel environment.
+
+        Returns:
+            torch.Tensor: The actions to take for each parallel environmets.
+        """

all/agents/a2c.py

Lines changed: 13 additions & 5 deletions
@@ -1,10 +1,12 @@
+import torch
 from torch.nn.functional import mse_loss
 from all.logging import DummyWriter
 from all.memory import NStepAdvantageBuffer
 from ._agent import Agent
+from ._parallel_agent import ParallelAgent
 
 
-class A2C(Agent):
+class A2C(ParallelAgent):
     """
     Advantage Actor-Critic (A2C).
     A2C is policy gradient method in the actor-critic family.
@@ -24,6 +26,7 @@ class A2C(Agent):
         n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout.
         writer (Writer): Used for logging.
     """
+
     def __init__(
         self,
         features,
@@ -60,9 +63,6 @@ def act(self, states):
         self._actions = self.policy.no_grad(self.features.no_grad(states)).sample()
         return self._actions
 
-    def eval(self, states):
-        return self.policy.eval(self.features.eval(states))
-
     def _train(self, next_states):
         if len(self._buffer) >= self._batch_size:
             # load trajectories from buffer
@@ -99,4 +99,12 @@ def _make_buffer(self):
             self.n_envs,
             discount_factor=self.discount_factor
         )
-
+
+
+class A2CTestAgent(Agent):
+    def __init__(self, features, policy):
+        self.features = features
+        self.policy = policy
+
+    def act(self, state):
+        return self.policy.eval(self.features.eval(state)).sample()
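
The deleted eval() method is replaced by A2CTestAgent, which wraps the trained features and policy networks and samples an action from the evaluated policy with no training-side bookkeeping. A hedged usage sketch; it relies only on the features and policy attributes of a trained A2C instance, which are visible in its act() method above:

```
# hedged usage sketch: reuse the networks learned by a trained A2C instance
from all.agents import A2C, A2CTestAgent


def make_test_agent(trained_a2c: A2C) -> A2CTestAgent:
    # no optimizer, buffer, or writer is carried over; only the learned networks
    return A2CTestAgent(trained_a2c.features, trained_a2c.policy)
```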
