diff --git a/DDPG.py b/DDPG.py
index e2310e640..b0861f8ed 100644
--- a/DDPG.py
+++ b/DDPG.py
@@ -101,6 +101,7 @@ def train(self, replay_buffer, batch_size=64):
 	def save(self, filename):
 		torch.save(self.critic.state_dict(), filename + "_critic")
 		torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer")
+		torch.save(self.actor.state_dict(), filename + "_actor")
 		torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer")
 
 
@@ -108,6 +109,9 @@ def save(self, filename):
 	def load(self, filename):
 		self.critic.load_state_dict(torch.load(filename + "_critic"))
 		self.critic_optimizer.load_state_dict(torch.load(filename + "_critic_optimizer"))
+		self.critic_target = copy.deepcopy(self.critic)
+
 		self.actor.load_state_dict(torch.load(filename + "_actor"))
 		self.actor_optimizer.load_state_dict(torch.load(filename + "_actor_optimizer"))
-
+		self.actor_target = copy.deepcopy(self.actor)
+
\ No newline at end of file
diff --git a/OurDDPG.py b/OurDDPG.py
index 71396aa83..0e73afa92 100644
--- a/OurDDPG.py
+++ b/OurDDPG.py
@@ -100,6 +100,7 @@ def train(self, replay_buffer, batch_size=100):
 	def save(self, filename):
 		torch.save(self.critic.state_dict(), filename + "_critic")
 		torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer")
+		torch.save(self.actor.state_dict(), filename + "_actor")
 		torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer")
 
 
@@ -107,6 +108,9 @@ def save(self, filename):
 	def load(self, filename):
 		self.critic.load_state_dict(torch.load(filename + "_critic"))
 		self.critic_optimizer.load_state_dict(torch.load(filename + "_critic_optimizer"))
+		self.critic_target = copy.deepcopy(self.critic)
+
 		self.actor.load_state_dict(torch.load(filename + "_actor"))
 		self.actor_optimizer.load_state_dict(torch.load(filename + "_actor_optimizer"))
-
+		self.actor_target = copy.deepcopy(self.actor)
+
\ No newline at end of file
diff --git a/TD3.py b/TD3.py
index 4d6bd1d2b..41f717264 100644
--- a/TD3.py
+++ b/TD3.py
@@ -155,6 +155,7 @@ def train(self, replay_buffer, batch_size=100):
 	def save(self, filename):
 		torch.save(self.critic.state_dict(), filename + "_critic")
 		torch.save(self.critic_optimizer.state_dict(), filename + "_critic_optimizer")
+		torch.save(self.actor.state_dict(), filename + "_actor")
 		torch.save(self.actor_optimizer.state_dict(), filename + "_actor_optimizer")
 
 
@@ -162,5 +163,9 @@ def save(self, filename):
 	def load(self, filename):
 		self.critic.load_state_dict(torch.load(filename + "_critic"))
 		self.critic_optimizer.load_state_dict(torch.load(filename + "_critic_optimizer"))
+		self.critic_target = copy.deepcopy(self.critic)
+
 		self.actor.load_state_dict(torch.load(filename + "_actor"))
 		self.actor_optimizer.load_state_dict(torch.load(filename + "_actor_optimizer"))
+		self.actor_target = copy.deepcopy(self.actor)
+
\ No newline at end of file
diff --git a/main.py b/main.py
index 3429ed53e..d5e3f9f07 100644
--- a/main.py
+++ b/main.py
@@ -38,7 +38,7 @@ def eval_policy(policy, env_name, seed, eval_episodes=10):
 	parser.add_argument("--policy", default="TD3")                  # Policy name (TD3, DDPG or OurDDPG)
 	parser.add_argument("--env", default="HalfCheetah-v2")          # OpenAI gym environment name
 	parser.add_argument("--seed", default=0, type=int)              # Sets Gym, PyTorch and Numpy seeds
-	parser.add_argument("--start_timesteps", default=1e4, type=int) # Time steps initial random policy is used
+	parser.add_argument("--start_timesteps", default=25e3, type=int)# Time steps initial random policy is used
 	parser.add_argument("--eval_freq", default=5e3, type=int)       # How often (time steps) we evaluate
 	parser.add_argument("--max_timesteps", default=1e6, type=int)   # Max time steps to run environment
 	parser.add_argument("--expl_noise", default=0.1)                # Std of Gaussian exploration noise
diff --git a/run_experiments.sh b/run_experiments.sh
index b2a7ab04b..02c484aba 100644
--- a/run_experiments.sh
+++ b/run_experiments.sh
@@ -6,27 +6,28 @@ for ((i=0;i<10;i+=1))
 do 
 	python main.py \
 	--policy "TD3" \
-	--env "HalfCheetah-v2" \
-	--seed $i \
-	--start_timesteps 10000
+	--env "HalfCheetah-v3" \
+	--seed $i
 
 	python main.py \
 	--policy "TD3" \
-	--env "Hopper-v2" \
-	--seed $i \
-	--start_timesteps 1000
+	--env "Hopper-v3" \
+	--seed $i
 
 	python main.py \
 	--policy "TD3" \
-	--env "Walker2d-v2" \
-	--seed $i \
-	--start_timesteps 1000
+	--env "Walker2d-v3" \
+	--seed $i
 
 	python main.py \
 	--policy "TD3" \
-	--env "Ant-v2" \
-	--seed $i \
-	--start_timesteps 10000
+	--env "Ant-v3" \
+	--seed $i
+
+	python main.py \
+	--policy "TD3" \
+	--env "Humanoid-v3" \
+	--seed $i
 
 	python main.py \
 	--policy "TD3" \