diff --git a/a2c_sb3_panda_reach.py b/a2c_sb3_panda_reach.py index c3aa767acd0c048cb8155eea8ad5119b8f312755..f2ac80b8cea2dec346611b487b2c88afda6e99b1 100644 --- a/a2c_sb3_panda_reach.py +++ b/a2c_sb3_panda_reach.py @@ -17,17 +17,17 @@ total_timesteps = 100000 config = { "policy_type": "MlpPolicy", "total_timesteps": total_timesteps, - "env_name": "CartPole-v1", + "env_name": "PandaReachJointsDense-v3", } wandb.login() run = wandb.init( - project="a2c-PandaReachJointsDense-v2", + project="a2c-PandaReachJointsDense-v3", config=config, sync_tensorboard=True, # auto-upload sb3's tensorboard metrics monitor_gym=True, # auto-upload the videos of agents playing the game save_code=True, # optional ) -env_id = "PandaReachJointsDense-v2" +env_id = "PandaReachJointsDense-v3" # Register the environment register(id=env_id, entry_point='gym.envs.classic_control:CartPoleEnv', max_episode_steps=500)