Commit fd4364e4 authored by number_cruncher
Commit message: image
Parent: fa4e312e
@@ -23,6 +23,8 @@ Now that you have trained your model, it is time to evaluate its performance. Ru…
From the OpenAI Gym wiki we know that the environment counts as solved when the average reward is greater than or equal to 195 over 100 consecutive trials.
With the evaluation script I used, the success rate is 1.0 when we allow the maximum number of steps the environment offers.
+![REINFORCE CartPole](reinforce_cartpole_dr_0.5.png)
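As a reference for this success-rate claim, a minimal evaluation loop could look like the sketch below. It assumes the trained REINFORCE policy is a PyTorch module that maps an observation to action probabilities and was saved as `reinforce_cartpole.pth`; the `Policy` class and the file name are assumptions, not taken from this commit.

```python
import gymnasium as gym
import torch

from reinforce_cartpole import Policy  # hypothetical module/class name

env = gym.make("CartPole-v1")
policy = Policy()
policy.load_state_dict(torch.load("reinforce_cartpole.pth"))
policy.eval()

num_episodes = 100
threshold = 195  # CartPole "solved" criterion from the OpenAI Gym wiki
returns = []

for _ in range(num_episodes):
    obs, info = env.reset()
    done, episode_return = False, 0.0
    while not done:
        with torch.no_grad():
            probs = policy(torch.as_tensor(obs, dtype=torch.float32))
        action = int(torch.argmax(probs))  # greedy action at evaluation time
        obs, reward, terminated, truncated, info = env.step(action)
        episode_return += reward
        done = terminated or truncated
    returns.append(episode_return)

success_rate = sum(r >= threshold for r in returns) / num_episodes
print(f"average return: {sum(returns) / num_episodes:.1f}, success rate: {success_rate:.2f}")
```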
## Familiarization with a complete RL pipeline: Application to training a robotic arm

Stable-Baselines3 (SB3) is a high-level RL library that provides various algorithms and integrated tools to easily train and test reinforcement learning models.
@@ -36,7 +38,8 @@ Stable-Baselines3 (SB3) is a high-level RL library that provides various algorit…
🛠 Share the link of the wandb run in the `README.md` file.

wandb: https://wandb.ai/lennartecl-centrale-lyon/sb3?nw=nwuserlennartecl
-hugging: https://huggingface.co/lennartoe/Cartpole-v1/tree/main
+huggingface: https://huggingface.co/lennartoe/Cartpole-v1/tree/main
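To sanity-check the shared artifact, the uploaded model can be pulled back from the Hub and evaluated locally, e.g. with `huggingface_sb3.load_from_hub` and SB3's `evaluate_policy`. A minimal sketch; the checkpoint filename inside the repo is an assumption.

```python
import gymnasium as gym
from huggingface_sb3 import load_from_hub
from stable_baselines3 import A2C
from stable_baselines3.common.evaluation import evaluate_policy

# The filename of the checkpoint inside the repo is an assumption; adjust to the actual artifact.
checkpoint = load_from_hub(repo_id="lennartoe/Cartpole-v1", filename="a2c-CartPole-v1.zip")
model = A2C.load(checkpoint)

env = gym.make("CartPole-v1")
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, deterministic=True)
print(f"mean reward: {mean_reward:.1f} +/- {std_reward:.1f}")
```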
### Full workflow with panda-gym
@@ -46,5 +49,6 @@ hugging: https://huggingface.co/lennartoe/Cartpole-v1/tree/main
> Share all the code in `a2c_sb3_panda_reach.py`. Share the link of the wandb run and the trained model in the `README.md` file.

wandb: https://wandb.ai/lennartecl-centrale-lyon/pandasgym_sb3?nw=nwuserlennartecl
-hugging: https://huggingface.co/lennartoe/PandaReachJointsDense-v3/tree/main
+huggingface: https://huggingface.co/lennartoe/PandaReachJointsDense-v3/tree/main
@@ -20,7 +20,7 @@ run = wandb.init(
    save_code=True,
)

-env = gym.make("CartPole-v1", render_mode="rgb_array")
+env = gym.make("CartPole-v1")

model = A2C("MlpPolicy", env, verbose=1, tensorboard_log=f"runs/{run.id}")
#model = A2C("MlpPolicy", env, )
@@ -31,7 +31,6 @@ obs = vec_env.reset()
for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
-    vec_env.render("human")

run.finish()
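The hunks above only show the parts of the CartPole A2C script that changed; the training call and the `vec_env` used in the prediction loop sit outside them. Following the wandb SB3 integration documentation, the full flow typically looks like the sketch below; the project name, timestep budget, and callback parameters are assumptions, not values from this commit.

```python
import gymnasium as gym
import wandb
from stable_baselines3 import A2C
from wandb.integration.sb3 import WandbCallback

# Assumed project/config values; the actual ones are not visible in this diff.
run = wandb.init(project="sb3", sync_tensorboard=True, save_code=True)

env = gym.make("CartPole-v1")
model = A2C("MlpPolicy", env, verbose=1, tensorboard_log=f"runs/{run.id}")

# Stream SB3 training metrics to the wandb run; 25_000 steps is an assumption.
model.learn(
    total_timesteps=25_000,
    callback=WandbCallback(model_save_path=f"models/{run.id}", verbose=2),
)

# model.get_env() returns the VecEnv SB3 trained on, i.e. the vec_env
# stepped through in the prediction loop shown in the diff.
vec_env = model.get_env()
obs = vec_env.reset()
for _ in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)

run.finish()
```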
@@ -10,7 +10,7 @@ from huggingface_sb3 import package_to_hub
# from documentation of wandb
config = {
    "policy_type": "MultiInputPolicy",
-    "total_timesteps": 50000,
+    "total_timesteps": 500000,
    "env_name": "PandaReachJointsDense-v3",
}
run = wandb.init(
@@ -21,7 +21,7 @@ run = wandb.init(
    save_code=True,
)

-env = gym.make("PandaReachJointsDense-v3", render_mode="rgb_array")
+env = gym.make("PandaReachJointsDense-v3")

model = A2C("MultiInputPolicy", env, verbose=1, tensorboard_log=f"runs/{run.id}")
#model = A2C("MlpPolicy", env, )
@@ -32,10 +32,6 @@ obs = vec_env.reset()
for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
-    vec_env.render("human")
-    # VecEnv resets automatically
-    # if done:
-    #     obs = vec_env.reset()

run.finish()
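The PandaReach script imports `package_to_hub` from `huggingface_sb3`, which is how the `lennartoe/PandaReachJointsDense-v3` repo linked above gets populated. A minimal upload sketch; the saved-model path, model name, and commit message are assumptions.

```python
import gymnasium as gym
import panda_gym  # noqa: F401  (importing registers the PandaReach environments)
from huggingface_sb3 import package_to_hub
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv

# Path of the locally saved model is an assumption.
model = A2C.load("a2c_panda_reach.zip")

package_to_hub(
    model=model,
    model_name="a2c-PandaReachJointsDense-v3",
    model_architecture="A2C",
    env_id="PandaReachJointsDense-v3",
    eval_env=DummyVecEnv([lambda: gym.make("PandaReachJointsDense-v3")]),
    repo_id="lennartoe/PandaReachJointsDense-v3",
    commit_message="Upload A2C PandaReachJointsDense-v3 agent",
)
```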
New file: reinforce_cartpole_dr_0.5.png (38.2 KiB)