diff --git a/README.md b/README.md
index 0c50cc419c84a15a5e515b3706d6aa27ac978538..3c9e45e491730e3f7dcbfd581d99bc05d716396b 100644
--- a/README.md
+++ b/README.md
@@ -10,12 +10,12 @@
 - <b>Save:</b> [policy_cartpole.pth](saves/policy_cartpole.pth)
 - <b>Code:</b> [reinforce_cartpole.py](reinforce_cartpole.py)
 Below are the rewards across 300 episodes:
-![Rewards across episodes](saves/plot_rewards500.png)
-
+![Rewards across episodes](saves/plot_rewards.png)
+- <b>Model:</b> [policy_cartpole.pth](saves/policy_cartpole.pth)
 #### 1.2 Evaluation
 
 - <b>Code:</b> [evaluate_reinforce_cartpole.py](evaluate_reinforce_cartpole.py)
-The evaluation has been done one 100 episodes and the sucess threshold is set at a score of 400.
+The evaluation has been done on 100 episodes and the success threshold is set to 400.
 
 We finally have an evaluation with a 100% success rate:
 
@@ -26,5 +26,5 @@ We finally have an evaluation with a 100% success rate:
 Here we set up a complete pipeline to solve the CartPole environment with the A2C algorithm.
 
 Wandb has been set up to track the learning phase:
-
+Wandb run: [cartpole-a2c](https://wandb.ai/maximecerise-ecl/cartpole-a2c)
 ![alt text](saves/rollout.png)
diff --git a/a2c_sb3_cartpole.py b/a2c_sb3_cartpole.py
index 50f784709d629804612a5849af456d3b1e5d60f3..3cce8ed108e21d7267db74b3f5fa82ca08b2234f 100644
--- a/a2c_sb3_cartpole.py
+++ b/a2c_sb3_cartpole.py
@@ -13,15 +13,16 @@ env = DummyVecEnv([lambda: env])
 
 wandb.init(
     entity="maximecerise-ecl",
-    project="cartpole-a2c_",
+    project="cartpole-a2c",
     sync_tensorboard=True,
     monitor_gym=True,
     save_code=True
     )
 
 
-model = A2C("MlpPolicy", env, verbose=1)
-model.learn(total_timesteps=5000)
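+# Write TensorBoard logs so the wandb run (sync_tensorboard=True) can sync training metrics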
+model = A2C("MlpPolicy", env, verbose=1, tensorboard_log="./a2c_tensorboard/")
+model.learn(total_timesteps=500000)
 
 
 model.save("a2c_cartpole")
diff --git a/evaluate_reinforce_cartpole.py b/evaluate_reinforce_cartpole.py
new file mode 100644
index 0000000000000000000000000000000000000000..42f48985e181b66a9c169bdba2d8819398e49eeb
--- /dev/null
+++ b/evaluate_reinforce_cartpole.py
@@ -0,0 +1,53 @@
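+"""Evaluate a trained REINFORCE policy on CartPole-v1: 100 greedy episodes, success threshold 400."""
+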
+import gym
+import torch
+from reinforce_cartpole import PolicyNetwork
+
+def evaluate_reinforce_cpole():
+    env = gym.make("CartPole-v1", render_mode="human")
+    obs_dim = env.observation_space.shape[0]
+    action_dim = env.action_space.n
+
+    # Load the trained policy weights
+    policy = PolicyNetwork(obs_dim, action_dim)
+    policy.load_state_dict(torch.load("saves/policy_cartpole.pth"))
+    policy.eval()  # evaluation mode
+
+    num_episodes = 100
+    success_threshold = 400  # Minimum score for an episode to count as a success
+    success_count = 0
+    scores = []
+
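+    # Roll out each episode with the greedy policy and record its total reward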
+    for episode in range(num_episodes):
+        state, _ = env.reset()
+        state = torch.tensor(state, dtype=torch.float32)
+        done = False
+        total_reward = 0
+        
+        while not done:
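+            # No gradient tracking needed during evaluation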
+            with torch.no_grad():
+                action_probs = policy(state)
+                action = torch.argmax(action_probs).item()  # Pick the most probable action
+            
+            next_state, reward, terminated, truncated, _ = env.step(action)
+            total_reward += reward
+            
+            state = torch.tensor(next_state, dtype=torch.float32)
+            done = terminated or truncated
+        
+        scores.append(total_reward)
+        if total_reward >= success_threshold:
+            success_count += 1
+        
+        print(f"Épisode {episode+1}: Score = {total_reward}")
+
+    success_rate = success_count / num_episodes * 100
+    print(f"\nSuccès: {success_count}/{num_episodes} ({success_rate:.2f}%)")
+
+    env.close()
+
+if __name__ == "__main__":
+    evaluate_reinforce_cpole()
diff --git a/videos/rl-video-step-0-to-step-1000.mp4 b/videos/rl-video-step-0-to-step-1000.mp4
index 93ec3d2d9597f8803bbcc193c167c8779a59dea8..98f6318f63cb99401362e83ad589912a7b61629f 100644
Binary files a/videos/rl-video-step-0-to-step-1000.mp4 and b/videos/rl-video-step-0-to-step-1000.mp4 differ