diff --git a/a2c_cartpole.zip b/a2c_cartpole.zip
new file mode 100644
index 0000000000000000000000000000000000000000..3639d680731143430104e9a3afe38eb30d2fb43d
Binary files /dev/null and b/a2c_cartpole.zip differ
diff --git a/a2c_sb3_cartpole.py b/a2c_sb3_cartpole.py
new file mode 100644
index 0000000000000000000000000000000000000000..209d42c65667d721298b94cd2f94167b4570d526
--- /dev/null
+++ b/a2c_sb3_cartpole.py
@@ -0,0 +1,23 @@
+import gymnasium as gym  # Use gymnasium instead of the deprecated gym package
+from stable_baselines3 import A2C
+from stable_baselines3.common.evaluation import evaluate_policy
+
+# Create the CartPole environment (Gymnasium syntax)
+env = gym.make("CartPole-v1")
+
+# Initialize the A2C model
+model = A2C("MlpPolicy", env, verbose=1)
+
+# Train the model
+model.learn(total_timesteps=100000)
+
+# Save the model
+model.save("a2c_cartpole")
+
+# Evaluate the trained model
+mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
+
+print(f"Mean Reward: {mean_reward:.2f} ± {std_reward:.2f}")
+
+# Close the environment
+env.close()
diff --git a/evaluate_reinforce_cartpole.py b/evaluate_reinforce_cartpole.py
new file mode 100644
index 0000000000000000000000000000000000000000..831c97dabec0cd4565359281ed32c338d1c8c4ef
--- /dev/null
+++ b/evaluate_reinforce_cartpole.py
@@ -0,0 +1,68 @@
+import gym  # requires gym>=0.26 API: reset() returns (obs, info), step() returns a 5-tuple
+import torch
+import torch.nn as nn
+import numpy as np
+
+# Define the same Policy Network structure used during training
+class PolicyNetwork(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(PolicyNetwork, self).__init__()
+        self.fc1 = nn.Linear(input_dim, 128)
+        self.dropout = nn.Dropout(0.2)
+        self.fc2 = nn.Linear(128, output_dim)
+        self.softmax = nn.Softmax(dim=-1)
+
+    def forward(self, x):
+        x = torch.relu(self.fc1(x))
+        x = self.dropout(x)
+        x = self.softmax(self.fc2(x))
+        return x
+
+# Load the trained model
+def load_model(filepath, env):
+    model = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
+    model.load_state_dict(torch.load(filepath))
+    model.eval()  # disable dropout for evaluation
+    return model
+
+# Evaluate the model for 100 episodes (195 is the CartPole-v0 solve threshold; v1 episodes cap at 500)
+def evaluate_model(env, model, num_episodes=100, success_threshold=195):
+    success_count = 0
+    rewards = []
+
+    for episode in range(num_episodes):
+        state = env.reset()[0]
+        total_reward = 0
+
+        while True:
+            state_tensor = torch.tensor(state, dtype=torch.float32)
+            action_probs = model(state_tensor)
+            action = torch.argmax(action_probs).item()  # greedy action selection
+
+            next_state, reward, terminated, truncated, _ = env.step(action)
+            total_reward += reward
+            state = next_state
+
+            if terminated or truncated:
+                break
+
+        rewards.append(total_reward)
+        if total_reward >= success_threshold:
+            success_count += 1
+
+        print(f"Episode {episode+1}: Total Reward = {total_reward}")
+
+    success_rate = success_count / num_episodes * 100
+    print(f"\nSuccess Rate: {success_rate:.2f}% ({success_count}/{num_episodes} episodes)")
+
+    return success_rate, rewards
+
+if __name__ == "__main__":
+    env = gym.make("CartPole-v1", render_mode="human")  # Render for visualization
+    model = load_model("reinforce_cartpole.pth", env)
+    success_rate, _ = evaluate_model(env, model, num_episodes=100)
+    env.close()
+
+    # Save success rate for README update
+    with open("evaluation_result.txt", "w") as f:
+        f.write(f"Success Rate: {success_rate:.2f}%\n")
diff --git a/evaluation_result.txt b/evaluation_result.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0be35bff0c32753099d9d8ed6b09faf260992058
--- /dev/null
+++ b/evaluation_result.txt
@@ -0,0 +1 @@
+Success Rate: 100.00%
diff --git a/push_to_hub.py b/push_to_hub.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6c8a105b356bb0926a5ad027a97dcb42179c312
--- /dev/null
+++ b/push_to_hub.py
@@ -0,0 +1,24 @@
+import gymnasium as gym
+from stable_baselines3 import A2C
+from stable_baselines3.common.vec_env import DummyVecEnv
+from huggingface_sb3 import package_to_hub
+
+# Define your Hugging Face repository ID
+repo_id = "pinkiexi/a2c-cartpole-v1"  # Change this!
+
+# Create a vectorized eval environment; package_to_hub records a replay video, so rgb_array rendering is needed
+env = DummyVecEnv([lambda: gym.make("CartPole-v1", render_mode="rgb_array")])
+
+# Load the trained model
+model = A2C.load("a2c_cartpole")
+
+# Upload the model to Hugging Face Hub
+package_to_hub(
+    model=model,
+    model_name="A2C",
+    model_architecture="A2C",
+    env_id="CartPole-v1",
+    repo_id=repo_id,
+    commit_message="Upload trained A2C model for CartPole",
+    eval_env=env,  # vectorized environment used for evaluation and the replay video
+)
diff --git a/reinforce_cartpole.pth b/reinforce_cartpole.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fef2108e9d370f6a85a40fccc5f5713e73df3497
Binary files /dev/null and b/reinforce_cartpole.pth differ
diff --git a/reinforce_cartpole.py b/reinforce_cartpole.py
new file mode 100644
index 0000000000000000000000000000000000000000..da7aa518bb88ecf190ab90c86f2f13cf5dca1802
--- /dev/null
+++ b/reinforce_cartpole.py
@@ -0,0 +1,93 @@
+import gym  # requires gym>=0.26 API: reset() returns (obs, info), step() returns a 5-tuple
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Define the Policy Network
+class PolicyNetwork(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(PolicyNetwork, self).__init__()
+        self.fc1 = nn.Linear(input_dim, 128)
+        self.dropout = nn.Dropout(0.2)
+        self.fc2 = nn.Linear(128, output_dim)
+        self.softmax = nn.Softmax(dim=-1)
+
+    def forward(self, x):
+        x = torch.relu(self.fc1(x))
+        x = self.dropout(x)
+        x = self.softmax(self.fc2(x))
+        return x
+
+# Define the REINFORCE Algorithm
+class REINFORCE:
+    def __init__(self, env, learning_rate=5e-3, gamma=0.99):
+        self.env = env
+        self.gamma = gamma
+        self.policy = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
+        self.optimizer = optim.Adam(self.policy.parameters(), lr=learning_rate)
+        self.episode_rewards = []
+
+    def select_action(self, state):
+        state = torch.tensor(state, dtype=torch.float32)
+        action_probs = self.policy(state)
+        action = torch.multinomial(action_probs, 1).item()  # sample from the policy distribution
+        return action, action_probs[action]
+
+    def train(self, num_episodes=500):
+        all_rewards = []  # Store rewards for plotting
+
+        for episode in range(num_episodes):
+            state = self.env.reset()[0]
+            episode_memory = []
+            episode_reward = 0
+
+            while True:
+                action, action_prob = self.select_action(state)
+                next_state, reward, terminated, truncated, _ = self.env.step(action)
+                episode_memory.append((action_prob, reward))
+                episode_reward += reward
+                state = next_state
+                if terminated or truncated:
+                    break
+
+            # Compute discounted returns for each step of the episode
+            returns = []
+            discounted_sum = 0
+            for _, reward in reversed(episode_memory):
+                discounted_sum = reward + self.gamma * discounted_sum
+                returns.insert(0, discounted_sum)
+
+            returns = torch.tensor(returns)
+            returns = (returns - returns.mean()) / (returns.std() + 1e-8)  # normalize returns
+
+            # Compute policy loss
+            loss = 0
+            for (action_prob, _), G in zip(episode_memory, returns):
+                loss -= torch.log(action_prob) * G
+
+            self.optimizer.zero_grad()
+            loss.backward()
+            self.optimizer.step()
+
+            all_rewards.append(episode_reward)
+            print(f"Episode {episode + 1}: Total Reward = {episode_reward}")
+
+        # Save the model
+        torch.save(self.policy.state_dict(), "reinforce_cartpole.pth")
+        print("Training complete. Model saved!")
+
+        # Plot rewards
+        plt.plot(all_rewards)
+        plt.xlabel("Episodes")
+        plt.ylabel("Total Reward")
+        plt.title("Training Progress")
+        plt.savefig("reward_plot.png")  # Save the plot
+        print("Reward plot saved as reward_plot.png.")
+
+if __name__ == "__main__":
+    env = gym.make("CartPole-v1", render_mode=None)  # Change render_mode to "human" for visualization
+    agent = REINFORCE(env)
+    agent.train(num_episodes=500)
+    env.close()
diff --git a/reward_plot.png b/reward_plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ccb35b44f14dc330883c4062492af46fd74f947
Binary files /dev/null and b/reward_plot.png differ