Commit bb0e23c5 authored by Chahri Hanane
Commit message: Your commit message
Parent: 5fcf0aca
File added
import gymnasium as gym # Use gymnasium instead of gym
from stable_baselines3 import A2C
from stable_baselines3.common.evaluation import evaluate_policy
# Create the CartPole environment (Gymnasium syntax)
env = gym.make("CartPole-v1")
# Initialize the A2C model
model = A2C("MlpPolicy", env, verbose=1)
# Train the model
model.learn(total_timesteps=100000)
# Save the model
model.save("a2c_cartpole")
# Evaluate the trained model
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"✅ Mean Reward: {mean_reward:.2f} ± {std_reward:.2f}")
# Close the environment
env.close()
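The saved policy can be reloaded later without retraining. A minimal sketch (assuming the a2c_cartpole.zip written by the script above sits in the working directory) that replays one rendered episode with the trained agent:

import gymnasium as gym
from stable_baselines3 import A2C

# Quick visual check: reload the saved A2C policy and watch a single episode.
# Assumes "a2c_cartpole.zip" produced by the training script above.
model = A2C.load("a2c_cartpole")
env = gym.make("CartPole-v1", render_mode="human")
obs, _ = env.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _ = env.step(int(action))
    done = terminated or truncated
env.close()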
import gymnasium as gym  # Gymnasium API: reset() returns (obs, info) and step() returns a 5-tuple, as used below
import torch
import torch.nn as nn
import numpy as np
# Define the same Policy Network structure
class PolicyNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(PolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, output_dim)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.softmax(self.fc2(x))
        return x

# Load the trained model
def load_model(filepath, env):
    model = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
    model.load_state_dict(torch.load(filepath))
    model.eval()
    return model

# Evaluate the model for 100 episodes
def evaluate_model(env, model, num_episodes=100, success_threshold=195):
    success_count = 0
    rewards = []
    for episode in range(num_episodes):
        state = env.reset()[0]
        total_reward = 0
        while True:
            state_tensor = torch.tensor(state, dtype=torch.float32)
            action_probs = model(state_tensor)
            action = torch.argmax(action_probs).item()
            next_state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
            state = next_state
            if terminated or truncated:
                break
        rewards.append(total_reward)
        if total_reward >= success_threshold:
            success_count += 1
        print(f"Episode {episode+1}: Total Reward = {total_reward}")
    success_rate = success_count / num_episodes * 100
    print(f"\n✅ Success Rate: {success_rate:.2f}% ({success_count}/{num_episodes} episodes)")
    return success_rate, rewards

if __name__ == "__main__":
    env = gym.make("CartPole-v1", render_mode="human")  # Render for visualization
    model = load_model("reinforce_cartpole.pth", env)
    success_rate, _ = evaluate_model(env, model, num_episodes=100)
    env.close()
    # Save success rate for README update
    with open("evaluation_result.txt", "w") as f:
        f.write(f"Success Rate: {success_rate:.2f}%\n")
Success Rate: 100.00%
import gymnasium as gym  # Gymnasium replaces the deprecated gym package
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from huggingface_sb3 import package_to_hub

# Define your Hugging Face repository ID
repo_id = "pinkiexi/a2c-cartpole-v1"  # Change this!

# Create the evaluation environment.
# package_to_hub expects a vectorized eval env; render_mode="rgb_array" lets it record a replay video.
eval_env = DummyVecEnv([lambda: Monitor(gym.make("CartPole-v1", render_mode="rgb_array"))])

# Load the trained model
model = A2C.load("a2c_cartpole")

# Upload the model to the Hugging Face Hub
package_to_hub(
    model=model,
    model_name="A2C",
    model_architecture="A2C",
    env_id="CartPole-v1",
    repo_id=repo_id,
    commit_message="Upload trained A2C model for CartPole",
    eval_env=eval_env,
)
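package_to_hub pushes the checkpoint and a generated model card to the Hub, so the script needs a write-access token before it runs. A minimal sketch of the login step (the repo_id pinkiexi/a2c-cartpole-v1 above is the author's; replace it with your own namespace):

from huggingface_hub import login

# Authenticate once before calling package_to_hub; the token comes from
# https://huggingface.co/settings/tokens and needs write access.
login()  # or login(token="hf_..."), or run `huggingface-cli login` in a shell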
File added
import gymnasium as gym  # Gymnasium API, matching the reset()/step() usage below
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
# Define the Policy Network
class PolicyNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(PolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, output_dim)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.softmax(self.fc2(x))
        return x

# Define the REINFORCE Algorithm
class REINFORCE:
    def __init__(self, env, learning_rate=5e-3, gamma=0.99):
        self.env = env
        self.gamma = gamma
        self.policy = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
        self.optimizer = optim.Adam(self.policy.parameters(), lr=learning_rate)
        self.episode_rewards = []

    def select_action(self, state):
        state = torch.tensor(state, dtype=torch.float32)
        action_probs = self.policy(state)
        action = torch.multinomial(action_probs, 1).item()
        return action, action_probs[action]

    def train(self, num_episodes=500):
        all_rewards = []  # Store rewards for plotting
        for episode in range(num_episodes):
            state = self.env.reset()[0]
            episode_memory = []
            episode_reward = 0
            while True:
                action, action_prob = self.select_action(state)
                next_state, reward, terminated, truncated, _ = self.env.step(action)
                episode_memory.append((action_prob, reward))
                episode_reward += reward
                state = next_state
                if terminated or truncated:
                    break

            # Compute returns
            returns = []
            discounted_sum = 0
            for _, reward in reversed(episode_memory):
                discounted_sum = reward + self.gamma * discounted_sum
                returns.insert(0, discounted_sum)
            returns = torch.tensor(returns)
            returns = (returns - returns.mean()) / (returns.std() + 1e-8)

            # Compute policy loss
            loss = 0
            for (action_prob, _), G in zip(episode_memory, returns):
                loss -= torch.log(action_prob) * G

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            all_rewards.append(episode_reward)
            print(f"Episode {episode + 1}: Total Reward = {episode_reward}")

        # Save the model
        torch.save(self.policy.state_dict(), "reinforce_cartpole.pth")
        print("Training complete. Model saved!")

        # Plot rewards
        plt.plot(all_rewards)
        plt.xlabel("Episodes")
        plt.ylabel("Total Reward")
        plt.title("Training Progress")
        plt.savefig("reward_plot.png")  # Save the plot
        print("Reward plot saved as reward_plot.png.")

if __name__ == "__main__":
    env = gym.make("CartPole-v1", render_mode=None)  # Change render_mode to "human" for visualization
    agent = REINFORCE(env)
    agent.train(num_episodes=500)
    env.close()
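For reference, the reversed loop in train() accumulates the discounted return G_t = r_t + gamma * G_{t+1} before the log-probability loss is formed. A small worked example of that accumulation (illustrative values only, gamma = 0.99 as in the class default):

gamma = 0.99
rewards = [1.0, 1.0, 1.0]           # CartPole gives +1 per surviving step
returns, discounted_sum = [], 0.0
for r in reversed(rewards):         # walk the episode backwards
    discounted_sum = r + gamma * discounted_sum
    returns.insert(0, discounted_sum)
print(returns)                      # approximately [2.9701, 1.99, 1.0]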
reward_plot.png (48.5 KiB): training reward curve generated by the script above.
