import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env

# Train an A2C agent on CartPole, save it to disk, then roll the trained
# policy out with on-screen rendering.
env_id = "CartPole-v1"
TRAIN_TIMESTEPS = 10000  # environment steps used for training
EVAL_STEPS = 1000  # environment steps rolled out after training

# Create and wrap the environment (a vectorized env holding a single copy)
env = make_vec_env(env_id, n_envs=1)

# try/finally guarantees the environment (and any viewer window) is released
# even if training, saving, or the rollout raises.
try:
    # Initialize the A2C agent
    model = A2C('MlpPolicy', env, verbose=1)

    # Train the agent
    model.learn(total_timesteps=TRAIN_TIMESTEPS)

    # Save the trained model
    model.save("a2c_sb3_cartpole")

    # Roll out the trained agent. Per the SB3 VecEnv docs, finished episodes
    # are reset automatically inside step(), so no manual reset is needed here.
    obs = env.reset()
    for _ in range(EVAL_STEPS):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        # Ask for "human" mode explicitly: a bare render() uses the env's
        # configured render mode (make_vec_env defaults it to "rgb_array" in
        # SB3 2.x), which only returns the frame instead of displaying it.
        env.render("human")
finally:
    # Close the environment
    env.close()