"""Train an A2C agent on CartPole-v1, save it, then watch it act.

The script builds a single-environment vectorized CartPole, trains an
``MlpPolicy`` A2C model on it, writes the model to disk, and finally steps
the trained policy through the environment while rendering each step.
"""

import gymnasium as gym  # noqa: F401  (kept: other parts of the file may use it)
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env

# Tunable settings, gathered here instead of being scattered as magic numbers.
TRAIN_TIMESTEPS = 10000
EVAL_STEPS = 1000
MODEL_PATH = "a2c_sb3_cartpole"

# Create and wrap the environment.
env_id = "CartPole-v1"
env = make_vec_env(env_id, n_envs=1)

try:
    # Initialize the A2C agent.
    model = A2C("MlpPolicy", env, verbose=1)

    # Train the agent.
    model.learn(total_timesteps=TRAIN_TIMESTEPS)

    # Save the trained model.
    model.save(MODEL_PATH)

    # Evaluate the trained agent. A vectorized env resets finished
    # sub-environments on its own, so no manual reset is needed in the loop.
    obs = env.reset()
    for _ in range(EVAL_STEPS):
        # Use the policy's deterministic action during evaluation so the
        # rollout reflects what was learned rather than exploration noise.
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = env.step(action)
        # Ask explicitly for an on-screen window; a bare ``render()`` falls
        # back to the env's own render mode and may only return a frame.
        # NOTE(review): SB3 displays "human" frames through OpenCV -- confirm
        # opencv-python is installed where this runs.
        env.render("human")
finally:
    # Close the environment even if training or evaluation raised.
    env.close()