diff --git a/a2c_sb3_cartpole.py b/a2c_sb3_cartpole.py
index 9a1a519360c47cdc8bc7d4591c6f38649cad2339..3e2ed053849f5c9dfe290076b05f8abfeed39335 100644
--- a/a2c_sb3_cartpole.py
+++ b/a2c_sb3_cartpole.py
@@ -1,69 +1,94 @@
 import gym
-import cv2
 from stable_baselines3 import A2C
-from huggingface_sb3 import package_to_hub, push_to_hub
-from gym import envs
 from gymnasium.envs.registration import register
 from tqdm import tqdm
-import matplotlib.pyplot as plt
 import wandb
 from wandb.integration.sb3 import WandbCallback
 from stable_baselines3.common.vec_env import VecVideoRecorder
-import dill
-import zipfile
-
-# Initialize Weights & Biases
-total_timesteps = 10000
-config = {
-    "policy_type": "MlpPolicy",
-    "total_timesteps": total_timesteps,
-    "env_name": "CartPole-v1",
-}
-wandb.login()
-run = wandb.init(
-    project="a2c-cartpole-v1",
-    config=config,
-    sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
-    monitor_gym=True,  # auto-upload the videos of agents playing the game
-    save_code=True,  # optional
-)
-env_id = "CartPole-v1"
-
-# Register the environment
-register(id=env_id, entry_point='gym.envs.classic_control:CartPoleEnv', max_episode_steps=500)
-
-env = gym.make(env_id)
-# env = VecVideoRecorder(
-#     env,
-#     f"videos/{run.id}",
-#     record_video_trigger=lambda x: x % 2000 == 0,
-#     video_length=200,
-# )
-
-model = A2C("MlpPolicy", env, verbose=1, tensorboard_log=f"runs/{run.id}")
-model.learn(total_timesteps=total_timesteps, callback=WandbCallback(
-    gradient_save_freq=100,
-    model_save_path=f"models/{run.id}"))
-
-
-# Mark the run as public in W&B project settings
-run.finish()
-
-vec_env = model.get_env()
-obs = vec_env.reset()
-
-for i in tqdm(range(1000)):
-    action, _state = model.predict(obs, deterministic=True)
-    obs, reward, done, info = vec_env.step(action)
-    vec_env.render()
-
-def save_model(model, env_id):
-    # Step 1: Serialize the model
-    model_bytes = dill.dumps(model)
-
-    # Step 2: Create a .zip file containing the serialized model
-    zip_filename = env_id + ".zip"
-    with zipfile.ZipFile(zip_filename, 'w') as zipf:
-        zipf.writestr("model.pkl", model_bytes)
+from huggingface_sb3 import push_to_hub
+import os
+
+
+def train_model(config, env_id, policy, project_name):
+    """
+    Train a model using the A2C algorithm with Weights & Biases integration.
+
+    Args:
+        config (dict): Configuration parameters for training.
+        env_id (str): Identifier of the Gym environment.
+        policy (str): Type of policy to use for the model.
+        project_name (str): Name of the project in Weights & Biases.
+
+    Returns:
+        A2C: Trained A2C model.
+    """
+    # Initialize Weights & Biases
+    wandb.login()
+    run = wandb.init(
+        project=project_name,
+        config=config,
+        sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
+        monitor_gym=True,  # auto-upload the videos of agents playing the game
+        save_code=True,  # optional
+    )
+
+    # CartPole-v1 is registered by gym itself, so no manual register() call is needed.
+    env = gym.make(env_id, render_mode="rgb_array")
+
+    model = A2C(policy, env, verbose=1, tensorboard_log=f"runs/{run.id}")
+    model.learn(
+        total_timesteps=config["total_timesteps"],
+        callback=WandbCallback(gradient_save_freq=100, model_save_path=f"models/{run.id}"),
+    )
+
+    # Finish the W&B run (it can be marked as public in the project settings)
+    run.finish()
+    return model
+
+
+def test_model(model):
+    """
+    Test a trained model by running it in the environment.
+
+    Args:
+        model (A2C): Trained A2C model to be tested.
+ """ + vec_env = model.get_env() + obs = vec_env.reset() + + for _ in tqdm(range(1000)): + action, _state = model.predict(obs, deterministic=True) + obs, reward, done, info = vec_env.step(action) + vec_env.render("rgb_array") + +def save_push_model(model, project_name): + """ + Save the trained model and push it to the Hugging Face Model Hub. + + Args: + model (A2C): Trained A2C model. + project_name (str): Name of the project to save the model. + """ + model.save(project_name + ".zip") + + #HugingFace + push_to_hub( + repo_id="oscarchaufour/a2c-CartPole-v1", + filename=project_name + ".zip", + commit_message="Adding CartPole model trained with A2C on HuggingFace", + token="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" + ) + + +if __name__ == "__main__": + env_id = "CartPole-v1" + policy = "MlpPolicy" + + config = { + "policy_type": policy, + "total_timesteps": 10000, + "env_name": env_id, + } + project_name = "a2c-CartPole-v1" + + trained_model = train_model(config, env_id, policy, project_name) + test_model(trained_model) + save_push_model(trained_model, project_name) diff --git a/a2c_sb3_panda_reach.py b/a2c_sb3_panda_reach.py index f2ac80b8cea2dec346611b487b2c88afda6e99b1..134941a732e434e9f2f98440b47f67707a00e383 100644 --- a/a2c_sb3_panda_reach.py +++ b/a2c_sb3_panda_reach.py @@ -1,64 +1,94 @@ import gym -import panda_gym from stable_baselines3 import A2C -from huggingface_sb3 import package_to_hub, push_to_hub -from gym import envs from gymnasium.envs.registration import register from tqdm import tqdm -import matplotlib.pyplot as plt import wandb from wandb.integration.sb3 import WandbCallback from stable_baselines3.common.vec_env import VecVideoRecorder -import dill -import zipfile +from huggingface_sb3 import push_to_hub -# Initialize Weights & Biases -total_timesteps = 100000 -config = { - "policy_type": "MlpPolicy", - "total_timesteps": total_timesteps, - "env_name": "PandaReachJointsDense-v3", -} -wandb.login() -run = wandb.init( - project="a2c-PandaReachJointsDense-v3", - config=config, - sync_tensorboard=True, # auto-upload sb3's tensorboard metrics - monitor_gym=True, # auto-upload the videos of agents playing the game - save_code=True, # optional -) -env_id = "PandaReachJointsDense-v3" -# Register the environment -register(id=env_id, entry_point='gym.envs.classic_control:CartPoleEnv', max_episode_steps=500) +def train_model(config, env_id, policy, project_name): + """ + Train a model using the A2C algorithm with Weights & Biases integration. -env = gym.make(env_id) + Args: + config (dict): Configuration parameters for training. + env_id (str): Identifier of the Gym environment. + policy (str): Type of policy to use for the model. + project_name (str): Name of the project in Weights & Biases. -model = A2C("MlpPolicy", env, verbose=1, tensorboard_log=f"runs/{run.id}") -model.learn(total_timesteps=total_timesteps, callback=WandbCallback( - gradient_save_freq=100, - model_save_path=f"models/{run.id}")) + Returns: + A2C: Trained A2C model. 
+ """ + # Initialize Weights & Biases + wandb.login() + run = wandb.init( + project=project_name, + config=config, + sync_tensorboard=True, # auto-upload sb3's tensorboard metrics + monitor_gym=True, # auto-upload the videos of agents playing the game + save_code=True, # optional + ) + # Register the environment + register(id=env_id, entry_point='gym.envs.robotics:PandaReachEnv', max_episode_steps=500) -# Mark the run as public in W&B project settings -run.finish() + env = gym.make(env_id, render_mode="rgb_array") -vec_env = model.get_env() -obs = vec_env.reset() + model = A2C(policy, env, verbose=1, tensorboard_log=f"runs/{run.id}") + model.learn(total_timesteps=config["total_timesteps"]) -for i in tqdm(range(1000)): - action, _state = model.predict(obs, deterministic=True) - obs, reward, done, info = vec_env.step(action) - vec_env.render() -def save_model(model, env_id): # use this function to save the model without wandb visualization - # Step 1: Serialize the model - model_bytes = dill.dumps(model) + # Mark the run as public in W&B project settings + run.finish() + return model - # Step 2: Create a .zip file containing the serialized model - zip_filename = env_id + ".zip" - with zipfile.ZipFile(zip_filename, 'w') as zipf: - zipf.writestr("model.pkl", model_bytes) +def test_model(model): + """ + Test a trained model by running it in the environment. + Args: + model (A2C): Trained A2C model to be tested. + """ + vec_env = model.get_env() + obs = vec_env.reset() + for _ in tqdm(range(1000)): + action, _state = model.predict(obs, deterministic=True) + obs, reward, done, info = vec_env.step(action) + vec_env.render("rgb_array") +def save_push_model(model, project_name): + """ + Save the trained model and push it to the Hugging Face Model Hub. + + Args: + model (A2C): Trained A2C model. + project_name (str): Name of the project to save the model. + """ + model.save(project_name + ".zip") + + #HugingFace + push_to_hub( + repo_id="oscarchaufour/a2c-PandaReachJointsDense-v2", + filename=project_name + ".zip", + commit_message="Adding PandaReachJointsDense model trained with A2C on HuggingFace", + token="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" + ) + + +if __name__ == "__main__": + env_id = "PandaReachJointsDense-v2" + policy = "MlpPolicy" + + config = { + "policy_type": policy, + "total_timesteps": 500000, + "env_name": env_id, + } + project_name = "a2c-PandaReachJointsDense-v2" + + trained_model = train_model(config, env_id, policy, project_name) + test_model(trained_model) + save_push_model(trained_model, project_name)