Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
hands-on-rl
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package registry
Model registry
Operate
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Brussart Paul-emile
hands-on-rl
Commits
bb31e1b0
Commit
bb31e1b0
authored
2 years ago
by
Brussart Paul-emile
Browse files
Options
Downloads
Patches
Plain Diff
Adding a2c_sb3_panda_reach.py
parent
25c3cb07
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
a2c_sb3_panda_reach.py
+50
-0
50 additions, 0 deletions
a2c_sb3_panda_reach.py
with
50 additions
and
0 deletions
a2c_sb3_panda_reach.py
0 → 100644
+
50
−
0
View file @
bb31e1b0
import
gym
import
panda_gym
from
stable_baselines3
import
A2C
from
stable_baselines3.common.monitor
import
Monitor
from
stable_baselines3.common.vec_env
import
DummyVecEnv
import
wandb
from
wandb.integration.sb3
import
WandbCallback
# Hyper-parameters and run settings, kept in one dict so they are both
# logged to W&B and reused below when building the env and the model.
config = {
    "policy_type": "MultiInputPolicy",  # policy architecture passed to A2C
    "total_timesteps": 500000,          # training budget in env steps
    "env_name": "PandaReachJointsDense-v2",  # environment id for gym.make
}

# Open a Weights & Biases run: store the config, mirror TensorBoard
# scalars, record gym monitor metrics, and snapshot the source code.
run = wandb.init(
    project="pandareach",
    config=config,
    sync_tensorboard=True,
    monitor_gym=True,
    save_code=True,
)
# Factory used by DummyVecEnv to build one monitored environment.
def make_env(env_name=None):
    """Create a single training environment wrapped in a Monitor.

    Parameters
    ----------
    env_name : str, optional
        Gym environment id to instantiate. When omitted (the original
        behavior, and how ``DummyVecEnv([make_env])`` calls it), falls
        back to the module-level ``config["env_name"]``.

    Returns
    -------
    The created environment wrapped in ``Monitor`` so per-episode
    statistics are recorded for logging.
    """
    if env_name is None:
        # Preserve the original hard-coded lookup as the default.
        env_name = config["env_name"]
    env = gym.make(env_name)
    # Monitor records episode reward/length, which W&B picks up.
    env = Monitor(env)
    return env
# SB3 expects a vectorized env; wrap the single factory in a DummyVecEnv.
env = DummyVecEnv([make_env])

# Build the A2C agent with the configured policy type, logging
# TensorBoard data under a directory unique to this W&B run.
model = A2C(
    config["policy_type"],
    env,
    verbose=1,
    tensorboard_log=f"runs/{run.id}",
)

# Train for the configured budget; the WandbCallback streams gradients
# and periodic model checkpoints to the active W&B run.
callback = WandbCallback(
    gradient_save_freq=10000,          # upload gradients every 10k steps
    model_save_path=f"models/{run.id}",  # per-run checkpoint directory
    verbose=2,                         # most detailed callback logging
)
model.learn(
    total_timesteps=config["total_timesteps"],
    callback=callback,
)

# Save the final trained policy locally, then close the W&B run.
model.save("a2c_sb3_panda_reach")
run.finish()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment