diff --git a/README.md b/README.md
index 40ab7da556692a6b00a8dd31bf1ff62e391d1eba..4e23205ac37cc6f8dc8fc1c912b5571c70c9086d 100644
--- a/README.md
+++ b/README.md
@@ -25,9 +25,20 @@ see [a2c_sb3_cartpole.py](a2c_sb3_cartpole.py)
 
 ### Hugging Face Hub
 
-[Link to the trained model](https://huggingface.co/Thomstr/A2C_CartPole/tree/main)
+[Link to the trained model (cartpole)](https://huggingface.co/Thomstr/A2C_CartPole/tree/main)
 
 ### Weights & Biases
-[Link to the wandb run](https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/cartpole/runs/vh4anh20/workspace?nw=nwuserthomasdgr)
+[Link to the wandb run (cartpole)](https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/cartpole/runs/vh4anh20/workspace?nw=nwuserthomasdgr)
 
 ### Full workflow with panda-gym
+see [a2c_sb3_panda_reach.py](a2c_sb3_panda_reach.py)
+
+Because I could not get panda-gym to install on my PC, I ran this part on Google Colab instead.
+
+see my notebook [online on Google Colab](https://colab.research.google.com/drive/1l03F398QLHHVVqJ-GvRgxA4d-cCocF4K?usp=sharing),
+or open the copy stored in this repository: [a2c_sb3_panda_reach.ipynb](a2c_sb3_panda_reach.ipynb)
+
+
+[Link to the trained model (panda reach)](https://huggingface.co/Thomstr/A2C_PandaReach/tree/main)
+
+[Link to the wandb run (panda reach)](https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws?nw=nwuserthomasdgr)
diff --git a/a2c_sb3_cartpole.py b/a2c_sb3_cartpole.py
index 9a370e3a67325fd902898f59327aed6f8110c66e..46a0747be1cabeda0424c82a3cbef085f78bc55d 100644
--- a/a2c_sb3_cartpole.py
+++ b/a2c_sb3_cartpole.py
@@ -21,7 +21,7 @@ if __name__ == "__main__":
         "total_timesteps": 25000,
         "env_name": "CartPole-v1",
     }
-    wandb.login(key='4ac81e81b051a56ebfc528b579021cfc9ed1e5dc')
+    wandb.login(key='xxxxxxx')
     run = wandb.init(
         project="cartpole",
         config=config,
diff --git a/a2c_sb3_panda_reach.ipynb b/a2c_sb3_panda_reach.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..6df3a3ddb5b13969150d36e4d77fa4437d807d0c
--- /dev/null
+++ b/a2c_sb3_panda_reach.ipynb
@@ -0,0 +1,5885 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "language_info": {
+   "name": "python"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "4b24dddcdfc64f9d943f67e25675a8d1": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_7f8f84aaf0f34b12950cc66d078b2812",
+       "IPY_MODEL_440f7f8102854ac4a38f5c7929446fe6",
+       "IPY_MODEL_f92b16113ecf4cbf999208426529ccad"
+      ],
+      "layout": "IPY_MODEL_04966e7c2fff44d08a5d5d2083c4c36f"
+     }
+    },
+    "7f8f84aaf0f34b12950cc66d078b2812": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_71a187c360684dfa8165f0da5a6bd84b",
+      "placeholder": "​",
+      "style": "IPY_MODEL_48aea33a04ec425291c36aba5afce22e",
+      "value": "a2c_pandareach.zip: 100%"
+     }
+    },
+    "440f7f8102854ac4a38f5c7929446fe6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "FloatProgressModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_33217ae5bf0a484fb3ae7bbf17cf0fbc",
+      "max": 113685,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_d69510a7c2894695a6ac9d8f03daf543",
+      "value": 113685
+     }
+    },
+    "f92b16113ecf4cbf999208426529ccad": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_6e384db209c243c88dbc91571a4418f7",
+      "placeholder": "​",
+      "style": "IPY_MODEL_e038b80524d4425a86421308d60c7445",
+      "value": " 114k/114k [00:00<00:00, 797kB/s]"
+     }
+    },
+    "04966e7c2fff44d08a5d5d2083c4c36f": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "71a187c360684dfa8165f0da5a6bd84b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "48aea33a04ec425291c36aba5afce22e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "33217ae5bf0a484fb3ae7bbf17cf0fbc": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d69510a7c2894695a6ac9d8f03daf543": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ProgressStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "6e384db209c243c88dbc91571a4418f7": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "e038b80524d4425a86421308d60c7445": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    }
+   }
+  }
+ },
+ "cells": [
+  {
+   "cell_type": "code",
+   "source": [
+    "!pip install panda-gym"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "hvK-6XS59xLJ",
+    "outputId": "97a119da-523e-418a-ec0d-8d236d9d6a9e"
+   },
+   "execution_count": 2,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Collecting panda-gym\n",
+      "  Downloading panda_gym-3.0.7-py3-none-any.whl.metadata (4.3 kB)\n",
+      "Requirement already satisfied: gymnasium>=0.26 in /usr/local/lib/python3.11/dist-packages (from panda-gym) (1.1.1)\n",
+      "Collecting pybullet (from panda-gym)\n",
+      "  Downloading pybullet-3.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)\n",
+      "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from panda-gym) (1.26.4)\n",
+      "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from panda-gym) (1.14.1)\n",
+      "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym) (3.1.1)\n",
+      "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym) (4.12.2)\n",
+      "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym) (0.0.4)\n",
+      "Downloading panda_gym-3.0.7-py3-none-any.whl (23 kB)\n",
+      "Downloading pybullet-3.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103.2 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m103.2/103.2 MB\u001B[0m \u001B[31m7.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hInstalling collected packages: pybullet, panda-gym\n",
+      "Successfully installed panda-gym-3.0.7 pybullet-3.2.7\n"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "!pip install huggingface-sb3==2.3.1"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "T4NUJWkfBA4i",
+    "outputId": "27d51024-d6f1-4d33-958d-f56f67df44d3"
+   },
+   "execution_count": 4,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Collecting huggingface-sb3==2.3.1\n",
+      "  Downloading huggingface_sb3-2.3.1-py3-none-any.whl.metadata (6.2 kB)\n",
+      "Requirement already satisfied: huggingface-hub~=0.8 in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (0.28.1)\n",
+      "Requirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (6.0.2)\n",
+      "Requirement already satisfied: wasabi in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (1.1.3)\n",
+      "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (1.26.4)\n",
+      "Requirement already satisfied: cloudpickle>=1.6 in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (3.1.1)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.17.0)\n",
+      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2024.10.0)\n",
+      "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (24.2)\n",
+      "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2.32.3)\n",
+      "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (4.67.1)\n",
+      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (4.12.2)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.4.1)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.10)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2.3.0)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2025.1.31)\n",
+      "Downloading huggingface_sb3-2.3.1-py3-none-any.whl (9.5 kB)\n",
+      "Installing collected packages: huggingface-sb3\n",
+      "Successfully installed huggingface-sb3-2.3.1\n"
+     ]
+    }
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "!pip install stable-baselines3[extra]"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000
+    },
+    "id": "bp-mvGab-IUr",
+    "outputId": "b37be13e-46d5-466c-8b12-8a1dda33fb16"
+   },
+   "execution_count": 4,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "Collecting stable-baselines3[extra]\n",
+      "  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)\n",
+      "Collecting gymnasium<1.1.0,>=0.29.1 (from stable-baselines3[extra])\n",
+      "  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)\n",
+      "Requirement already satisfied: numpy<3.0,>=1.20 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (1.26.4)\n",
+      "Requirement already satisfied: torch<3.0,>=2.3 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.5.1+cu124)\n",
+      "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (3.1.1)\n",
+      "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.2.2)\n",
+      "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (3.10.0)\n",
+      "Requirement already satisfied: opencv-python in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (4.11.0.86)\n",
+      "Requirement already satisfied: pygame in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.6.1)\n",
+      "Requirement already satisfied: tensorboard>=2.9.1 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.18.0)\n",
+      "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (5.9.5)\n",
+      "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (4.67.1)\n",
+      "Requirement already satisfied: rich in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (13.9.4)\n",
+      "Requirement already satisfied: ale-py>=0.9.0 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (0.10.2)\n",
+      "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (11.1.0)\n",
+      "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium<1.1.0,>=0.29.1->stable-baselines3[extra]) (4.12.2)\n",
+      "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium<1.1.0,>=0.29.1->stable-baselines3[extra]) (0.0.4)\n",
+      "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.4.0)\n",
+      "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.70.0)\n",
+      "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.7)\n",
+      "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (24.2)\n",
+      "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (4.25.6)\n",
+      "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (75.1.0)\n",
+      "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.17.0)\n",
+      "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (0.7.2)\n",
+      "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.1.3)\n",
+      "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.17.0)\n",
+      "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.4.2)\n",
+      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.1.6)\n",
+      "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (2024.10.0)\n",
+      "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
+      "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
+      "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting nvidia-curand-cu12==10.3.5.147 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
+      "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
+      "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (2.21.5)\n",
+      "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (12.4.127)\n",
+      "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n",
+      "  Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
+      "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.1.0)\n",
+      "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (1.13.1)\n",
+      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0,>=2.3->stable-baselines3[extra]) (1.3.0)\n",
+      "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (1.3.1)\n",
+      "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (0.12.1)\n",
+      "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (4.56.0)\n",
+      "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (1.4.8)\n",
+      "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (3.2.1)\n",
+      "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (2.8.2)\n",
+      "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3[extra]) (2025.1)\n",
+      "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3[extra]) (2025.1)\n",
+      "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich->stable-baselines3[extra]) (3.0.0)\n",
+      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich->stable-baselines3[extra]) (2.18.0)\n",
+      "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich->stable-baselines3[extra]) (0.1.2)\n",
+      "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.9.1->stable-baselines3[extra]) (3.0.2)\n",
+      "Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m958.1/958.1 kB\u001B[0m \u001B[31m12.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m363.4/363.4 MB\u001B[0m \u001B[31m4.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m13.8/13.8 MB\u001B[0m \u001B[31m94.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m24.6/24.6 MB\u001B[0m \u001B[31m71.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m883.7/883.7 kB\u001B[0m \u001B[31m53.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m664.8/664.8 MB\u001B[0m \u001B[31m2.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m211.5/211.5 MB\u001B[0m \u001B[31m5.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m56.3/56.3 MB\u001B[0m \u001B[31m11.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m127.9/127.9 MB\u001B[0m \u001B[31m7.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m207.5/207.5 MB\u001B[0m \u001B[31m6.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m21.1/21.1 MB\u001B[0m \u001B[31m80.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hDownloading stable_baselines3-2.5.0-py3-none-any.whl (183 kB)\n",
+      "\u001B[2K   \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m183.9/183.9 kB\u001B[0m \u001B[31m18.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n",
+      "\u001B[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, gymnasium, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, stable-baselines3\n",
+      "  Attempting uninstall: nvidia-nvjitlink-cu12\n",
+      "    Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n",
+      "    Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n",
+      "      Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n",
+      "  Attempting uninstall: nvidia-curand-cu12\n",
+      "    Found existing installation: nvidia-curand-cu12 10.3.6.82\n",
+      "    Uninstalling nvidia-curand-cu12-10.3.6.82:\n",
+      "      Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n",
+      "  Attempting uninstall: nvidia-cufft-cu12\n",
+      "    Found existing installation: nvidia-cufft-cu12 11.2.3.61\n",
+      "    Uninstalling nvidia-cufft-cu12-11.2.3.61:\n",
+      "      Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n",
+      "  Attempting uninstall: nvidia-cuda-runtime-cu12\n",
+      "    Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n",
+      "    Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n",
+      "      Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n",
+      "  Attempting uninstall: nvidia-cuda-nvrtc-cu12\n",
+      "    Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n",
+      "    Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n",
+      "      Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n",
+      "  Attempting uninstall: nvidia-cuda-cupti-cu12\n",
+      "    Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n",
+      "    Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n",
+      "      Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n",
+      "  Attempting uninstall: nvidia-cublas-cu12\n",
+      "    Found existing installation: nvidia-cublas-cu12 12.5.3.2\n",
+      "    Uninstalling nvidia-cublas-cu12-12.5.3.2:\n",
+      "      Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n",
+      "  Attempting uninstall: gymnasium\n",
+      "    Found existing installation: gymnasium 1.1.1\n",
+      "    Uninstalling gymnasium-1.1.1:\n",
+      "      Successfully uninstalled gymnasium-1.1.1\n",
+      "  Attempting uninstall: nvidia-cusparse-cu12\n",
+      "    Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n",
+      "    Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n",
+      "      Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n",
+      "  Attempting uninstall: nvidia-cudnn-cu12\n",
+      "    Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n",
+      "    Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n",
+      "      Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n",
+      "  Attempting uninstall: nvidia-cusolver-cu12\n",
+      "    Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n",
+      "    Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n",
+      "      Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n",
+      "Successfully installed gymnasium-1.0.0 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 stable-baselines3-2.5.0\n"
+     ]
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "application/vnd.colab-display-data+json": {
+       "pip_warning": {
+        "packages": [
+         "gymnasium"
+        ]
+       },
+       "id": "83027100988940df948a6b693e6748d8"
+      }
+     },
+     "metadata": {}
+    }
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "v5mGp7Nj9UMT"
+   },
+   "outputs": [],
+   "source": [
+    "import gymnasium as gym\n",
+    "import panda_gym\n",
+    "from stable_baselines3 import A2C\n",
+    "from stable_baselines3.common.monitor import Monitor\n",
+    "from stable_baselines3.common.vec_env import DummyVecEnv\n",
+    "import wandb\n",
+    "from wandb.integration.sb3 import WandbCallback\n",
+    "from huggingface_hub import login\n",
+    "from huggingface_sb3 import push_to_hub\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "# Hyperparameters for this run; also passed to wandb.init() as the run config.\n",
+    "config = {\n",
+    "    \"policy_type\": \"MultiInputPolicy\",\n",
+    "    \"total_timesteps\": 500000,\n",
+    "    \"env_name\": \"PandaReachJointsDense-v3\",\n",
+    "}\n",
+    "\n",
+    "# Do not hard-code the API key in a shared notebook: without a `key` argument,\n",
+    "# wandb falls back to the WANDB_API_KEY environment variable or prompts for it.\n",
+    "wandb.login()\n",
+    "\n",
+    "run = wandb.init(\n",
+    "    project=\"pandareach\",\n",
+    "    config=config,\n",
+    "    sync_tensorboard=True,\n",
+    "    monitor_gym=True,\n",
+    "    save_code=True,\n",
+    ")\n",
+    "\n",
+    "try:\n",
+    "    env = gym.make(config[\"env_name\"])\n",
+    "    # SB3 writes its TensorBoard logs under a directory named after the W&B run id.\n",
+    "    model = A2C(config[\"policy_type\"], env, verbose=1, tensorboard_log=f\"runs/{run.id}\")\n",
+    "    model.learn(\n",
+    "        total_timesteps=config[\"total_timesteps\"],\n",
+    "        callback=WandbCallback(),\n",
+    "    )\n",
+    "finally:\n",
+    "    # Close the W&B run even if environment creation or training fails or is interrupted.\n",
+    "    run.finish()"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 1000
+    },
+    "collapsed": true,
+    "id": "wrqry5Vq-1xn",
+    "outputId": "7f7a2ac5-9e33-484e-f2c3-af2275ec4df0"
+   },
+   "execution_count": 2,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stderr",
+     "text": [
+      "/usr/local/lib/python3.11/dist-packages/notebook/utils.py:280: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n",
+      "  return LooseVersion(v) >= LooseVersion(check)\n",
+      "\u001B[34m\u001B[1mwandb\u001B[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.\n",
+      "\u001B[34m\u001B[1mwandb\u001B[0m: \u001B[33mWARNING\u001B[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
+      "\u001B[34m\u001B[1mwandb\u001B[0m: \u001B[33mWARNING\u001B[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
+      "\u001B[34m\u001B[1mwandb\u001B[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n",
+      "\u001B[34m\u001B[1mwandb\u001B[0m: Currently logged in as: \u001B[33mthomasdgr\u001B[0m (\u001B[33mthomasdgr-ecole-centrale-de-lyon\u001B[0m) to \u001B[32mhttps://api.wandb.ai\u001B[0m. Use \u001B[1m`wandb login --relogin`\u001B[0m to force relogin\n"
+     ]
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       "Tracking run with wandb version 0.19.8"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       "Run data is saved locally in <code>/content/wandb/run-20250312_160957-y39cy9ws</code>"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws' target=\"_blank\">brisk-fog-2</a></strong> to <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       " View project at <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach</a>"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       " View run at <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws</a>"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "\u001B[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.\u001B[0m\n",
+      "|    std                | 0.626    |\n",
+      "|    value_loss         | 0.00806  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.76     |\n",
+      "|    ep_rew_mean        | -0.537   |\n",
+      "|    success_rate       | 0.96     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 73800    |\n",
+      "|    time_elapsed       | 1261     |\n",
+      "|    total_timesteps    | 369000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.22    |\n",
+      "|    explained_variance | 0.391    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 73799    |\n",
+      "|    policy_loss        | 0.919    |\n",
+      "|    std                | 0.626    |\n",
+      "|    value_loss         | 0.193    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 8.81     |\n",
+      "|    ep_rew_mean        | -0.756   |\n",
+      "|    success_rate       | 0.92     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 73900    |\n",
+      "|    time_elapsed       | 1263     |\n",
+      "|    total_timesteps    | 369500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.19    |\n",
+      "|    explained_variance | -0.708   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 73899    |\n",
+      "|    policy_loss        | 3.03     |\n",
+      "|    std                | 0.624    |\n",
+      "|    value_loss         | 2.22     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.79     |\n",
+      "|    ep_rew_mean        | -0.689   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74000    |\n",
+      "|    time_elapsed       | 1264     |\n",
+      "|    total_timesteps    | 370000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.19    |\n",
+      "|    explained_variance | 0.464    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 73999    |\n",
+      "|    policy_loss        | -0.834   |\n",
+      "|    std                | 0.625    |\n",
+      "|    value_loss         | 0.0336   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.15     |\n",
+      "|    ep_rew_mean        | -0.809   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74100    |\n",
+      "|    time_elapsed       | 1266     |\n",
+      "|    total_timesteps    | 370500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.21    |\n",
+      "|    explained_variance | 0.952    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74099    |\n",
+      "|    policy_loss        | -0.512   |\n",
+      "|    std                | 0.626    |\n",
+      "|    value_loss         | 0.0782   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.98     |\n",
+      "|    ep_rew_mean        | -0.937   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74200    |\n",
+      "|    time_elapsed       | 1268     |\n",
+      "|    total_timesteps    | 371000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.19    |\n",
+      "|    explained_variance | 0.584    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74199    |\n",
+      "|    policy_loss        | -1.23    |\n",
+      "|    std                | 0.623    |\n",
+      "|    value_loss         | 0.036    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.93     |\n",
+      "|    ep_rew_mean        | -0.748   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74300    |\n",
+      "|    time_elapsed       | 1270     |\n",
+      "|    total_timesteps    | 371500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.2     |\n",
+      "|    explained_variance | -1.43    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74299    |\n",
+      "|    policy_loss        | 2.67     |\n",
+      "|    std                | 0.626    |\n",
+      "|    value_loss         | 0.671    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.59     |\n",
+      "|    ep_rew_mean        | -0.563   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74400    |\n",
+      "|    time_elapsed       | 1272     |\n",
+      "|    total_timesteps    | 372000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.19    |\n",
+      "|    explained_variance | -0.742   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74399    |\n",
+      "|    policy_loss        | 6.47     |\n",
+      "|    std                | 0.626    |\n",
+      "|    value_loss         | 1.18     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.7      |\n",
+      "|    ep_rew_mean        | -0.507   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74500    |\n",
+      "|    time_elapsed       | 1274     |\n",
+      "|    total_timesteps    | 372500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.18    |\n",
+      "|    explained_variance | 0.643    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74499    |\n",
+      "|    policy_loss        | 0.548    |\n",
+      "|    std                | 0.625    |\n",
+      "|    value_loss         | 0.0473   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.68     |\n",
+      "|    ep_rew_mean        | -0.537   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74600    |\n",
+      "|    time_elapsed       | 1275     |\n",
+      "|    total_timesteps    | 373000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.14    |\n",
+      "|    explained_variance | -3.11    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74599    |\n",
+      "|    policy_loss        | 1.45     |\n",
+      "|    std                | 0.622    |\n",
+      "|    value_loss         | 0.114    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.9      |\n",
+      "|    ep_rew_mean        | -0.558   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74700    |\n",
+      "|    time_elapsed       | 1277     |\n",
+      "|    total_timesteps    | 373500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.16    |\n",
+      "|    explained_variance | 0.473    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74699    |\n",
+      "|    policy_loss        | -1.49    |\n",
+      "|    std                | 0.623    |\n",
+      "|    value_loss         | 0.11     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.53     |\n",
+      "|    ep_rew_mean        | -0.505   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74800    |\n",
+      "|    time_elapsed       | 1279     |\n",
+      "|    total_timesteps    | 374000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.15    |\n",
+      "|    explained_variance | 0.998    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74799    |\n",
+      "|    policy_loss        | -0.158   |\n",
+      "|    std                | 0.623    |\n",
+      "|    value_loss         | 0.000636 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.64     |\n",
+      "|    ep_rew_mean        | -0.526   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 74900    |\n",
+      "|    time_elapsed       | 1281     |\n",
+      "|    total_timesteps    | 374500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.13    |\n",
+      "|    explained_variance | 0.663    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74899    |\n",
+      "|    policy_loss        | -0.532   |\n",
+      "|    std                | 0.623    |\n",
+      "|    value_loss         | 0.0172   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.93     |\n",
+      "|    ep_rew_mean        | -0.474   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75000    |\n",
+      "|    time_elapsed       | 1283     |\n",
+      "|    total_timesteps    | 375000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.11    |\n",
+      "|    explained_variance | 0.965    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 74999    |\n",
+      "|    policy_loss        | 0.147    |\n",
+      "|    std                | 0.621    |\n",
+      "|    value_loss         | 0.00133  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.65     |\n",
+      "|    ep_rew_mean        | -0.419   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75100    |\n",
+      "|    time_elapsed       | 1285     |\n",
+      "|    total_timesteps    | 375500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.11    |\n",
+      "|    explained_variance | 0.884    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75099    |\n",
+      "|    policy_loss        | -0.948   |\n",
+      "|    std                | 0.62     |\n",
+      "|    value_loss         | 0.0348   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.54     |\n",
+      "|    ep_rew_mean        | -0.379   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75200    |\n",
+      "|    time_elapsed       | 1286     |\n",
+      "|    total_timesteps    | 376000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.1     |\n",
+      "|    explained_variance | 0.903    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75199    |\n",
+      "|    policy_loss        | 0.287    |\n",
+      "|    std                | 0.619    |\n",
+      "|    value_loss         | 0.00231  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.89     |\n",
+      "|    ep_rew_mean        | -0.432   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75300    |\n",
+      "|    time_elapsed       | 1288     |\n",
+      "|    total_timesteps    | 376500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.13    |\n",
+      "|    explained_variance | 0.876    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75299    |\n",
+      "|    policy_loss        | 0.399    |\n",
+      "|    std                | 0.622    |\n",
+      "|    value_loss         | 0.00878  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.21     |\n",
+      "|    ep_rew_mean        | -0.472   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75400    |\n",
+      "|    time_elapsed       | 1290     |\n",
+      "|    total_timesteps    | 377000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.1     |\n",
+      "|    explained_variance | 0.782    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75399    |\n",
+      "|    policy_loss        | -0.228   |\n",
+      "|    std                | 0.62     |\n",
+      "|    value_loss         | 0.00453  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.31     |\n",
+      "|    ep_rew_mean        | -0.469   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75500    |\n",
+      "|    time_elapsed       | 1291     |\n",
+      "|    total_timesteps    | 377500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.05    |\n",
+      "|    explained_variance | 0.252    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75499    |\n",
+      "|    policy_loss        | 0.32     |\n",
+      "|    std                | 0.616    |\n",
+      "|    value_loss         | 0.00817  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.77     |\n",
+      "|    ep_rew_mean        | -0.415   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75600    |\n",
+      "|    time_elapsed       | 1293     |\n",
+      "|    total_timesteps    | 378000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.01    |\n",
+      "|    explained_variance | 0.869    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75599    |\n",
+      "|    policy_loss        | -0.165   |\n",
+      "|    std                | 0.612    |\n",
+      "|    value_loss         | 0.00607  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.6      |\n",
+      "|    ep_rew_mean        | -0.38    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75700    |\n",
+      "|    time_elapsed       | 1295     |\n",
+      "|    total_timesteps    | 378500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.02    |\n",
+      "|    explained_variance | 0.934    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75699    |\n",
+      "|    policy_loss        | 0.284    |\n",
+      "|    std                | 0.613    |\n",
+      "|    value_loss         | 0.00298  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.71     |\n",
+      "|    ep_rew_mean        | -0.414   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75800    |\n",
+      "|    time_elapsed       | 1297     |\n",
+      "|    total_timesteps    | 379000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -6.01    |\n",
+      "|    explained_variance | -0.123   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75799    |\n",
+      "|    policy_loss        | -1.11    |\n",
+      "|    std                | 0.613    |\n",
+      "|    value_loss         | 0.0601   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.46     |\n",
+      "|    ep_rew_mean        | -0.384   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 75900    |\n",
+      "|    time_elapsed       | 1299     |\n",
+      "|    total_timesteps    | 379500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.98    |\n",
+      "|    explained_variance | 0.948    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75899    |\n",
+      "|    policy_loss        | -0.0621  |\n",
+      "|    std                | 0.609    |\n",
+      "|    value_loss         | 0.00387  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.48     |\n",
+      "|    ep_rew_mean        | -0.371   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76000    |\n",
+      "|    time_elapsed       | 1300     |\n",
+      "|    total_timesteps    | 380000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.97    |\n",
+      "|    explained_variance | -0.992   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 75999    |\n",
+      "|    policy_loss        | -0.626   |\n",
+      "|    std                | 0.608    |\n",
+      "|    value_loss         | 0.0292   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.84     |\n",
+      "|    ep_rew_mean        | -0.311   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76100    |\n",
+      "|    time_elapsed       | 1302     |\n",
+      "|    total_timesteps    | 380500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.91    |\n",
+      "|    explained_variance | 0.688    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76099    |\n",
+      "|    policy_loss        | -0.221   |\n",
+      "|    std                | 0.602    |\n",
+      "|    value_loss         | 0.00369  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.52     |\n",
+      "|    ep_rew_mean        | -0.375   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76200    |\n",
+      "|    time_elapsed       | 1304     |\n",
+      "|    total_timesteps    | 381000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.91    |\n",
+      "|    explained_variance | 0.31     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76199    |\n",
+      "|    policy_loss        | 0.367    |\n",
+      "|    std                | 0.602    |\n",
+      "|    value_loss         | 0.0075   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.74     |\n",
+      "|    ep_rew_mean        | -0.407   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76300    |\n",
+      "|    time_elapsed       | 1305     |\n",
+      "|    total_timesteps    | 381500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.89    |\n",
+      "|    explained_variance | 0.957    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76299    |\n",
+      "|    policy_loss        | -0.287   |\n",
+      "|    std                | 0.6      |\n",
+      "|    value_loss         | 0.00325  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.7      |\n",
+      "|    ep_rew_mean        | -0.394   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76400    |\n",
+      "|    time_elapsed       | 1307     |\n",
+      "|    total_timesteps    | 382000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.87    |\n",
+      "|    explained_variance | 0.847    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76399    |\n",
+      "|    policy_loss        | 0.207    |\n",
+      "|    std                | 0.598    |\n",
+      "|    value_loss         | 0.00257  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.32     |\n",
+      "|    ep_rew_mean        | -0.366   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76500    |\n",
+      "|    time_elapsed       | 1309     |\n",
+      "|    total_timesteps    | 382500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.88    |\n",
+      "|    explained_variance | 0.883    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76499    |\n",
+      "|    policy_loss        | 0.161    |\n",
+      "|    std                | 0.6      |\n",
+      "|    value_loss         | 0.00138  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.97     |\n",
+      "|    ep_rew_mean        | -0.312   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76600    |\n",
+      "|    time_elapsed       | 1311     |\n",
+      "|    total_timesteps    | 383000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.86    |\n",
+      "|    explained_variance | 0.96     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76599    |\n",
+      "|    policy_loss        | 0.289    |\n",
+      "|    std                | 0.598    |\n",
+      "|    value_loss         | 0.00437  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.6      |\n",
+      "|    ep_rew_mean        | -0.399   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76700    |\n",
+      "|    time_elapsed       | 1313     |\n",
+      "|    total_timesteps    | 383500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.84    |\n",
+      "|    explained_variance | 0.995    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76699    |\n",
+      "|    policy_loss        | 0.187    |\n",
+      "|    std                | 0.597    |\n",
+      "|    value_loss         | 0.00169  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.23     |\n",
+      "|    ep_rew_mean        | -0.363   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76800    |\n",
+      "|    time_elapsed       | 1314     |\n",
+      "|    total_timesteps    | 384000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.83    |\n",
+      "|    explained_variance | 0.858    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76799    |\n",
+      "|    policy_loss        | 0.153    |\n",
+      "|    std                | 0.596    |\n",
+      "|    value_loss         | 0.00414  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.5      |\n",
+      "|    ep_rew_mean        | -0.379   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 76900    |\n",
+      "|    time_elapsed       | 1316     |\n",
+      "|    total_timesteps    | 384500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.83    |\n",
+      "|    explained_variance | 0.979    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76899    |\n",
+      "|    policy_loss        | 0.231    |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.00242  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.01     |\n",
+      "|    ep_rew_mean        | -0.326   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77000    |\n",
+      "|    time_elapsed       | 1317     |\n",
+      "|    total_timesteps    | 385000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.82    |\n",
+      "|    explained_variance | 0.421    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 76999    |\n",
+      "|    policy_loss        | -0.388   |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.0049   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.41     |\n",
+      "|    ep_rew_mean        | -0.355   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77100    |\n",
+      "|    time_elapsed       | 1319     |\n",
+      "|    total_timesteps    | 385500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.79    |\n",
+      "|    explained_variance | 0.863    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77099    |\n",
+      "|    policy_loss        | 0.458    |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.0127   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.5      |\n",
+      "|    ep_rew_mean        | -0.365   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77200    |\n",
+      "|    time_elapsed       | 1321     |\n",
+      "|    total_timesteps    | 386000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.79    |\n",
+      "|    explained_variance | 0.89     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77199    |\n",
+      "|    policy_loss        | 0.224    |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00354  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.68     |\n",
+      "|    ep_rew_mean        | -0.396   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 77300    |\n",
+      "|    time_elapsed       | 1323     |\n",
+      "|    total_timesteps    | 386500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.83    |\n",
+      "|    explained_variance | 0.78     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77299    |\n",
+      "|    policy_loss        | -0.63    |\n",
+      "|    std                | 0.597    |\n",
+      "|    value_loss         | 0.0126   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.64     |\n",
+      "|    ep_rew_mean        | -0.378   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77400    |\n",
+      "|    time_elapsed       | 1325     |\n",
+      "|    total_timesteps    | 387000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.88    |\n",
+      "|    explained_variance | 0.348    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77399    |\n",
+      "|    policy_loss        | 0.876    |\n",
+      "|    std                | 0.6      |\n",
+      "|    value_loss         | 0.0334   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4        |\n",
+      "|    ep_rew_mean        | -0.334   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77500    |\n",
+      "|    time_elapsed       | 1326     |\n",
+      "|    total_timesteps    | 387500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.89    |\n",
+      "|    explained_variance | 0.989    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77499    |\n",
+      "|    policy_loss        | 0.306    |\n",
+      "|    std                | 0.6      |\n",
+      "|    value_loss         | 0.00283  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.33     |\n",
+      "|    ep_rew_mean        | -0.353   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77600    |\n",
+      "|    time_elapsed       | 1328     |\n",
+      "|    total_timesteps    | 388000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.88    |\n",
+      "|    explained_variance | 0.48     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77599    |\n",
+      "|    policy_loss        | -0.698   |\n",
+      "|    std                | 0.6      |\n",
+      "|    value_loss         | 0.0283   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.27     |\n",
+      "|    ep_rew_mean        | -0.351   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77700    |\n",
+      "|    time_elapsed       | 1329     |\n",
+      "|    total_timesteps    | 388500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.88    |\n",
+      "|    explained_variance | 0.799    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77699    |\n",
+      "|    policy_loss        | 0.242    |\n",
+      "|    std                | 0.6      |\n",
+      "|    value_loss         | 0.00354  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.05     |\n",
+      "|    ep_rew_mean        | -0.33    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77800    |\n",
+      "|    time_elapsed       | 1331     |\n",
+      "|    total_timesteps    | 389000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.85    |\n",
+      "|    explained_variance | 0.834    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77799    |\n",
+      "|    policy_loss        | 0.241    |\n",
+      "|    std                | 0.597    |\n",
+      "|    value_loss         | 0.00375  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.31     |\n",
+      "|    ep_rew_mean        | -0.361   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 77900    |\n",
+      "|    time_elapsed       | 1333     |\n",
+      "|    total_timesteps    | 389500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.83    |\n",
+      "|    explained_variance | 0.917    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77899    |\n",
+      "|    policy_loss        | -0.0251  |\n",
+      "|    std                | 0.596    |\n",
+      "|    value_loss         | 0.00161  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.13     |\n",
+      "|    ep_rew_mean        | -0.336   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78000    |\n",
+      "|    time_elapsed       | 1335     |\n",
+      "|    total_timesteps    | 390000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.79    |\n",
+      "|    explained_variance | 0.82     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 77999    |\n",
+      "|    policy_loss        | 0.254    |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00322  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.64     |\n",
+      "|    ep_rew_mean        | -0.292   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78100    |\n",
+      "|    time_elapsed       | 1337     |\n",
+      "|    total_timesteps    | 390500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.79    |\n",
+      "|    explained_variance | 0.993    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78099    |\n",
+      "|    policy_loss        | -0.183   |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.00204  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.4      |\n",
+      "|    ep_rew_mean        | -0.351   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78200    |\n",
+      "|    time_elapsed       | 1338     |\n",
+      "|    total_timesteps    | 391000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.8     |\n",
+      "|    explained_variance | 0.928    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78199    |\n",
+      "|    policy_loss        | -0.351   |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.00284  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.93     |\n",
+      "|    ep_rew_mean        | -0.322   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78300    |\n",
+      "|    time_elapsed       | 1340     |\n",
+      "|    total_timesteps    | 391500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.77    |\n",
+      "|    explained_variance | 0.174    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78299    |\n",
+      "|    policy_loss        | 0.151    |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00289  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.7      |\n",
+      "|    ep_rew_mean        | -0.295   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78400    |\n",
+      "|    time_elapsed       | 1341     |\n",
+      "|    total_timesteps    | 392000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.75    |\n",
+      "|    explained_variance | 0.938    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78399    |\n",
+      "|    policy_loss        | -0.0736  |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.000859 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.84     |\n",
+      "|    ep_rew_mean        | -0.296   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78500    |\n",
+      "|    time_elapsed       | 1343     |\n",
+      "|    total_timesteps    | 392500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.77    |\n",
+      "|    explained_variance | 0.963    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78499    |\n",
+      "|    policy_loss        | -0.00544 |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.000619 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.04     |\n",
+      "|    ep_rew_mean        | -0.325   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78600    |\n",
+      "|    time_elapsed       | 1345     |\n",
+      "|    total_timesteps    | 393000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.75    |\n",
+      "|    explained_variance | 0.95     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78599    |\n",
+      "|    policy_loss        | -0.152   |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00185  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.16     |\n",
+      "|    ep_rew_mean        | -0.342   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78700    |\n",
+      "|    time_elapsed       | 1346     |\n",
+      "|    total_timesteps    | 393500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.75    |\n",
+      "|    explained_variance | 0.971    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78699    |\n",
+      "|    policy_loss        | 0.0537   |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.000228 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.66     |\n",
+      "|    ep_rew_mean        | -0.299   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78800    |\n",
+      "|    time_elapsed       | 1348     |\n",
+      "|    total_timesteps    | 394000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.77    |\n",
+      "|    explained_variance | 0.986    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78799    |\n",
+      "|    policy_loss        | 0.112    |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.000503 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.54     |\n",
+      "|    ep_rew_mean        | -0.293   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 78900    |\n",
+      "|    time_elapsed       | 1350     |\n",
+      "|    total_timesteps    | 394500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.75    |\n",
+      "|    explained_variance | 0.941    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78899    |\n",
+      "|    policy_loss        | 0.148    |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00109  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.62     |\n",
+      "|    ep_rew_mean        | -0.29    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79000    |\n",
+      "|    time_elapsed       | 1352     |\n",
+      "|    total_timesteps    | 395000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.847    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 78999    |\n",
+      "|    policy_loss        | -0.0882  |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.000801 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.68     |\n",
+      "|    ep_rew_mean        | -0.29    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79100    |\n",
+      "|    time_elapsed       | 1353     |\n",
+      "|    total_timesteps    | 395500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.74    |\n",
+      "|    explained_variance | 0.88     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79099    |\n",
+      "|    policy_loss        | 0.107    |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00122  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.32     |\n",
+      "|    ep_rew_mean        | -0.358   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79200    |\n",
+      "|    time_elapsed       | 1355     |\n",
+      "|    total_timesteps    | 396000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.76    |\n",
+      "|    explained_variance | 0.983    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79199    |\n",
+      "|    policy_loss        | -0.0174  |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.000369 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.72     |\n",
+      "|    ep_rew_mean        | -0.305   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79300    |\n",
+      "|    time_elapsed       | 1356     |\n",
+      "|    total_timesteps    | 396500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.76    |\n",
+      "|    explained_variance | 0.865    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79299    |\n",
+      "|    policy_loss        | -0.246   |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00269  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.23     |\n",
+      "|    ep_rew_mean        | -0.359   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79400    |\n",
+      "|    time_elapsed       | 1358     |\n",
+      "|    total_timesteps    | 397000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.77    |\n",
+      "|    explained_variance | 0.99     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79399    |\n",
+      "|    policy_loss        | 0.00824  |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.000109 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.83     |\n",
+      "|    ep_rew_mean        | -0.311   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79500    |\n",
+      "|    time_elapsed       | 1360     |\n",
+      "|    total_timesteps    | 397500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.74    |\n",
+      "|    explained_variance | 0.998    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79499    |\n",
+      "|    policy_loss        | -0.00668 |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 6.53e-05 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.93     |\n",
+      "|    ep_rew_mean        | -0.322   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79600    |\n",
+      "|    time_elapsed       | 1362     |\n",
+      "|    total_timesteps    | 398000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.846    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79599    |\n",
+      "|    policy_loss        | -0.339   |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.00599  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.95     |\n",
+      "|    ep_rew_mean        | -0.326   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79700    |\n",
+      "|    time_elapsed       | 1364     |\n",
+      "|    total_timesteps    | 398500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.74    |\n",
+      "|    explained_variance | 0.848    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79699    |\n",
+      "|    policy_loss        | 0.321    |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00438  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.27     |\n",
+      "|    ep_rew_mean        | -0.354   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79800    |\n",
+      "|    time_elapsed       | 1365     |\n",
+      "|    total_timesteps    | 399000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | 0.865    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79799    |\n",
+      "|    policy_loss        | 0.114    |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.000994 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.64     |\n",
+      "|    ep_rew_mean        | -0.288   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 79900    |\n",
+      "|    time_elapsed       | 1367     |\n",
+      "|    total_timesteps    | 399500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | 0.997    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79899    |\n",
+      "|    policy_loss        | -0.0071  |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00012  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.82     |\n",
+      "|    ep_rew_mean        | -0.303   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80000    |\n",
+      "|    time_elapsed       | 1369     |\n",
+      "|    total_timesteps    | 400000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | 0.974    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 79999    |\n",
+      "|    policy_loss        | -0.4     |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00803  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.5      |\n",
+      "|    ep_rew_mean        | -0.357   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80100    |\n",
+      "|    time_elapsed       | 1370     |\n",
+      "|    total_timesteps    | 400500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.995    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80099    |\n",
+      "|    policy_loss        | -0.175   |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00169  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.17     |\n",
+      "|    ep_rew_mean        | -0.342   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80200    |\n",
+      "|    time_elapsed       | 1372     |\n",
+      "|    total_timesteps    | 401000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.74    |\n",
+      "|    explained_variance | 0.98     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80199    |\n",
+      "|    policy_loss        | 0.0791   |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00129  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.21     |\n",
+      "|    ep_rew_mean        | -0.437   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80300    |\n",
+      "|    time_elapsed       | 1374     |\n",
+      "|    total_timesteps    | 401500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.98     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80299    |\n",
+      "|    policy_loss        | -0.611   |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.0116   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.11     |\n",
+      "|    ep_rew_mean        | -0.335   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80400    |\n",
+      "|    time_elapsed       | 1376     |\n",
+      "|    total_timesteps    | 402000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.972    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80399    |\n",
+      "|    policy_loss        | 0.102    |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.000776 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.17     |\n",
+      "|    ep_rew_mean        | -0.335   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80500    |\n",
+      "|    time_elapsed       | 1377     |\n",
+      "|    total_timesteps    | 402500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.988    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80499    |\n",
+      "|    policy_loss        | 0.249    |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00297  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.29     |\n",
+      "|    ep_rew_mean        | -0.355   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80600    |\n",
+      "|    time_elapsed       | 1379     |\n",
+      "|    total_timesteps    | 403000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.69    |\n",
+      "|    explained_variance | 0.897    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80599    |\n",
+      "|    policy_loss        | -0.571   |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.0145   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.49     |\n",
+      "|    ep_rew_mean        | -0.376   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80700    |\n",
+      "|    time_elapsed       | 1381     |\n",
+      "|    total_timesteps    | 403500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.802    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80699    |\n",
+      "|    policy_loss        | 0.05     |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.00319  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.22     |\n",
+      "|    ep_rew_mean        | -0.343   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80800    |\n",
+      "|    time_elapsed       | 1382     |\n",
+      "|    total_timesteps    | 404000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | 0.759    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80799    |\n",
+      "|    policy_loss        | 0.0934   |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.000847 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.33     |\n",
+      "|    ep_rew_mean        | -0.35    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 80900    |\n",
+      "|    time_elapsed       | 1384     |\n",
+      "|    total_timesteps    | 404500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.965    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80899    |\n",
+      "|    policy_loss        | 0.128    |\n",
+      "|    std                | 0.596    |\n",
+      "|    value_loss         | 0.00167  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.6      |\n",
+      "|    ep_rew_mean        | -0.373   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81000    |\n",
+      "|    time_elapsed       | 1385     |\n",
+      "|    total_timesteps    | 405000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.982    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 80999    |\n",
+      "|    policy_loss        | 0.286    |\n",
+      "|    std                | 0.595    |\n",
+      "|    value_loss         | 0.0024   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.89     |\n",
+      "|    ep_rew_mean        | -0.313   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81100    |\n",
+      "|    time_elapsed       | 1387     |\n",
+      "|    total_timesteps    | 405500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.7     |\n",
+      "|    explained_variance | 0.941    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81099    |\n",
+      "|    policy_loss        | 0.141    |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00134  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.65     |\n",
+      "|    ep_rew_mean        | -0.296   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81200    |\n",
+      "|    time_elapsed       | 1390     |\n",
+      "|    total_timesteps    | 406000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.774    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81199    |\n",
+      "|    policy_loss        | 0.00445  |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.000498 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.8      |\n",
+      "|    ep_rew_mean        | -0.309   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81300    |\n",
+      "|    time_elapsed       | 1391     |\n",
+      "|    total_timesteps    | 406500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.834    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81299    |\n",
+      "|    policy_loss        | 0.0849   |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00068  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.79     |\n",
+      "|    ep_rew_mean        | -0.31    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81400    |\n",
+      "|    time_elapsed       | 1393     |\n",
+      "|    total_timesteps    | 407000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.85     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81399    |\n",
+      "|    policy_loss        | -0.114   |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00289  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.95     |\n",
+      "|    ep_rew_mean        | -0.32    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81500    |\n",
+      "|    time_elapsed       | 1394     |\n",
+      "|    total_timesteps    | 407500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.7     |\n",
+      "|    explained_variance | 0.717    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81499    |\n",
+      "|    policy_loss        | 0.383    |\n",
+      "|    std                | 0.59     |\n",
+      "|    value_loss         | 0.00841  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.82     |\n",
+      "|    ep_rew_mean        | -0.307   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81600    |\n",
+      "|    time_elapsed       | 1396     |\n",
+      "|    total_timesteps    | 408000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.68    |\n",
+      "|    explained_variance | 0.958    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81599    |\n",
+      "|    policy_loss        | 0.0597   |\n",
+      "|    std                | 0.588    |\n",
+      "|    value_loss         | 0.0012   |\n",
+      "------------------------------------\n",
+      "-------------------------------------\n",
+      "| rollout/              |           |\n",
+      "|    ep_len_mean        | 4.04      |\n",
+      "|    ep_rew_mean        | -0.322    |\n",
+      "|    success_rate       | 1         |\n",
+      "| time/                 |           |\n",
+      "|    fps                | 292       |\n",
+      "|    iterations         | 81700     |\n",
+      "|    time_elapsed       | 1397      |\n",
+      "|    total_timesteps    | 408500    |\n",
+      "| train/                |           |\n",
+      "|    entropy_loss       | -5.7      |\n",
+      "|    explained_variance | 0.986     |\n",
+      "|    learning_rate      | 0.0007    |\n",
+      "|    n_updates          | 81699     |\n",
+      "|    policy_loss        | -0.000123 |\n",
+      "|    std                | 0.59      |\n",
+      "|    value_loss         | 0.00023   |\n",
+      "-------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.47     |\n",
+      "|    ep_rew_mean        | -0.367   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81800    |\n",
+      "|    time_elapsed       | 1399     |\n",
+      "|    total_timesteps    | 409000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.928    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81799    |\n",
+      "|    policy_loss        | 0.321    |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00345  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.65     |\n",
+      "|    ep_rew_mean        | -0.391   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 81900    |\n",
+      "|    time_elapsed       | 1401     |\n",
+      "|    total_timesteps    | 409500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.7     |\n",
+      "|    explained_variance | -0.235   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81899    |\n",
+      "|    policy_loss        | -0.0123  |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.00437  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.51     |\n",
+      "|    ep_rew_mean        | -0.373   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82000    |\n",
+      "|    time_elapsed       | 1403     |\n",
+      "|    total_timesteps    | 410000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.74    |\n",
+      "|    explained_variance | 0.727    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 81999    |\n",
+      "|    policy_loss        | -0.604   |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.0221   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.49     |\n",
+      "|    ep_rew_mean        | -0.538   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82100    |\n",
+      "|    time_elapsed       | 1405     |\n",
+      "|    total_timesteps    | 410500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.74    |\n",
+      "|    explained_variance | -0.231   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82099    |\n",
+      "|    policy_loss        | 1.1      |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.138    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.05     |\n",
+      "|    ep_rew_mean        | -0.397   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82200    |\n",
+      "|    time_elapsed       | 1407     |\n",
+      "|    total_timesteps    | 411000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.969    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82199    |\n",
+      "|    policy_loss        | 0.634    |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00847  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.67     |\n",
+      "|    ep_rew_mean        | -0.299   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82300    |\n",
+      "|    time_elapsed       | 1408     |\n",
+      "|    total_timesteps    | 411500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.949    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82299    |\n",
+      "|    policy_loss        | 0.327    |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00555  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.84     |\n",
+      "|    ep_rew_mean        | -0.305   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82400    |\n",
+      "|    time_elapsed       | 1410     |\n",
+      "|    total_timesteps    | 412000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.992    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82399    |\n",
+      "|    policy_loss        | 0.147    |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00121  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.5      |\n",
+      "|    ep_rew_mean        | -0.282   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82500    |\n",
+      "|    time_elapsed       | 1411     |\n",
+      "|    total_timesteps    | 412500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.829    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82499    |\n",
+      "|    policy_loss        | -0.338   |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00608  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.85     |\n",
+      "|    ep_rew_mean        | -0.316   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82600    |\n",
+      "|    time_elapsed       | 1413     |\n",
+      "|    total_timesteps    | 413000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | -0.164   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82599    |\n",
+      "|    policy_loss        | 0.247    |\n",
+      "|    std                | 0.594    |\n",
+      "|    value_loss         | 0.00841  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.34     |\n",
+      "|    ep_rew_mean        | -0.273   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82700    |\n",
+      "|    time_elapsed       | 1416     |\n",
+      "|    total_timesteps    | 413500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.68    |\n",
+      "|    explained_variance | 0.965    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82699    |\n",
+      "|    policy_loss        | 0.32     |\n",
+      "|    std                | 0.59     |\n",
+      "|    value_loss         | 0.00267  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.82     |\n",
+      "|    ep_rew_mean        | -0.329   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82800    |\n",
+      "|    time_elapsed       | 1417     |\n",
+      "|    total_timesteps    | 414000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.69    |\n",
+      "|    explained_variance | 0.977    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82799    |\n",
+      "|    policy_loss        | -0.101   |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.000629 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.21     |\n",
+      "|    ep_rew_mean        | -0.363   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 82900    |\n",
+      "|    time_elapsed       | 1419     |\n",
+      "|    total_timesteps    | 414500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.7     |\n",
+      "|    explained_variance | 0.975    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82899    |\n",
+      "|    policy_loss        | 0.0751   |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.000352 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.88     |\n",
+      "|    ep_rew_mean        | -0.32    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83000    |\n",
+      "|    time_elapsed       | 1420     |\n",
+      "|    total_timesteps    | 415000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.69    |\n",
+      "|    explained_variance | 0.841    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 82999    |\n",
+      "|    policy_loss        | 0.213    |\n",
+      "|    std                | 0.59     |\n",
+      "|    value_loss         | 0.00137  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.67     |\n",
+      "|    ep_rew_mean        | -0.295   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83100    |\n",
+      "|    time_elapsed       | 1422     |\n",
+      "|    total_timesteps    | 415500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.68    |\n",
+      "|    explained_variance | 0.966    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83099    |\n",
+      "|    policy_loss        | -0.237   |\n",
+      "|    std                | 0.589    |\n",
+      "|    value_loss         | 0.00289  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.52     |\n",
+      "|    ep_rew_mean        | -0.274   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83200    |\n",
+      "|    time_elapsed       | 1424     |\n",
+      "|    total_timesteps    | 416000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.67    |\n",
+      "|    explained_variance | -2.42    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83199    |\n",
+      "|    policy_loss        | 0.458    |\n",
+      "|    std                | 0.587    |\n",
+      "|    value_loss         | 0.0118   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.23     |\n",
+      "|    ep_rew_mean        | -0.36    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83300    |\n",
+      "|    time_elapsed       | 1425     |\n",
+      "|    total_timesteps    | 416500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.68    |\n",
+      "|    explained_variance | 0.987    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83299    |\n",
+      "|    policy_loss        | 0.107    |\n",
+      "|    std                | 0.588    |\n",
+      "|    value_loss         | 0.0012   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.64     |\n",
+      "|    ep_rew_mean        | -0.299   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83400    |\n",
+      "|    time_elapsed       | 1428     |\n",
+      "|    total_timesteps    | 417000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.67    |\n",
+      "|    explained_variance | 0.936    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83399    |\n",
+      "|    policy_loss        | 0.136    |\n",
+      "|    std                | 0.588    |\n",
+      "|    value_loss         | 0.00604  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.84     |\n",
+      "|    ep_rew_mean        | -0.319   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 83500    |\n",
+      "|    time_elapsed       | 1429     |\n",
+      "|    total_timesteps    | 417500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.7     |\n",
+      "|    explained_variance | 0.99     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83499    |\n",
+      "|    policy_loss        | -0.298   |\n",
+      "|    std                | 0.59     |\n",
+      "|    value_loss         | 0.00216  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.73     |\n",
+      "|    ep_rew_mean        | -0.307   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 83600    |\n",
+      "|    time_elapsed       | 1431     |\n",
+      "|    total_timesteps    | 418000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.7     |\n",
+      "|    explained_variance | 0.774    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83599    |\n",
+      "|    policy_loss        | 0.0687   |\n",
+      "|    std                | 0.591    |\n",
+      "|    value_loss         | 0.000266 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.45     |\n",
+      "|    ep_rew_mean        | -0.28    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83700    |\n",
+      "|    time_elapsed       | 1433     |\n",
+      "|    total_timesteps    | 418500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | 0.99     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83699    |\n",
+      "|    policy_loss        | -0.146   |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.000823 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.84     |\n",
+      "|    ep_rew_mean        | -0.322   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83800    |\n",
+      "|    time_elapsed       | 1434     |\n",
+      "|    total_timesteps    | 419000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.73    |\n",
+      "|    explained_variance | 0.914    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83799    |\n",
+      "|    policy_loss        | 0.443    |\n",
+      "|    std                | 0.593    |\n",
+      "|    value_loss         | 0.00823  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.87     |\n",
+      "|    ep_rew_mean        | -0.3     |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 83900    |\n",
+      "|    time_elapsed       | 1436     |\n",
+      "|    total_timesteps    | 419500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.72    |\n",
+      "|    explained_variance | 0.614    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83899    |\n",
+      "|    policy_loss        | -0.0973  |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00337  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.02     |\n",
+      "|    ep_rew_mean        | -0.332   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84000    |\n",
+      "|    time_elapsed       | 1437     |\n",
+      "|    total_timesteps    | 420000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.71    |\n",
+      "|    explained_variance | 0.973    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 83999    |\n",
+      "|    policy_loss        | 0.0913   |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00151  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.17     |\n",
+      "|    ep_rew_mean        | -0.348   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84100    |\n",
+      "|    time_elapsed       | 1439     |\n",
+      "|    total_timesteps    | 420500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.67    |\n",
+      "|    explained_variance | 0.974    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84099    |\n",
+      "|    policy_loss        | 0.041    |\n",
+      "|    std                | 0.59     |\n",
+      "|    value_loss         | 0.000968 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.85     |\n",
+      "|    ep_rew_mean        | -0.325   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 84200    |\n",
+      "|    time_elapsed       | 1442     |\n",
+      "|    total_timesteps    | 421000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.67    |\n",
+      "|    explained_variance | 0.924    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84199    |\n",
+      "|    policy_loss        | -0.255   |\n",
+      "|    std                | 0.59     |\n",
+      "|    value_loss         | 0.0018   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.92     |\n",
+      "|    ep_rew_mean        | -0.315   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 84300    |\n",
+      "|    time_elapsed       | 1443     |\n",
+      "|    total_timesteps    | 421500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.69    |\n",
+      "|    explained_variance | 0.942    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84299    |\n",
+      "|    policy_loss        | -0.291   |\n",
+      "|    std                | 0.592    |\n",
+      "|    value_loss         | 0.00266  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.4      |\n",
+      "|    ep_rew_mean        | -0.343   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 84400    |\n",
+      "|    time_elapsed       | 1445     |\n",
+      "|    total_timesteps    | 422000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.64    |\n",
+      "|    explained_variance | 0.878    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84399    |\n",
+      "|    policy_loss        | 0.33     |\n",
+      "|    std                | 0.588    |\n",
+      "|    value_loss         | 0.00649  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.72     |\n",
+      "|    ep_rew_mean        | -0.295   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84500    |\n",
+      "|    time_elapsed       | 1446     |\n",
+      "|    total_timesteps    | 422500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.61    |\n",
+      "|    explained_variance | 0.991    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84499    |\n",
+      "|    policy_loss        | -0.0852  |\n",
+      "|    std                | 0.585    |\n",
+      "|    value_loss         | 0.000318 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.77     |\n",
+      "|    ep_rew_mean        | -0.307   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84600    |\n",
+      "|    time_elapsed       | 1448     |\n",
+      "|    total_timesteps    | 423000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.59    |\n",
+      "|    explained_variance | 0.819    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84599    |\n",
+      "|    policy_loss        | -0.25    |\n",
+      "|    std                | 0.583    |\n",
+      "|    value_loss         | 0.00259  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.67     |\n",
+      "|    ep_rew_mean        | -0.292   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84700    |\n",
+      "|    time_elapsed       | 1449     |\n",
+      "|    total_timesteps    | 423500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.6     |\n",
+      "|    explained_variance | -0.439   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84699    |\n",
+      "|    policy_loss        | -1.65    |\n",
+      "|    std                | 0.584    |\n",
+      "|    value_loss         | 0.0698   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.72     |\n",
+      "|    ep_rew_mean        | -0.298   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84800    |\n",
+      "|    time_elapsed       | 1451     |\n",
+      "|    total_timesteps    | 424000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.57    |\n",
+      "|    explained_variance | 0.354    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84799    |\n",
+      "|    policy_loss        | 0.125    |\n",
+      "|    std                | 0.581    |\n",
+      "|    value_loss         | 0.00793  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.79     |\n",
+      "|    ep_rew_mean        | -0.311   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 84900    |\n",
+      "|    time_elapsed       | 1453     |\n",
+      "|    total_timesteps    | 424500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.55    |\n",
+      "|    explained_variance | 0.996    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84899    |\n",
+      "|    policy_loss        | -0.29    |\n",
+      "|    std                | 0.579    |\n",
+      "|    value_loss         | 0.00248  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.26     |\n",
+      "|    ep_rew_mean        | -0.341   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 85000    |\n",
+      "|    time_elapsed       | 1455     |\n",
+      "|    total_timesteps    | 425000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.55    |\n",
+      "|    explained_variance | 0.909    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 84999    |\n",
+      "|    policy_loss        | -0.351   |\n",
+      "|    std                | 0.579    |\n",
+      "|    value_loss         | 0.00391  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.34     |\n",
+      "|    ep_rew_mean        | -0.464   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85100    |\n",
+      "|    time_elapsed       | 1457     |\n",
+      "|    total_timesteps    | 425500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.58    |\n",
+      "|    explained_variance | -0.283   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85099    |\n",
+      "|    policy_loss        | 1.26     |\n",
+      "|    std                | 0.582    |\n",
+      "|    value_loss         | 0.184    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.8      |\n",
+      "|    ep_rew_mean        | -0.477   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85200    |\n",
+      "|    time_elapsed       | 1458     |\n",
+      "|    total_timesteps    | 426000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.55    |\n",
+      "|    explained_variance | -0.125   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85199    |\n",
+      "|    policy_loss        | 0.00426  |\n",
+      "|    std                | 0.581    |\n",
+      "|    value_loss         | 0.00751  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.77     |\n",
+      "|    ep_rew_mean        | -0.38    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85300    |\n",
+      "|    time_elapsed       | 1460     |\n",
+      "|    total_timesteps    | 426500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.57    |\n",
+      "|    explained_variance | 0.705    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85299    |\n",
+      "|    policy_loss        | 0.509    |\n",
+      "|    std                | 0.582    |\n",
+      "|    value_loss         | 0.0211   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.2      |\n",
+      "|    ep_rew_mean        | -0.331   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85400    |\n",
+      "|    time_elapsed       | 1461     |\n",
+      "|    total_timesteps    | 427000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.53    |\n",
+      "|    explained_variance | 0.988    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85399    |\n",
+      "|    policy_loss        | 0.0383   |\n",
+      "|    std                | 0.579    |\n",
+      "|    value_loss         | 0.000396 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.04     |\n",
+      "|    ep_rew_mean        | -0.319   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85500    |\n",
+      "|    time_elapsed       | 1463     |\n",
+      "|    total_timesteps    | 427500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.52    |\n",
+      "|    explained_variance | 0.167    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85499    |\n",
+      "|    policy_loss        | -1.1     |\n",
+      "|    std                | 0.578    |\n",
+      "|    value_loss         | 0.0346   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.79     |\n",
+      "|    ep_rew_mean        | -0.451   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85600    |\n",
+      "|    time_elapsed       | 1464     |\n",
+      "|    total_timesteps    | 428000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.51    |\n",
+      "|    explained_variance | 0.79     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85599    |\n",
+      "|    policy_loss        | 0.477    |\n",
+      "|    std                | 0.577    |\n",
+      "|    value_loss         | 0.0124   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.72     |\n",
+      "|    ep_rew_mean        | -0.536   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85700    |\n",
+      "|    time_elapsed       | 1466     |\n",
+      "|    total_timesteps    | 428500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.5     |\n",
+      "|    explained_variance | 0.936    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85699    |\n",
+      "|    policy_loss        | -0.829   |\n",
+      "|    std                | 0.577    |\n",
+      "|    value_loss         | 0.0212   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.57     |\n",
+      "|    ep_rew_mean        | -0.283   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85800    |\n",
+      "|    time_elapsed       | 1469     |\n",
+      "|    total_timesteps    | 429000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.48    |\n",
+      "|    explained_variance | 0.886    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85799    |\n",
+      "|    policy_loss        | -0.126   |\n",
+      "|    std                | 0.576    |\n",
+      "|    value_loss         | 0.0011   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.07     |\n",
+      "|    ep_rew_mean        | -0.325   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 85900    |\n",
+      "|    time_elapsed       | 1470     |\n",
+      "|    total_timesteps    | 429500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.46    |\n",
+      "|    explained_variance | 0.842    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85899    |\n",
+      "|    policy_loss        | -0.48    |\n",
+      "|    std                | 0.575    |\n",
+      "|    value_loss         | 0.00765  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.6      |\n",
+      "|    ep_rew_mean        | -0.374   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86000    |\n",
+      "|    time_elapsed       | 1472     |\n",
+      "|    total_timesteps    | 430000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.48    |\n",
+      "|    explained_variance | 0.544    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 85999    |\n",
+      "|    policy_loss        | 0.364    |\n",
+      "|    std                | 0.576    |\n",
+      "|    value_loss         | 0.00622  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.87     |\n",
+      "|    ep_rew_mean        | -0.315   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86100    |\n",
+      "|    time_elapsed       | 1473     |\n",
+      "|    total_timesteps    | 430500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.46    |\n",
+      "|    explained_variance | 0.959    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86099    |\n",
+      "|    policy_loss        | 0.149    |\n",
+      "|    std                | 0.574    |\n",
+      "|    value_loss         | 0.00123  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.19     |\n",
+      "|    ep_rew_mean        | -0.426   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86200    |\n",
+      "|    time_elapsed       | 1475     |\n",
+      "|    total_timesteps    | 431000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.46    |\n",
+      "|    explained_variance | 0.908    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86199    |\n",
+      "|    policy_loss        | -0.419   |\n",
+      "|    std                | 0.574    |\n",
+      "|    value_loss         | 0.00646  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.05     |\n",
+      "|    ep_rew_mean        | -0.587   |\n",
+      "|    success_rate       | 0.95     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86300    |\n",
+      "|    time_elapsed       | 1476     |\n",
+      "|    total_timesteps    | 431500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.47    |\n",
+      "|    explained_variance | -2.37    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86299    |\n",
+      "|    policy_loss        | -2.05    |\n",
+      "|    std                | 0.574    |\n",
+      "|    value_loss         | 0.226    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.41     |\n",
+      "|    ep_rew_mean        | -0.784   |\n",
+      "|    success_rate       | 0.94     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86400    |\n",
+      "|    time_elapsed       | 1478     |\n",
+      "|    total_timesteps    | 432000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.49    |\n",
+      "|    explained_variance | 0.0145   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86399    |\n",
+      "|    policy_loss        | -1.71    |\n",
+      "|    std                | 0.575    |\n",
+      "|    value_loss         | 0.13     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.48     |\n",
+      "|    ep_rew_mean        | -0.769   |\n",
+      "|    success_rate       | 0.96     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86500    |\n",
+      "|    time_elapsed       | 1480     |\n",
+      "|    total_timesteps    | 432500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.49    |\n",
+      "|    explained_variance | -4.19    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86499    |\n",
+      "|    policy_loss        | 0.346    |\n",
+      "|    std                | 0.575    |\n",
+      "|    value_loss         | 0.0309   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 8.6      |\n",
+      "|    ep_rew_mean        | -0.696   |\n",
+      "|    success_rate       | 0.96     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86600    |\n",
+      "|    time_elapsed       | 1482     |\n",
+      "|    total_timesteps    | 433000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.5     |\n",
+      "|    explained_variance | -46.5    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86599    |\n",
+      "|    policy_loss        | 8.6      |\n",
+      "|    std                | 0.576    |\n",
+      "|    value_loss         | 1.75     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.05     |\n",
+      "|    ep_rew_mean        | -0.394   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86700    |\n",
+      "|    time_elapsed       | 1484     |\n",
+      "|    total_timesteps    | 433500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.49    |\n",
+      "|    explained_variance | 0.913    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86699    |\n",
+      "|    policy_loss        | -1.02    |\n",
+      "|    std                | 0.575    |\n",
+      "|    value_loss         | 0.0308   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.43     |\n",
+      "|    ep_rew_mean        | -0.349   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86800    |\n",
+      "|    time_elapsed       | 1485     |\n",
+      "|    total_timesteps    | 434000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.47    |\n",
+      "|    explained_variance | 0.896    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86799    |\n",
+      "|    policy_loss        | -0.2     |\n",
+      "|    std                | 0.574    |\n",
+      "|    value_loss         | 0.00323  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.9      |\n",
+      "|    ep_rew_mean        | -0.379   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 86900    |\n",
+      "|    time_elapsed       | 1487     |\n",
+      "|    total_timesteps    | 434500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.45    |\n",
+      "|    explained_variance | 0.824    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86899    |\n",
+      "|    policy_loss        | -0.86    |\n",
+      "|    std                | 0.573    |\n",
+      "|    value_loss         | 0.0525   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.01     |\n",
+      "|    ep_rew_mean        | -0.329   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87000    |\n",
+      "|    time_elapsed       | 1489     |\n",
+      "|    total_timesteps    | 435000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.42    |\n",
+      "|    explained_variance | 0.942    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 86999    |\n",
+      "|    policy_loss        | 0.0193   |\n",
+      "|    std                | 0.571    |\n",
+      "|    value_loss         | 0.000476 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.8      |\n",
+      "|    ep_rew_mean        | -0.314   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87100    |\n",
+      "|    time_elapsed       | 1490     |\n",
+      "|    total_timesteps    | 435500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.39    |\n",
+      "|    explained_variance | 0.897    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87099    |\n",
+      "|    policy_loss        | 0.146    |\n",
+      "|    std                | 0.569    |\n",
+      "|    value_loss         | 0.00423  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.93     |\n",
+      "|    ep_rew_mean        | -0.32    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87200    |\n",
+      "|    time_elapsed       | 1492     |\n",
+      "|    total_timesteps    | 436000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.37    |\n",
+      "|    explained_variance | 0.868    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87199    |\n",
+      "|    policy_loss        | 0.147    |\n",
+      "|    std                | 0.568    |\n",
+      "|    value_loss         | 0.004    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.89     |\n",
+      "|    ep_rew_mean        | -0.314   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87300    |\n",
+      "|    time_elapsed       | 1494     |\n",
+      "|    total_timesteps    | 436500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.37    |\n",
+      "|    explained_variance | 0.494    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87299    |\n",
+      "|    policy_loss        | 0.165    |\n",
+      "|    std                | 0.567    |\n",
+      "|    value_loss         | 0.004    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.37     |\n",
+      "|    ep_rew_mean        | -0.369   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87400    |\n",
+      "|    time_elapsed       | 1496     |\n",
+      "|    total_timesteps    | 437000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.34    |\n",
+      "|    explained_variance | 0.976    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87399    |\n",
+      "|    policy_loss        | 0.155    |\n",
+      "|    std                | 0.566    |\n",
+      "|    value_loss         | 0.00141  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.8      |\n",
+      "|    ep_rew_mean        | -0.306   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87500    |\n",
+      "|    time_elapsed       | 1498     |\n",
+      "|    total_timesteps    | 437500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.35    |\n",
+      "|    explained_variance | 0.924    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87499    |\n",
+      "|    policy_loss        | 0.0842   |\n",
+      "|    std                | 0.566    |\n",
+      "|    value_loss         | 0.00362  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.52     |\n",
+      "|    ep_rew_mean        | -0.365   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87600    |\n",
+      "|    time_elapsed       | 1499     |\n",
+      "|    total_timesteps    | 438000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.35    |\n",
+      "|    explained_variance | 0.57     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87599    |\n",
+      "|    policy_loss        | 0.215    |\n",
+      "|    std                | 0.566    |\n",
+      "|    value_loss         | 0.00175  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.03     |\n",
+      "|    ep_rew_mean        | -0.315   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87700    |\n",
+      "|    time_elapsed       | 1501     |\n",
+      "|    total_timesteps    | 438500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.35    |\n",
+      "|    explained_variance | 0.946    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87699    |\n",
+      "|    policy_loss        | 0.0643   |\n",
+      "|    std                | 0.567    |\n",
+      "|    value_loss         | 0.00121  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.85     |\n",
+      "|    ep_rew_mean        | -0.303   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87800    |\n",
+      "|    time_elapsed       | 1502     |\n",
+      "|    total_timesteps    | 439000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.33    |\n",
+      "|    explained_variance | 0.981    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87799    |\n",
+      "|    policy_loss        | 0.233    |\n",
+      "|    std                | 0.564    |\n",
+      "|    value_loss         | 0.0028   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.27     |\n",
+      "|    ep_rew_mean        | -0.354   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 87900    |\n",
+      "|    time_elapsed       | 1504     |\n",
+      "|    total_timesteps    | 439500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.33    |\n",
+      "|    explained_variance | 0.726    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87899    |\n",
+      "|    policy_loss        | -0.0954  |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.00221  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.84     |\n",
+      "|    ep_rew_mean        | -0.306   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 88000    |\n",
+      "|    time_elapsed       | 1506     |\n",
+      "|    total_timesteps    | 440000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.35    |\n",
+      "|    explained_variance | 0.856    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 87999    |\n",
+      "|    policy_loss        | -0.739   |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.0165   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.63     |\n",
+      "|    ep_rew_mean        | -0.289   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 88100    |\n",
+      "|    time_elapsed       | 1508     |\n",
+      "|    total_timesteps    | 440500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.35    |\n",
+      "|    explained_variance | 0.862    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88099    |\n",
+      "|    policy_loss        | 0.227    |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.00193  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.29     |\n",
+      "|    ep_rew_mean        | -0.348   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 88200    |\n",
+      "|    time_elapsed       | 1510     |\n",
+      "|    total_timesteps    | 441000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.35    |\n",
+      "|    explained_variance | 0.919    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88199    |\n",
+      "|    policy_loss        | 0.11     |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.000803 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.96     |\n",
+      "|    ep_rew_mean        | -0.31    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 88300    |\n",
+      "|    time_elapsed       | 1512     |\n",
+      "|    total_timesteps    | 441500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.36    |\n",
+      "|    explained_variance | 0.856    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88299    |\n",
+      "|    policy_loss        | -0.059   |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.000924 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.08     |\n",
+      "|    ep_rew_mean        | -0.335   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 88400    |\n",
+      "|    time_elapsed       | 1513     |\n",
+      "|    total_timesteps    | 442000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.36    |\n",
+      "|    explained_variance | 0.984    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88399    |\n",
+      "|    policy_loss        | 0.0521   |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.000558 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.28     |\n",
+      "|    ep_rew_mean        | -0.355   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 88500    |\n",
+      "|    time_elapsed       | 1515     |\n",
+      "|    total_timesteps    | 442500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.37    |\n",
+      "|    explained_variance | 0.971    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88499    |\n",
+      "|    policy_loss        | -0.156   |\n",
+      "|    std                | 0.566    |\n",
+      "|    value_loss         | 0.00142  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.51     |\n",
+      "|    ep_rew_mean        | -0.379   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 88600    |\n",
+      "|    time_elapsed       | 1516     |\n",
+      "|    total_timesteps    | 443000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.34    |\n",
+      "|    explained_variance | 0.975    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88599    |\n",
+      "|    policy_loss        | 0.169    |\n",
+      "|    std                | 0.564    |\n",
+      "|    value_loss         | 0.0035   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.47     |\n",
+      "|    ep_rew_mean        | -0.357   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 88700    |\n",
+      "|    time_elapsed       | 1518     |\n",
+      "|    total_timesteps    | 443500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.31    |\n",
+      "|    explained_variance | 0.984    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88699    |\n",
+      "|    policy_loss        | -0.341   |\n",
+      "|    std                | 0.562    |\n",
+      "|    value_loss         | 0.00445  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.27     |\n",
+      "|    ep_rew_mean        | -0.358   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 88800    |\n",
+      "|    time_elapsed       | 1520     |\n",
+      "|    total_timesteps    | 444000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.3     |\n",
+      "|    explained_variance | -0.359   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88799    |\n",
+      "|    policy_loss        | -0.222   |\n",
+      "|    std                | 0.562    |\n",
+      "|    value_loss         | 0.00447  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.38     |\n",
+      "|    ep_rew_mean        | -0.36    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 88900    |\n",
+      "|    time_elapsed       | 1522     |\n",
+      "|    total_timesteps    | 444500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.28    |\n",
+      "|    explained_variance | 0.844    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88899    |\n",
+      "|    policy_loss        | -0.251   |\n",
+      "|    std                | 0.56     |\n",
+      "|    value_loss         | 0.00396  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.44     |\n",
+      "|    ep_rew_mean        | -0.367   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89000    |\n",
+      "|    time_elapsed       | 1524     |\n",
+      "|    total_timesteps    | 445000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.28    |\n",
+      "|    explained_variance | 0.933    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 88999    |\n",
+      "|    policy_loss        | 0.112    |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.00269  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.28     |\n",
+      "|    ep_rew_mean        | -0.449   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89100    |\n",
+      "|    time_elapsed       | 1525     |\n",
+      "|    total_timesteps    | 445500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.28    |\n",
+      "|    explained_variance | 0.741    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89099    |\n",
+      "|    policy_loss        | -0.725   |\n",
+      "|    std                | 0.559    |\n",
+      "|    value_loss         | 0.0419   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.84     |\n",
+      "|    ep_rew_mean        | -0.411   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89200    |\n",
+      "|    time_elapsed       | 1527     |\n",
+      "|    total_timesteps    | 446000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.29    |\n",
+      "|    explained_variance | 0.97     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89199    |\n",
+      "|    policy_loss        | -0.18    |\n",
+      "|    std                | 0.561    |\n",
+      "|    value_loss         | 0.00599  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.99     |\n",
+      "|    ep_rew_mean        | -0.414   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 89300    |\n",
+      "|    time_elapsed       | 1529     |\n",
+      "|    total_timesteps    | 446500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.32    |\n",
+      "|    explained_variance | 0.89     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89299    |\n",
+      "|    policy_loss        | 0.141    |\n",
+      "|    std                | 0.563    |\n",
+      "|    value_loss         | 0.00322  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.34     |\n",
+      "|    ep_rew_mean        | -0.34    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 89400    |\n",
+      "|    time_elapsed       | 1530     |\n",
+      "|    total_timesteps    | 447000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.36    |\n",
+      "|    explained_variance | 0.845    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89399    |\n",
+      "|    policy_loss        | -0.0196  |\n",
+      "|    std                | 0.566    |\n",
+      "|    value_loss         | 0.0014   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.15     |\n",
+      "|    ep_rew_mean        | -0.344   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89500    |\n",
+      "|    time_elapsed       | 1532     |\n",
+      "|    total_timesteps    | 447500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.34    |\n",
+      "|    explained_variance | 0.378    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89499    |\n",
+      "|    policy_loss        | -0.317   |\n",
+      "|    std                | 0.565    |\n",
+      "|    value_loss         | 0.00366  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.18     |\n",
+      "|    ep_rew_mean        | -0.342   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89600    |\n",
+      "|    time_elapsed       | 1534     |\n",
+      "|    total_timesteps    | 448000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.31    |\n",
+      "|    explained_variance | 0.657    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89599    |\n",
+      "|    policy_loss        | 0.0751   |\n",
+      "|    std                | 0.561    |\n",
+      "|    value_loss         | 0.000955 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.44     |\n",
+      "|    ep_rew_mean        | -0.366   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89700    |\n",
+      "|    time_elapsed       | 1536     |\n",
+      "|    total_timesteps    | 448500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.27    |\n",
+      "|    explained_variance | 0.95     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89699    |\n",
+      "|    policy_loss        | 0.0631   |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.000703 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.29     |\n",
+      "|    ep_rew_mean        | -0.351   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89800    |\n",
+      "|    time_elapsed       | 1538     |\n",
+      "|    total_timesteps    | 449000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.26    |\n",
+      "|    explained_variance | -0.707   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89799    |\n",
+      "|    policy_loss        | 0.94     |\n",
+      "|    std                | 0.557    |\n",
+      "|    value_loss         | 0.046    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.03     |\n",
+      "|    ep_rew_mean        | -0.333   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 89900    |\n",
+      "|    time_elapsed       | 1539     |\n",
+      "|    total_timesteps    | 449500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.26    |\n",
+      "|    explained_variance | 0.993    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89899    |\n",
+      "|    policy_loss        | 0.22     |\n",
+      "|    std                | 0.557    |\n",
+      "|    value_loss         | 0.00141  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.16     |\n",
+      "|    ep_rew_mean        | -0.336   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90000    |\n",
+      "|    time_elapsed       | 1541     |\n",
+      "|    total_timesteps    | 450000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.29    |\n",
+      "|    explained_variance | 0.971    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 89999    |\n",
+      "|    policy_loss        | 0.0163   |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.00295  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.2      |\n",
+      "|    ep_rew_mean        | -0.344   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90100    |\n",
+      "|    time_elapsed       | 1542     |\n",
+      "|    total_timesteps    | 450500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.29    |\n",
+      "|    explained_variance | 0.982    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90099    |\n",
+      "|    policy_loss        | 0.118    |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.000764 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.49     |\n",
+      "|    ep_rew_mean        | -0.362   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 90200    |\n",
+      "|    time_elapsed       | 1544     |\n",
+      "|    total_timesteps    | 451000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.28    |\n",
+      "|    explained_variance | 0.742    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90199    |\n",
+      "|    policy_loss        | 0.15     |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.00237  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.7      |\n",
+      "|    ep_rew_mean        | -0.291   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90300    |\n",
+      "|    time_elapsed       | 1546     |\n",
+      "|    total_timesteps    | 451500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.27    |\n",
+      "|    explained_variance | 0.765    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90299    |\n",
+      "|    policy_loss        | 0.112    |\n",
+      "|    std                | 0.556    |\n",
+      "|    value_loss         | 0.0024   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.95     |\n",
+      "|    ep_rew_mean        | -0.317   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90400    |\n",
+      "|    time_elapsed       | 1548     |\n",
+      "|    total_timesteps    | 452000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.29    |\n",
+      "|    explained_variance | 0.957    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90399    |\n",
+      "|    policy_loss        | 0.126    |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.000852 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.89     |\n",
+      "|    ep_rew_mean        | -0.323   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90500    |\n",
+      "|    time_elapsed       | 1550     |\n",
+      "|    total_timesteps    | 452500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.28    |\n",
+      "|    explained_variance | 0.796    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90499    |\n",
+      "|    policy_loss        | -0.371   |\n",
+      "|    std                | 0.559    |\n",
+      "|    value_loss         | 0.0071   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.59     |\n",
+      "|    ep_rew_mean        | -0.282   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90600    |\n",
+      "|    time_elapsed       | 1551     |\n",
+      "|    total_timesteps    | 453000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.26    |\n",
+      "|    explained_variance | 0.925    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90599    |\n",
+      "|    policy_loss        | 0.165    |\n",
+      "|    std                | 0.556    |\n",
+      "|    value_loss         | 0.00161  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.95     |\n",
+      "|    ep_rew_mean        | -0.315   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90700    |\n",
+      "|    time_elapsed       | 1553     |\n",
+      "|    total_timesteps    | 453500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.27    |\n",
+      "|    explained_variance | 0.883    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90699    |\n",
+      "|    policy_loss        | 0.0258   |\n",
+      "|    std                | 0.558    |\n",
+      "|    value_loss         | 0.000999 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.96     |\n",
+      "|    ep_rew_mean        | -0.317   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90800    |\n",
+      "|    time_elapsed       | 1555     |\n",
+      "|    total_timesteps    | 454000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.22    |\n",
+      "|    explained_variance | 0.928    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90799    |\n",
+      "|    policy_loss        | -0.117   |\n",
+      "|    std                | 0.554    |\n",
+      "|    value_loss         | 0.00208  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.82     |\n",
+      "|    ep_rew_mean        | -0.309   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 90900    |\n",
+      "|    time_elapsed       | 1556     |\n",
+      "|    total_timesteps    | 454500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.21    |\n",
+      "|    explained_variance | 0.996    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90899    |\n",
+      "|    policy_loss        | -0.123   |\n",
+      "|    std                | 0.553    |\n",
+      "|    value_loss         | 0.000549 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.6      |\n",
+      "|    ep_rew_mean        | -0.287   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 91000    |\n",
+      "|    time_elapsed       | 1558     |\n",
+      "|    total_timesteps    | 455000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.19    |\n",
+      "|    explained_variance | 0.989    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 90999    |\n",
+      "|    policy_loss        | -0.136   |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.000819 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.26     |\n",
+      "|    ep_rew_mean        | -0.34    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91100    |\n",
+      "|    time_elapsed       | 1560     |\n",
+      "|    total_timesteps    | 455500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.18    |\n",
+      "|    explained_variance | 0.901    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91099    |\n",
+      "|    policy_loss        | 0.234    |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.00392  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.4      |\n",
+      "|    ep_rew_mean        | -0.371   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91200    |\n",
+      "|    time_elapsed       | 1562     |\n",
+      "|    total_timesteps    | 456000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.16    |\n",
+      "|    explained_variance | 0.927    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91199    |\n",
+      "|    policy_loss        | 0.0133   |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 0.000819 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.05     |\n",
+      "|    ep_rew_mean        | -0.336   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91300    |\n",
+      "|    time_elapsed       | 1564     |\n",
+      "|    total_timesteps    | 456500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.15    |\n",
+      "|    explained_variance | 0.968    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91299    |\n",
+      "|    policy_loss        | 0.262    |\n",
+      "|    std                | 0.547    |\n",
+      "|    value_loss         | 0.00341  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.63     |\n",
+      "|    ep_rew_mean        | -0.291   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91400    |\n",
+      "|    time_elapsed       | 1565     |\n",
+      "|    total_timesteps    | 457000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.13    |\n",
+      "|    explained_variance | 0.97     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91399    |\n",
+      "|    policy_loss        | 0.158    |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.00105  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.86     |\n",
+      "|    ep_rew_mean        | -0.315   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91500    |\n",
+      "|    time_elapsed       | 1567     |\n",
+      "|    total_timesteps    | 457500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.14    |\n",
+      "|    explained_variance | 0.979    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91499    |\n",
+      "|    policy_loss        | -0.158   |\n",
+      "|    std                | 0.547    |\n",
+      "|    value_loss         | 0.00411  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.65     |\n",
+      "|    ep_rew_mean        | -0.288   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91600    |\n",
+      "|    time_elapsed       | 1568     |\n",
+      "|    total_timesteps    | 458000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.13    |\n",
+      "|    explained_variance | 0.992    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91599    |\n",
+      "|    policy_loss        | -0.00905 |\n",
+      "|    std                | 0.545    |\n",
+      "|    value_loss         | 0.000151 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.21     |\n",
+      "|    ep_rew_mean        | -0.26    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91700    |\n",
+      "|    time_elapsed       | 1570     |\n",
+      "|    total_timesteps    | 458500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.1     |\n",
+      "|    explained_variance | 0.964    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91699    |\n",
+      "|    policy_loss        | -0.0771  |\n",
+      "|    std                | 0.544    |\n",
+      "|    value_loss         | 0.000724 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.41     |\n",
+      "|    ep_rew_mean        | -0.279   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91800    |\n",
+      "|    time_elapsed       | 1572     |\n",
+      "|    total_timesteps    | 459000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.14    |\n",
+      "|    explained_variance | 0.952    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91799    |\n",
+      "|    policy_loss        | 0.214    |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.00181  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.68     |\n",
+      "|    ep_rew_mean        | -0.296   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 91900    |\n",
+      "|    time_elapsed       | 1574     |\n",
+      "|    total_timesteps    | 459500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.12    |\n",
+      "|    explained_variance | 0.99     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91899    |\n",
+      "|    policy_loss        | -0.047   |\n",
+      "|    std                | 0.545    |\n",
+      "|    value_loss         | 0.000246 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.83     |\n",
+      "|    ep_rew_mean        | -0.314   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92000    |\n",
+      "|    time_elapsed       | 1576     |\n",
+      "|    total_timesteps    | 460000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.13    |\n",
+      "|    explained_variance | 0.97     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 91999    |\n",
+      "|    policy_loss        | 0.0479   |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.000446 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 3.87     |\n",
+      "|    ep_rew_mean        | -0.308   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92100    |\n",
+      "|    time_elapsed       | 1577     |\n",
+      "|    total_timesteps    | 460500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.18    |\n",
+      "|    explained_variance | 0.998    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92099    |\n",
+      "|    policy_loss        | 0.0181   |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 2.54e-05 |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.34     |\n",
+      "|    ep_rew_mean        | -0.358   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92200    |\n",
+      "|    time_elapsed       | 1579     |\n",
+      "|    total_timesteps    | 461000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.16    |\n",
+      "|    explained_variance | 0.675    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92199    |\n",
+      "|    policy_loss        | 0.102    |\n",
+      "|    std                | 0.548    |\n",
+      "|    value_loss         | 0.0023   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.46     |\n",
+      "|    ep_rew_mean        | -0.369   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92300    |\n",
+      "|    time_elapsed       | 1581     |\n",
+      "|    total_timesteps    | 461500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.15    |\n",
+      "|    explained_variance | 0.359    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92299    |\n",
+      "|    policy_loss        | -1.15    |\n",
+      "|    std                | 0.547    |\n",
+      "|    value_loss         | 0.0583   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.53     |\n",
+      "|    ep_rew_mean        | -0.385   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92400    |\n",
+      "|    time_elapsed       | 1582     |\n",
+      "|    total_timesteps    | 462000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.16    |\n",
+      "|    explained_variance | 0.953    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92399    |\n",
+      "|    policy_loss        | 0.187    |\n",
+      "|    std                | 0.548    |\n",
+      "|    value_loss         | 0.00202  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.12     |\n",
+      "|    ep_rew_mean        | -0.341   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92500    |\n",
+      "|    time_elapsed       | 1584     |\n",
+      "|    total_timesteps    | 462500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.16    |\n",
+      "|    explained_variance | 0.323    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92499    |\n",
+      "|    policy_loss        | -0.142   |\n",
+      "|    std                | 0.548    |\n",
+      "|    value_loss         | 0.0019   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.66     |\n",
+      "|    ep_rew_mean        | -0.396   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92600    |\n",
+      "|    time_elapsed       | 1586     |\n",
+      "|    total_timesteps    | 463000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.17    |\n",
+      "|    explained_variance | 0.633    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92599    |\n",
+      "|    policy_loss        | -0.19    |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 0.00488  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.83     |\n",
+      "|    ep_rew_mean        | -0.481   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92700    |\n",
+      "|    time_elapsed       | 1588     |\n",
+      "|    total_timesteps    | 463500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.17    |\n",
+      "|    explained_variance | 0.173    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92699    |\n",
+      "|    policy_loss        | -1.09    |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 0.0262   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.82     |\n",
+      "|    ep_rew_mean        | -0.486   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92800    |\n",
+      "|    time_elapsed       | 1589     |\n",
+      "|    total_timesteps    | 464000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.19    |\n",
+      "|    explained_variance | 0.193    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92799    |\n",
+      "|    policy_loss        | 0.189    |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.00871  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.93     |\n",
+      "|    ep_rew_mean        | -0.407   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 92900    |\n",
+      "|    time_elapsed       | 1591     |\n",
+      "|    total_timesteps    | 464500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.17    |\n",
+      "|    explained_variance | 0.925    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92899    |\n",
+      "|    policy_loss        | 0.402    |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.00877  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.01     |\n",
+      "|    ep_rew_mean        | -0.413   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93000    |\n",
+      "|    time_elapsed       | 1593     |\n",
+      "|    total_timesteps    | 465000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.2     |\n",
+      "|    explained_variance | 0.385    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 92999    |\n",
+      "|    policy_loss        | -0.181   |\n",
+      "|    std                | 0.552    |\n",
+      "|    value_loss         | 0.0143   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.35     |\n",
+      "|    ep_rew_mean        | -0.42    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93100    |\n",
+      "|    time_elapsed       | 1594     |\n",
+      "|    total_timesteps    | 465500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.2     |\n",
+      "|    explained_variance | 0.261    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93099    |\n",
+      "|    policy_loss        | 0.02     |\n",
+      "|    std                | 0.552    |\n",
+      "|    value_loss         | 0.0137   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.38     |\n",
+      "|    ep_rew_mean        | -0.426   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93200    |\n",
+      "|    time_elapsed       | 1596     |\n",
+      "|    total_timesteps    | 466000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.22    |\n",
+      "|    explained_variance | 0.522    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93199    |\n",
+      "|    policy_loss        | 0.0501   |\n",
+      "|    std                | 0.555    |\n",
+      "|    value_loss         | 0.0048   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.74     |\n",
+      "|    ep_rew_mean        | -0.386   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93300    |\n",
+      "|    time_elapsed       | 1597     |\n",
+      "|    total_timesteps    | 466500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.23    |\n",
+      "|    explained_variance | 0.929    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93299    |\n",
+      "|    policy_loss        | -0.566   |\n",
+      "|    std                | 0.555    |\n",
+      "|    value_loss         | 0.0126   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.41     |\n",
+      "|    ep_rew_mean        | -0.433   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93400    |\n",
+      "|    time_elapsed       | 1600     |\n",
+      "|    total_timesteps    | 467000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.23    |\n",
+      "|    explained_variance | 0.741    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93399    |\n",
+      "|    policy_loss        | -0.0202  |\n",
+      "|    std                | 0.555    |\n",
+      "|    value_loss         | 0.0059   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.51     |\n",
+      "|    ep_rew_mean        | -0.524   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93500    |\n",
+      "|    time_elapsed       | 1601     |\n",
+      "|    total_timesteps    | 467500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.23    |\n",
+      "|    explained_variance | -1.96    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93499    |\n",
+      "|    policy_loss        | -0.322   |\n",
+      "|    std                | 0.555    |\n",
+      "|    value_loss         | 0.0224   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.67     |\n",
+      "|    ep_rew_mean        | -0.468   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93600    |\n",
+      "|    time_elapsed       | 1603     |\n",
+      "|    total_timesteps    | 468000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.25    |\n",
+      "|    explained_variance | -0.998   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93599    |\n",
+      "|    policy_loss        | 1.93     |\n",
+      "|    std                | 0.557    |\n",
+      "|    value_loss         | 0.152    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.81     |\n",
+      "|    ep_rew_mean        | -0.401   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93700    |\n",
+      "|    time_elapsed       | 1605     |\n",
+      "|    total_timesteps    | 468500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.19    |\n",
+      "|    explained_variance | 0.846    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93699    |\n",
+      "|    policy_loss        | 0.731    |\n",
+      "|    std                | 0.552    |\n",
+      "|    value_loss         | 0.0279   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.44     |\n",
+      "|    ep_rew_mean        | -0.463   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93800    |\n",
+      "|    time_elapsed       | 1606     |\n",
+      "|    total_timesteps    | 469000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.21    |\n",
+      "|    explained_variance | -5.17    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93799    |\n",
+      "|    policy_loss        | -1.75    |\n",
+      "|    std                | 0.553    |\n",
+      "|    value_loss         | 0.187    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.54     |\n",
+      "|    ep_rew_mean        | -0.549   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 93900    |\n",
+      "|    time_elapsed       | 1608     |\n",
+      "|    total_timesteps    | 469500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.22    |\n",
+      "|    explained_variance | 0.768    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93899    |\n",
+      "|    policy_loss        | -3.04    |\n",
+      "|    std                | 0.554    |\n",
+      "|    value_loss         | 0.181    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.49     |\n",
+      "|    ep_rew_mean        | -0.665   |\n",
+      "|    success_rate       | 0.95     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94000    |\n",
+      "|    time_elapsed       | 1609     |\n",
+      "|    total_timesteps    | 470000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.26    |\n",
+      "|    explained_variance | 0.486    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 93999    |\n",
+      "|    policy_loss        | -1.39    |\n",
+      "|    std                | 0.557    |\n",
+      "|    value_loss         | 0.0548   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.95     |\n",
+      "|    ep_rew_mean        | -0.731   |\n",
+      "|    success_rate       | 0.95     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94100    |\n",
+      "|    time_elapsed       | 1611     |\n",
+      "|    total_timesteps    | 470500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.25    |\n",
+      "|    explained_variance | 0.996    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94099    |\n",
+      "|    policy_loss        | -2.14    |\n",
+      "|    std                | 0.556    |\n",
+      "|    value_loss         | 0.218    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 8.86     |\n",
+      "|    ep_rew_mean        | -0.816   |\n",
+      "|    success_rate       | 0.96     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94200    |\n",
+      "|    time_elapsed       | 1613     |\n",
+      "|    total_timesteps    | 471000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.23    |\n",
+      "|    explained_variance | -5.15    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94199    |\n",
+      "|    policy_loss        | -0.979   |\n",
+      "|    std                | 0.554    |\n",
+      "|    value_loss         | 0.0471   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.19     |\n",
+      "|    ep_rew_mean        | -0.626   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94300    |\n",
+      "|    time_elapsed       | 1615     |\n",
+      "|    total_timesteps    | 471500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.22    |\n",
+      "|    explained_variance | -6.99    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94299    |\n",
+      "|    policy_loss        | 2.25     |\n",
+      "|    std                | 0.554    |\n",
+      "|    value_loss         | 0.721    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.89     |\n",
+      "|    ep_rew_mean        | -0.404   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94400    |\n",
+      "|    time_elapsed       | 1617     |\n",
+      "|    total_timesteps    | 472000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.22    |\n",
+      "|    explained_variance | -28.2    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94399    |\n",
+      "|    policy_loss        | 0.0877   |\n",
+      "|    std                | 0.552    |\n",
+      "|    value_loss         | 0.0975   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.46     |\n",
+      "|    ep_rew_mean        | -0.453   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94500    |\n",
+      "|    time_elapsed       | 1618     |\n",
+      "|    total_timesteps    | 472500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.21    |\n",
+      "|    explained_variance | 0.714    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94499    |\n",
+      "|    policy_loss        | 0.184    |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.0135   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.65     |\n",
+      "|    ep_rew_mean        | -0.47    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94600    |\n",
+      "|    time_elapsed       | 1620     |\n",
+      "|    total_timesteps    | 473000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.2     |\n",
+      "|    explained_variance | 0.927    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94599    |\n",
+      "|    policy_loss        | -1.22    |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.0714   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.53     |\n",
+      "|    ep_rew_mean        | -0.368   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94700    |\n",
+      "|    time_elapsed       | 1621     |\n",
+      "|    total_timesteps    | 473500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.22    |\n",
+      "|    explained_variance | 0.758    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94699    |\n",
+      "|    policy_loss        | -1.27    |\n",
+      "|    std                | 0.551    |\n",
+      "|    value_loss         | 0.0714   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.87     |\n",
+      "|    ep_rew_mean        | -0.406   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94800    |\n",
+      "|    time_elapsed       | 1623     |\n",
+      "|    total_timesteps    | 474000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.2     |\n",
+      "|    explained_variance | 0.604    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94799    |\n",
+      "|    policy_loss        | -1.39    |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 0.107    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.27     |\n",
+      "|    ep_rew_mean        | -0.442   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 94900    |\n",
+      "|    time_elapsed       | 1625     |\n",
+      "|    total_timesteps    | 474500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.19    |\n",
+      "|    explained_variance | 0.875    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94899    |\n",
+      "|    policy_loss        | 0.334    |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 0.0181   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.5      |\n",
+      "|    ep_rew_mean        | -0.352   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95000    |\n",
+      "|    time_elapsed       | 1627     |\n",
+      "|    total_timesteps    | 475000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.19    |\n",
+      "|    explained_variance | -16.2    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 94999    |\n",
+      "|    policy_loss        | -2.42    |\n",
+      "|    std                | 0.55     |\n",
+      "|    value_loss         | 0.328    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.71     |\n",
+      "|    ep_rew_mean        | -0.38    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95100    |\n",
+      "|    time_elapsed       | 1629     |\n",
+      "|    total_timesteps    | 475500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.18    |\n",
+      "|    explained_variance | -4.3     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95099    |\n",
+      "|    policy_loss        | 0.787    |\n",
+      "|    std                | 0.55     |\n",
+      "|    value_loss         | 0.0919   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.88     |\n",
+      "|    ep_rew_mean        | -0.472   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95200    |\n",
+      "|    time_elapsed       | 1630     |\n",
+      "|    total_timesteps    | 476000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.17    |\n",
+      "|    explained_variance | 0.917    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95199    |\n",
+      "|    policy_loss        | 0.108    |\n",
+      "|    std                | 0.548    |\n",
+      "|    value_loss         | 0.00221  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.49     |\n",
+      "|    ep_rew_mean        | -0.366   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95300    |\n",
+      "|    time_elapsed       | 1632     |\n",
+      "|    total_timesteps    | 476500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.14    |\n",
+      "|    explained_variance | 0.693    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95299    |\n",
+      "|    policy_loss        | 0.0213   |\n",
+      "|    std                | 0.547    |\n",
+      "|    value_loss         | 0.00328  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.18     |\n",
+      "|    ep_rew_mean        | -0.331   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95400    |\n",
+      "|    time_elapsed       | 1633     |\n",
+      "|    total_timesteps    | 477000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.12    |\n",
+      "|    explained_variance | -0.111   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95399    |\n",
+      "|    policy_loss        | 0.127    |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.016    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.19     |\n",
+      "|    ep_rew_mean        | -0.326   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95500    |\n",
+      "|    time_elapsed       | 1635     |\n",
+      "|    total_timesteps    | 477500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.1     |\n",
+      "|    explained_variance | 0.591    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95499    |\n",
+      "|    policy_loss        | 0.0447   |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.0102   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.42     |\n",
+      "|    ep_rew_mean        | -0.356   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 95600    |\n",
+      "|    time_elapsed       | 1636     |\n",
+      "|    total_timesteps    | 478000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.08    |\n",
+      "|    explained_variance | 0.922    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95599    |\n",
+      "|    policy_loss        | -0.0391  |\n",
+      "|    std                | 0.545    |\n",
+      "|    value_loss         | 0.00233  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.4      |\n",
+      "|    ep_rew_mean        | -0.354   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95700    |\n",
+      "|    time_elapsed       | 1638     |\n",
+      "|    total_timesteps    | 478500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.11    |\n",
+      "|    explained_variance | 0.736    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95699    |\n",
+      "|    policy_loss        | 0.19     |\n",
+      "|    std                | 0.547    |\n",
+      "|    value_loss         | 0.00221  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.18     |\n",
+      "|    ep_rew_mean        | -0.325   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95800    |\n",
+      "|    time_elapsed       | 1641     |\n",
+      "|    total_timesteps    | 479000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.11    |\n",
+      "|    explained_variance | 0.903    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95799    |\n",
+      "|    policy_loss        | -0.319   |\n",
+      "|    std                | 0.547    |\n",
+      "|    value_loss         | 0.00559  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.28     |\n",
+      "|    ep_rew_mean        | -0.346   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 95900    |\n",
+      "|    time_elapsed       | 1642     |\n",
+      "|    total_timesteps    | 479500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.07    |\n",
+      "|    explained_variance | 0.175    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95899    |\n",
+      "|    policy_loss        | 0.138    |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.00413  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.67     |\n",
+      "|    ep_rew_mean        | -0.377   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96000    |\n",
+      "|    time_elapsed       | 1644     |\n",
+      "|    total_timesteps    | 480000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.09    |\n",
+      "|    explained_variance | 0.888    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 95999    |\n",
+      "|    policy_loss        | -0.0767  |\n",
+      "|    std                | 0.549    |\n",
+      "|    value_loss         | 0.00145  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.88     |\n",
+      "|    ep_rew_mean        | -0.472   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96100    |\n",
+      "|    time_elapsed       | 1645     |\n",
+      "|    total_timesteps    | 480500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.05    |\n",
+      "|    explained_variance | 0.952    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96099    |\n",
+      "|    policy_loss        | -0.475   |\n",
+      "|    std                | 0.546    |\n",
+      "|    value_loss         | 0.016    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.97     |\n",
+      "|    ep_rew_mean        | -0.602   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96200    |\n",
+      "|    time_elapsed       | 1647     |\n",
+      "|    total_timesteps    | 481000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.02    |\n",
+      "|    explained_variance | -1.26    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96199    |\n",
+      "|    policy_loss        | -2.03    |\n",
+      "|    std                | 0.543    |\n",
+      "|    value_loss         | 0.243    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.52     |\n",
+      "|    ep_rew_mean        | -0.482   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96300    |\n",
+      "|    time_elapsed       | 1649     |\n",
+      "|    total_timesteps    | 481500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5       |\n",
+      "|    explained_variance | 0.558    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96299    |\n",
+      "|    policy_loss        | -1.29    |\n",
+      "|    std                | 0.542    |\n",
+      "|    value_loss         | 0.0935   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.48     |\n",
+      "|    ep_rew_mean        | -0.64    |\n",
+      "|    success_rate       | 0.95     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 292      |\n",
+      "|    iterations         | 96400    |\n",
+      "|    time_elapsed       | 1650     |\n",
+      "|    total_timesteps    | 482000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5.01    |\n",
+      "|    explained_variance | -1.73    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96399    |\n",
+      "|    policy_loss        | 0.695    |\n",
+      "|    std                | 0.542    |\n",
+      "|    value_loss         | 0.0338   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.99     |\n",
+      "|    ep_rew_mean        | -0.408   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96500    |\n",
+      "|    time_elapsed       | 1652     |\n",
+      "|    total_timesteps    | 482500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.98    |\n",
+      "|    explained_variance | 0.653    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96499    |\n",
+      "|    policy_loss        | 0.972    |\n",
+      "|    std                | 0.54     |\n",
+      "|    value_loss         | 0.0399   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.32     |\n",
+      "|    ep_rew_mean        | -0.351   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96600    |\n",
+      "|    time_elapsed       | 1654     |\n",
+      "|    total_timesteps    | 483000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.96    |\n",
+      "|    explained_variance | -1.03    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96599    |\n",
+      "|    policy_loss        | 0.438    |\n",
+      "|    std                | 0.54     |\n",
+      "|    value_loss         | 0.196    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.63     |\n",
+      "|    ep_rew_mean        | -0.443   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96700    |\n",
+      "|    time_elapsed       | 1656     |\n",
+      "|    total_timesteps    | 483500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -5       |\n",
+      "|    explained_variance | 0.886    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96699    |\n",
+      "|    policy_loss        | -0.0131  |\n",
+      "|    std                | 0.543    |\n",
+      "|    value_loss         | 0.00365  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 7.53     |\n",
+      "|    ep_rew_mean        | -0.641   |\n",
+      "|    success_rate       | 0.96     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96800    |\n",
+      "|    time_elapsed       | 1658     |\n",
+      "|    total_timesteps    | 484000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.98    |\n",
+      "|    explained_variance | -0.246   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96799    |\n",
+      "|    policy_loss        | -1.48    |\n",
+      "|    std                | 0.541    |\n",
+      "|    value_loss         | 0.0927   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 10.1     |\n",
+      "|    ep_rew_mean        | -0.936   |\n",
+      "|    success_rate       | 0.91     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 96900    |\n",
+      "|    time_elapsed       | 1659     |\n",
+      "|    total_timesteps    | 484500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.97    |\n",
+      "|    explained_variance | -0.406   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96899    |\n",
+      "|    policy_loss        | -0.988   |\n",
+      "|    std                | 0.541    |\n",
+      "|    value_loss         | 0.05     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 12.2     |\n",
+      "|    ep_rew_mean        | -1.15    |\n",
+      "|    success_rate       | 0.86     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97000    |\n",
+      "|    time_elapsed       | 1661     |\n",
+      "|    total_timesteps    | 485000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.98    |\n",
+      "|    explained_variance | -707     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 96999    |\n",
+      "|    policy_loss        | -6.14    |\n",
+      "|    std                | 0.542    |\n",
+      "|    value_loss         | 1.24     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 13       |\n",
+      "|    ep_rew_mean        | -1.23    |\n",
+      "|    success_rate       | 0.81     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97100    |\n",
+      "|    time_elapsed       | 1662     |\n",
+      "|    total_timesteps    | 485500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.94    |\n",
+      "|    explained_variance | -13.6    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97099    |\n",
+      "|    policy_loss        | 0.328    |\n",
+      "|    std                | 0.54     |\n",
+      "|    value_loss         | 0.039    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 12       |\n",
+      "|    ep_rew_mean        | -1.08    |\n",
+      "|    success_rate       | 0.85     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97200    |\n",
+      "|    time_elapsed       | 1664     |\n",
+      "|    total_timesteps    | 486000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.92    |\n",
+      "|    explained_variance | -3.71    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97199    |\n",
+      "|    policy_loss        | 2.07     |\n",
+      "|    std                | 0.539    |\n",
+      "|    value_loss         | 0.19     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 11.6     |\n",
+      "|    ep_rew_mean        | -1.04    |\n",
+      "|    success_rate       | 0.88     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97300    |\n",
+      "|    time_elapsed       | 1667     |\n",
+      "|    total_timesteps    | 486500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.9     |\n",
+      "|    explained_variance | -3.21    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97299    |\n",
+      "|    policy_loss        | -11.8    |\n",
+      "|    std                | 0.537    |\n",
+      "|    value_loss         | 4.18     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 12.9     |\n",
+      "|    ep_rew_mean        | -1.17    |\n",
+      "|    success_rate       | 0.88     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97400    |\n",
+      "|    time_elapsed       | 1668     |\n",
+      "|    total_timesteps    | 487000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.91    |\n",
+      "|    explained_variance | -43.2    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97399    |\n",
+      "|    policy_loss        | -0.773   |\n",
+      "|    std                | 0.538    |\n",
+      "|    value_loss         | 0.036    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 11.6     |\n",
+      "|    ep_rew_mean        | -1.01    |\n",
+      "|    success_rate       | 0.94     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97500    |\n",
+      "|    time_elapsed       | 1670     |\n",
+      "|    total_timesteps    | 487500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.92    |\n",
+      "|    explained_variance | 0.662    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97499    |\n",
+      "|    policy_loss        | 3.37     |\n",
+      "|    std                | 0.54     |\n",
+      "|    value_loss         | 0.473    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.95     |\n",
+      "|    ep_rew_mean        | -0.846   |\n",
+      "|    success_rate       | 0.97     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97600    |\n",
+      "|    time_elapsed       | 1671     |\n",
+      "|    total_timesteps    | 488000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.89    |\n",
+      "|    explained_variance | -4.26    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97599    |\n",
+      "|    policy_loss        | 15       |\n",
+      "|    std                | 0.537    |\n",
+      "|    value_loss         | 26       |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 10.4     |\n",
+      "|    ep_rew_mean        | -0.882   |\n",
+      "|    success_rate       | 0.95     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97700    |\n",
+      "|    time_elapsed       | 1673     |\n",
+      "|    total_timesteps    | 488500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.85    |\n",
+      "|    explained_variance | -4.05    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97699    |\n",
+      "|    policy_loss        | -0.48    |\n",
+      "|    std                | 0.535    |\n",
+      "|    value_loss         | 0.0188   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 10.2     |\n",
+      "|    ep_rew_mean        | -0.86    |\n",
+      "|    success_rate       | 0.95     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97800    |\n",
+      "|    time_elapsed       | 1675     |\n",
+      "|    total_timesteps    | 489000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.87    |\n",
+      "|    explained_variance | -7.07    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97799    |\n",
+      "|    policy_loss        | 4.96     |\n",
+      "|    std                | 0.536    |\n",
+      "|    value_loss         | 1.74     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.68     |\n",
+      "|    ep_rew_mean        | -0.463   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 97900    |\n",
+      "|    time_elapsed       | 1676     |\n",
+      "|    total_timesteps    | 489500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.88    |\n",
+      "|    explained_variance | 0.305    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97899    |\n",
+      "|    policy_loss        | -6.96    |\n",
+      "|    std                | 0.538    |\n",
+      "|    value_loss         | 2.72     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.17     |\n",
+      "|    ep_rew_mean        | -0.434   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98000    |\n",
+      "|    time_elapsed       | 1678     |\n",
+      "|    total_timesteps    | 490000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.87    |\n",
+      "|    explained_variance | 0.393    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 97999    |\n",
+      "|    policy_loss        | 2.03     |\n",
+      "|    std                | 0.536    |\n",
+      "|    value_loss         | 0.483    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.46     |\n",
+      "|    ep_rew_mean        | -0.432   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98100    |\n",
+      "|    time_elapsed       | 1680     |\n",
+      "|    total_timesteps    | 490500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.85    |\n",
+      "|    explained_variance | 0.0917   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98099    |\n",
+      "|    policy_loss        | -2.34    |\n",
+      "|    std                | 0.535    |\n",
+      "|    value_loss         | 0.522    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.6      |\n",
+      "|    ep_rew_mean        | -0.463   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98200    |\n",
+      "|    time_elapsed       | 1682     |\n",
+      "|    total_timesteps    | 491000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.84    |\n",
+      "|    explained_variance | 0.72     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98199    |\n",
+      "|    policy_loss        | 2.01     |\n",
+      "|    std                | 0.535    |\n",
+      "|    value_loss         | 0.0987   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 6.47     |\n",
+      "|    ep_rew_mean        | -0.547   |\n",
+      "|    success_rate       | 0.98     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98300    |\n",
+      "|    time_elapsed       | 1684     |\n",
+      "|    total_timesteps    | 491500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.84    |\n",
+      "|    explained_variance | -4.35    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98299    |\n",
+      "|    policy_loss        | -1.36    |\n",
+      "|    std                | 0.534    |\n",
+      "|    value_loss         | 0.0759   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 8.57     |\n",
+      "|    ep_rew_mean        | -0.733   |\n",
+      "|    success_rate       | 0.92     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98400    |\n",
+      "|    time_elapsed       | 1685     |\n",
+      "|    total_timesteps    | 492000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.85    |\n",
+      "|    explained_variance | -11.9    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98399    |\n",
+      "|    policy_loss        | -0.732   |\n",
+      "|    std                | 0.535    |\n",
+      "|    value_loss         | 0.106    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.05     |\n",
+      "|    ep_rew_mean        | -0.768   |\n",
+      "|    success_rate       | 0.92     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98500    |\n",
+      "|    time_elapsed       | 1687     |\n",
+      "|    total_timesteps    | 492500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.83    |\n",
+      "|    explained_variance | -103     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98499    |\n",
+      "|    policy_loss        | -1.24    |\n",
+      "|    std                | 0.533    |\n",
+      "|    value_loss         | 0.163    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.82     |\n",
+      "|    ep_rew_mean        | -0.833   |\n",
+      "|    success_rate       | 0.94     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98600    |\n",
+      "|    time_elapsed       | 1689     |\n",
+      "|    total_timesteps    | 493000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.87    |\n",
+      "|    explained_variance | -2.64    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98599    |\n",
+      "|    policy_loss        | -1.38    |\n",
+      "|    std                | 0.536    |\n",
+      "|    value_loss         | 0.109    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 9.16     |\n",
+      "|    ep_rew_mean        | -0.792   |\n",
+      "|    success_rate       | 0.96     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98700    |\n",
+      "|    time_elapsed       | 1690     |\n",
+      "|    total_timesteps    | 493500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.88    |\n",
+      "|    explained_variance | -1.82    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98699    |\n",
+      "|    policy_loss        | 19.9     |\n",
+      "|    std                | 0.537    |\n",
+      "|    value_loss         | 21.2     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.74     |\n",
+      "|    ep_rew_mean        | -0.491   |\n",
+      "|    success_rate       | 0.99     |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98800    |\n",
+      "|    time_elapsed       | 1693     |\n",
+      "|    total_timesteps    | 494000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.86    |\n",
+      "|    explained_variance | 0.539    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98799    |\n",
+      "|    policy_loss        | -0.219   |\n",
+      "|    std                | 0.535    |\n",
+      "|    value_loss         | 0.0812   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.8      |\n",
+      "|    ep_rew_mean        | -0.454   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 98900    |\n",
+      "|    time_elapsed       | 1694     |\n",
+      "|    total_timesteps    | 494500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.84    |\n",
+      "|    explained_variance | -7.22    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98899    |\n",
+      "|    policy_loss        | 1.01     |\n",
+      "|    std                | 0.535    |\n",
+      "|    value_loss         | 0.123    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.79     |\n",
+      "|    ep_rew_mean        | -0.398   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99000    |\n",
+      "|    time_elapsed       | 1696     |\n",
+      "|    total_timesteps    | 495000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.8     |\n",
+      "|    explained_variance | 0.704    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 98999    |\n",
+      "|    policy_loss        | -0.619   |\n",
+      "|    std                | 0.532    |\n",
+      "|    value_loss         | 0.0342   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.67     |\n",
+      "|    ep_rew_mean        | -0.381   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99100    |\n",
+      "|    time_elapsed       | 1698     |\n",
+      "|    total_timesteps    | 495500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.77    |\n",
+      "|    explained_variance | -6.73    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99099    |\n",
+      "|    policy_loss        | -0.454   |\n",
+      "|    std                | 0.529    |\n",
+      "|    value_loss         | 0.129    |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.58     |\n",
+      "|    ep_rew_mean        | -0.381   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99200    |\n",
+      "|    time_elapsed       | 1699     |\n",
+      "|    total_timesteps    | 496000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.79    |\n",
+      "|    explained_variance | -4.1     |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99199    |\n",
+      "|    policy_loss        | 0.0709   |\n",
+      "|    std                | 0.531    |\n",
+      "|    value_loss         | 0.02     |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.33     |\n",
+      "|    ep_rew_mean        | -0.352   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99300    |\n",
+      "|    time_elapsed       | 1701     |\n",
+      "|    total_timesteps    | 496500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.78    |\n",
+      "|    explained_variance | -0.18    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99299    |\n",
+      "|    policy_loss        | 0.445    |\n",
+      "|    std                | 0.53     |\n",
+      "|    value_loss         | 0.0119   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.2      |\n",
+      "|    ep_rew_mean        | -0.421   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99400    |\n",
+      "|    time_elapsed       | 1702     |\n",
+      "|    total_timesteps    | 497000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.8     |\n",
+      "|    explained_variance | -0.351   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99399    |\n",
+      "|    policy_loss        | 0.292    |\n",
+      "|    std                | 0.532    |\n",
+      "|    value_loss         | 0.0207   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.05     |\n",
+      "|    ep_rew_mean        | -0.318   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99500    |\n",
+      "|    time_elapsed       | 1705     |\n",
+      "|    total_timesteps    | 497500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.78    |\n",
+      "|    explained_variance | 0.481    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99499    |\n",
+      "|    policy_loss        | -0.367   |\n",
+      "|    std                | 0.529    |\n",
+      "|    value_loss         | 0.0192   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.64     |\n",
+      "|    ep_rew_mean        | -0.387   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99600    |\n",
+      "|    time_elapsed       | 1707     |\n",
+      "|    total_timesteps    | 498000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.76    |\n",
+      "|    explained_variance | -0.395   |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99599    |\n",
+      "|    policy_loss        | 0.939    |\n",
+      "|    std                | 0.527    |\n",
+      "|    value_loss         | 0.0312   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.05     |\n",
+      "|    ep_rew_mean        | -0.41    |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99700    |\n",
+      "|    time_elapsed       | 1708     |\n",
+      "|    total_timesteps    | 498500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.74    |\n",
+      "|    explained_variance | 0.941    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99699    |\n",
+      "|    policy_loss        | -0.00683 |\n",
+      "|    std                | 0.525    |\n",
+      "|    value_loss         | 0.0017   |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.9      |\n",
+      "|    ep_rew_mean        | -0.406   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99800    |\n",
+      "|    time_elapsed       | 1710     |\n",
+      "|    total_timesteps    | 499000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.74    |\n",
+      "|    explained_variance | 0.461    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99799    |\n",
+      "|    policy_loss        | 0.149    |\n",
+      "|    std                | 0.525    |\n",
+      "|    value_loss         | 0.00716  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 5.06     |\n",
+      "|    ep_rew_mean        | -0.414   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 99900    |\n",
+      "|    time_elapsed       | 1712     |\n",
+      "|    total_timesteps    | 499500   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.72    |\n",
+      "|    explained_variance | 0.854    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99899    |\n",
+      "|    policy_loss        | -0.272   |\n",
+      "|    std                | 0.524    |\n",
+      "|    value_loss         | 0.00807  |\n",
+      "------------------------------------\n",
+      "------------------------------------\n",
+      "| rollout/              |          |\n",
+      "|    ep_len_mean        | 4.8      |\n",
+      "|    ep_rew_mean        | -0.397   |\n",
+      "|    success_rate       | 1        |\n",
+      "| time/                 |          |\n",
+      "|    fps                | 291      |\n",
+      "|    iterations         | 100000   |\n",
+      "|    time_elapsed       | 1713     |\n",
+      "|    total_timesteps    | 500000   |\n",
+      "| train/                |          |\n",
+      "|    entropy_loss       | -4.72    |\n",
+      "|    explained_variance | 0.972    |\n",
+      "|    learning_rate      | 0.0007   |\n",
+      "|    n_updates          | 99999    |\n",
+      "|    policy_loss        | 0.206    |\n",
+      "|    std                | 0.523    |\n",
+      "|    value_loss         | 0.00366  |\n",
+      "------------------------------------\n"
+     ]
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": []
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       "<br>    <style><br>        .wandb-row {<br>            display: flex;<br>            flex-direction: row;<br>            flex-wrap: wrap;<br>            justify-content: flex-start;<br>            width: 100%;<br>        }<br>        .wandb-col {<br>            display: flex;<br>            flex-direction: column;<br>            flex-basis: 100%;<br>            flex: 1;<br>            padding: 10px;<br>        }<br>    </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████</td></tr><tr><td>rollout/ep_len_mean</td><td>█▆▆▆▆▆▃▄▄▄▃▂▅▆▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂</td></tr><tr><td>rollout/ep_rew_mean</td><td>▁▄▄▅▆▆▇▆████████████████████████████████</td></tr><tr><td>rollout/success_rate</td><td>▁▄▃▄▄▅▆██▇████████████████████████████▇█</td></tr><tr><td>time/fps</td><td>▁▂▄▂▅▆▇███▇▇▇▇▇▇▇███▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇</td></tr><tr><td>train/entropy_loss</td><td>▁▁▁▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇█</td></tr><tr><td>train/explained_variance</td><td>▁▇▆▃▃▅▇▆▇█▇██▇████████████▇█████▄█████▆█</td></tr><tr><td>train/learning_rate</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/policy_loss</td><td>▁▃▂▃▃▃▂▇█▂▁▃▃▃▃▁▂▂▂▂▂▃▂▃▃▂▃▂▂▂▃▂▃▃▃▂▃▃▃▃</td></tr><tr><td>train/std</td><td>█▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁</td></tr><tr><td>train/value_loss</td><td>▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table 
class=\"wandb\"><tr><td>global_step</td><td>500000</td></tr><tr><td>rollout/ep_len_mean</td><td>4.8</td></tr><tr><td>rollout/ep_rew_mean</td><td>-0.39746</td></tr><tr><td>rollout/success_rate</td><td>1</td></tr><tr><td>time/fps</td><td>291</td></tr><tr><td>train/entropy_loss</td><td>-4.71591</td></tr><tr><td>train/explained_variance</td><td>0.97199</td></tr><tr><td>train/learning_rate</td><td>0.0007</td></tr><tr><td>train/policy_loss</td><td>0.206</td></tr><tr><td>train/std</td><td>0.52343</td></tr><tr><td>train/value_loss</td><td>0.00366</td></tr></table><br/></div></div>"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">brisk-fog-2</strong> at: <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws</a><br> View project at: <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach</a><br>Synced 5 W&B file(s), 0 media file(s), 3 artifact file(s) and 1 other file(s)"
+      ]
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ],
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20250312_160957-y39cy9ws/logs</code>"
+      ]
+     },
+     "metadata": {}
+    }
+   ]
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "model.save(\"a2c_pandareach\")\n",
+    "\n",
+    "login(token=\"xxx\")\n",
+    "push_to_hub(\n",
+    "            repo_id=\"Thomstr/A2C_PandaReach\",\n",
+    "            filename=\"a2c_pandareach.zip\",\n",
+    "            commit_message=\"Added A2C model for PandaReach with Stable Baselines3\",\n",
+    "        )"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 173,
+     "referenced_widgets": [
+      "4b24dddcdfc64f9d943f67e25675a8d1",
+      "7f8f84aaf0f34b12950cc66d078b2812",
+      "440f7f8102854ac4a38f5c7929446fe6",
+      "f92b16113ecf4cbf999208426529ccad",
+      "04966e7c2fff44d08a5d5d2083c4c36f",
+      "71a187c360684dfa8165f0da5a6bd84b",
+      "48aea33a04ec425291c36aba5afce22e",
+      "33217ae5bf0a484fb3ae7bbf17cf0fbc",
+      "d69510a7c2894695a6ac9d8f03daf543",
+      "6e384db209c243c88dbc91571a4418f7",
+      "e038b80524d4425a86421308d60c7445"
+     ]
+    },
+    "id": "mFGEVNsW-2fq",
+    "outputId": "a40f2762-15fe-4f2b-d03e-ffd598e2cb5b"
+   },
+   "execution_count": 4,
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "\u001B[38;5;4mℹ Pushing repo Thomstr/A2C_PandaReach to the Hugging Face Hub\u001B[0m\n"
+     ]
+    },
+    {
+     "output_type": "display_data",
+     "data": {
+      "text/plain": [
+       "a2c_pandareach.zip:   0%|          | 0.00/114k [00:00<?, ?B/s]"
+      ],
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "4b24dddcdfc64f9d943f67e25675a8d1"
+      }
+     },
+     "metadata": {}
+    },
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "\u001B[38;5;2m✔ Your model has been uploaded to the Hub, you can find it here:\n",
+      "https://huggingface.co/Thomstr/A2C_PandaReach/tree/main/\u001B[0m\n"
+     ]
+    },
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "CommitInfo(commit_url='https://huggingface.co/Thomstr/A2C_PandaReach/commit/62a9cd410bd1e266a040b6966191aa7deaf3eb62', commit_message='Added A2C model for PandaReach with Stable Baselines3', commit_description='', oid='62a9cd410bd1e266a040b6966191aa7deaf3eb62', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Thomstr/A2C_PandaReach', endpoint='https://huggingface.co', repo_type='model', repo_id='Thomstr/A2C_PandaReach'), pr_revision=None, pr_num=None)"
+      ],
+      "application/vnd.google.colaboratory.intrinsic+json": {
+       "type": "string"
+      }
+     },
+     "metadata": {},
+     "execution_count": 4
+    }
+   ]
+  }
+ ]
+}
diff --git a/a2c_sb3_panda_reach.py b/a2c_sb3_panda_reach.py
index ec45523687c7c76469ed2df686b056272f80b405..cab34acd1ca2382a69975a98733a2ce0678a2edb 100644
--- a/a2c_sb3_panda_reach.py
+++ b/a2c_sb3_panda_reach.py
@@ -1,45 +1,58 @@
-import gym
+"""Train A2C on PandaReachJointsDense-v3, log to W&B, push the model to the Hub.
+
+Credentials are read from the environment rather than written in this file:
+WANDB_API_KEY for Weights & Biases and HF_TOKEN for the Hugging Face Hub.
+"""
+import os
+
+import gymnasium as gym
 import panda_gym
 from stable_baselines3 import A2C
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import DummyVecEnv
 import wandb
 from wandb.integration.sb3 import WandbCallback
+from huggingface_hub import login
+from huggingface_sb3 import push_to_hub
 
 
-config = {
-    "policy_type": "MultiInputPolicy",
-    "total_timesteps": 500000,
-    "env_name": "PandaReachJointsDense-v3",
-}
+if __name__ == "__main__":
+    config = {
+        "policy_type": "MultiInputPolicy",
+        "total_timesteps": 500000,
+        "env_name": "PandaReachJointsDense-v3",
+    }
 
-run = wandb.init(
-    project="pandareach",
-    config=config,
-    sync_tensorboard=True,
-    monitor_gym=True,
-    save_code=True,
-)
+    # Take the API key from the environment so that no secret is committed.
+    # If the variable is unset, None is passed (presumably wandb then uses its
+    # own stored credentials or prompts -- confirm in the wandb.login docs).
+    wandb.login(key=os.environ.get("WANDB_API_KEY"))
+
+    run = wandb.init(
+        project="pandareach",
+        config=config,
+        sync_tensorboard=True,
+        monitor_gym=True,
+        save_code=True,
+    )
 
-def make_env():
     env = gym.make(config["env_name"])
-    env = Monitor(env)  # record stats such as returns
-    return env
-
-env = DummyVecEnv([make_env])
-env = gym.make("PandaReachJointsDense-v3")
-model = A2C(config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}")
-model.learn(
-    total_timesteps=config["total_timesteps"],
-    callback=WandbCallback(
+    model = A2C(config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}")
+    model.learn(
+        total_timesteps=config["total_timesteps"],
+        callback=WandbCallback(
+        )
     )
-)
 
-run.finish()
+    run.finish()
+
+    # Save locally first; the upload below refers to "a2c_pandareach.zip".
+    model.save("a2c_pandareach")
 
-login(token="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")
-push_to_hub(
-            repo_id="Thomstr/A2C_PandaReach",
-            filename="a2c_pandareach.zip",
-            commit_message="Added A2C model for PandaReach with Stable Baselines3",
-        )
\ No newline at end of file
+    # The Hub token is also read from the environment (HF_TOKEN), not the source.
+    login(token=os.environ.get("HF_TOKEN"))
+    push_to_hub(
+        repo_id="Thomstr/A2C_PandaReach",
+        filename="a2c_pandareach.zip",
+        commit_message="Added A2C model for PandaReach with Stable Baselines3",
+    )