diff --git a/README.md b/README.md index 40ab7da556692a6b00a8dd31bf1ff62e391d1eba..4e23205ac37cc6f8dc8fc1c912b5571c70c9086d 100644 --- a/README.md +++ b/README.md @@ -25,9 +25,20 @@ see [a2c_sb3_cartpole.py](a2c_sb3_cartpole.py) ### Hugging Face Hub -[Link to the trained model](https://huggingface.co/Thomstr/A2C_CartPole/tree/main) +[Link to the trained model (cartpole)](https://huggingface.co/Thomstr/A2C_CartPole/tree/main) ### Weights & Biases -[Link to the wandb run](https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/cartpole/runs/vh4anh20/workspace?nw=nwuserthomasdgr) +[Link to the wandb run (cartpole)](https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/cartpole/runs/vh4anh20/workspace?nw=nwuserthomasdgr) ### Full workflow with panda-gym +See [a2c_sb3_panda_reach.py](a2c_sb3_panda_reach.py) + +Because I could not get panda-gym to install on my PC, I ran this workflow on Google Colab instead. + +See my notebook [here (online)](https://colab.research.google.com/drive/1l03F398QLHHVVqJ-GvRgxA4d-cCocF4K?usp=sharing) +or open [a2c_sb3_panda_reach.ipynb](a2c_sb3_panda_reach.ipynb) directly. + + +[Link to the trained model (panda reach)](https://huggingface.co/Thomstr/A2C_PandaReach/tree/main) + +[Link to the wandb run (panda reach)](https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws?nw=nwuserthomasdgr) diff --git a/a2c_sb3_cartpole.py b/a2c_sb3_cartpole.py index 9a370e3a67325fd902898f59327aed6f8110c66e..46a0747be1cabeda0424c82a3cbef085f78bc55d 100644 --- a/a2c_sb3_cartpole.py +++ b/a2c_sb3_cartpole.py @@ -21,7 +21,7 @@ if __name__ == "__main__": "total_timesteps": 25000, "env_name": "CartPole-v1", } - wandb.login(key='4ac81e81b051a56ebfc528b579021cfc9ed1e5dc') + wandb.login(key='xxxxxxx') run = wandb.init( project="cartpole", config=config, diff --git a/a2c_sb3_panda_reach.ipynb b/a2c_sb3_panda_reach.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..6df3a3ddb5b13969150d36e4d77fa4437d807d0c --- /dev/null +++ 
b/a2c_sb3_panda_reach.ipynb @@ -0,0 +1,5885 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "4b24dddcdfc64f9d943f67e25675a8d1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7f8f84aaf0f34b12950cc66d078b2812", + "IPY_MODEL_440f7f8102854ac4a38f5c7929446fe6", + "IPY_MODEL_f92b16113ecf4cbf999208426529ccad" + ], + "layout": "IPY_MODEL_04966e7c2fff44d08a5d5d2083c4c36f" + } + }, + "7f8f84aaf0f34b12950cc66d078b2812": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_71a187c360684dfa8165f0da5a6bd84b", + "placeholder": "", + "style": "IPY_MODEL_48aea33a04ec425291c36aba5afce22e", + "value": "a2c_pandareach.zip: 100%" + } + }, + "440f7f8102854ac4a38f5c7929446fe6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33217ae5bf0a484fb3ae7bbf17cf0fbc", + "max": 113685, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d69510a7c2894695a6ac9d8f03daf543", + "value": 113685 + } + }, + "f92b16113ecf4cbf999208426529ccad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e384db209c243c88dbc91571a4418f7", + "placeholder": "", + "style": "IPY_MODEL_e038b80524d4425a86421308d60c7445", + "value": " 114k/114k [00:00<00:00, 797kB/s]" + } + }, + "04966e7c2fff44d08a5d5d2083c4c36f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71a187c360684dfa8165f0da5a6bd84b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48aea33a04ec425291c36aba5afce22e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "StyleView", + "description_width": "" + } + }, + "33217ae5bf0a484fb3ae7bbf17cf0fbc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d69510a7c2894695a6ac9d8f03daf543": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6e384db209c243c88dbc91571a4418f7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e038b80524d4425a86421308d60c7445": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "!pip install panda-gym" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hvK-6XS59xLJ", + "outputId": "97a119da-523e-418a-ec0d-8d236d9d6a9e" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting panda-gym\n", + " Downloading panda_gym-3.0.7-py3-none-any.whl.metadata (4.3 kB)\n", + "Requirement already satisfied: 
gymnasium>=0.26 in /usr/local/lib/python3.11/dist-packages (from panda-gym) (1.1.1)\n", + "Collecting pybullet (from panda-gym)\n", + " Downloading pybullet-3.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from panda-gym) (1.26.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from panda-gym) (1.14.1)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym) (3.1.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym) (4.12.2)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym) (0.0.4)\n", + "Downloading panda_gym-3.0.7-py3-none-any.whl (23 kB)\n", + "Downloading pybullet-3.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103.2 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m103.2/103.2 MB\u001B[0m \u001B[31m7.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hInstalling collected packages: pybullet, panda-gym\n", + "Successfully installed panda-gym-3.0.7 pybullet-3.2.7\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install huggingface-sb3==2.3.1" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T4NUJWkfBA4i", + "outputId": "27d51024-d6f1-4d33-958d-f56f67df44d3" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting huggingface-sb3==2.3.1\n", + " Downloading huggingface_sb3-2.3.1-py3-none-any.whl.metadata (6.2 kB)\n", + "Requirement already satisfied: huggingface-hub~=0.8 in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (0.28.1)\n", + 
"Requirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (6.0.2)\n", + "Requirement already satisfied: wasabi in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (1.1.3)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (1.26.4)\n", + "Requirement already satisfied: cloudpickle>=1.6 in /usr/local/lib/python3.11/dist-packages (from huggingface-sb3==2.3.1) (3.1.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (24.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2.3.0)\n", + "Requirement already 
satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2025.1.31)\n", + "Downloading huggingface_sb3-2.3.1-py3-none-any.whl (9.5 kB)\n", + "Installing collected packages: huggingface-sb3\n", + "Successfully installed huggingface-sb3-2.3.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install stable-baselines3[extra]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "bp-mvGab-IUr", + "outputId": "b37be13e-46d5-466c-8b12-8a1dda33fb16" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting stable-baselines3[extra]\n", + " Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)\n", + "Collecting gymnasium<1.1.0,>=0.29.1 (from stable-baselines3[extra])\n", + " Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)\n", + "Requirement already satisfied: numpy<3.0,>=1.20 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (1.26.4)\n", + "Requirement already satisfied: torch<3.0,>=2.3 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.5.1+cu124)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (3.1.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.2.2)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (3.10.0)\n", + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (4.11.0.86)\n", + "Requirement already satisfied: pygame in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.6.1)\n", + "Requirement already satisfied: tensorboard>=2.9.1 in 
/usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (2.18.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (5.9.5)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (4.67.1)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (13.9.4)\n", + "Requirement already satisfied: ale-py>=0.9.0 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (0.10.2)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (from stable-baselines3[extra]) (11.1.0)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium<1.1.0,>=0.29.1->stable-baselines3[extra]) (4.12.2)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium<1.1.0,>=0.29.1->stable-baselines3[extra]) (0.0.4)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.70.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.7)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (24.2)\n", + "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (4.25.6)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.11/dist-packages (from 
tensorboard>=2.9.1->stable-baselines3[extra]) (75.1.0)\n", + "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.17.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.1.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.17.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.1.6)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (2024.10.0)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from 
torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (12.4.127)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])\n", + " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3[extra]) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0,>=2.3->stable-baselines3[extra]) (1.3.0)\n", + "Requirement already satisfied: 
contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (4.56.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (1.4.8)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (3.2.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3[extra]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3[extra]) (2025.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3[extra]) (2025.1)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich->stable-baselines3[extra]) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich->stable-baselines3[extra]) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich->stable-baselines3[extra]) (0.1.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.9.1->stable-baselines3[extra]) (3.0.2)\n", + "Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m958.1/958.1 kB\u001B[0m \u001B[31m12.0 MB/s\u001B[0m eta 
\u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m363.4/363.4 MB\u001B[0m \u001B[31m4.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m13.8/13.8 MB\u001B[0m \u001B[31m94.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m24.6/24.6 MB\u001B[0m \u001B[31m71.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m883.7/883.7 kB\u001B[0m \u001B[31m53.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m664.8/664.8 MB\u001B[0m \u001B[31m2.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m211.5/211.5 MB\u001B[0m \u001B[31m5.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m56.3/56.3 MB\u001B[0m \u001B[31m11.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading 
nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m127.9/127.9 MB\u001B[0m \u001B[31m7.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m207.5/207.5 MB\u001B[0m \u001B[31m6.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m21.1/21.1 MB\u001B[0m \u001B[31m80.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hDownloading stable_baselines3-2.5.0-py3-none-any.whl (183 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m183.9/183.9 kB\u001B[0m \u001B[31m18.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, gymnasium, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, stable-baselines3\n", + " Attempting uninstall: nvidia-nvjitlink-cu12\n", + " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", + " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", + " Attempting uninstall: nvidia-curand-cu12\n", + " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", + " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", + " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", + " Attempting uninstall: nvidia-cufft-cu12\n", + " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", + " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", + " Successfully 
uninstalled nvidia-cufft-cu12-11.2.3.61\n", + " Attempting uninstall: nvidia-cuda-runtime-cu12\n", + " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", + " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-cupti-cu12\n", + " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cublas-cu12\n", + " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", + " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", + " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", + " Attempting uninstall: gymnasium\n", + " Found existing installation: gymnasium 1.1.1\n", + " Uninstalling gymnasium-1.1.1:\n", + " Successfully uninstalled gymnasium-1.1.1\n", + " Attempting uninstall: nvidia-cusparse-cu12\n", + " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", + " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", + " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", + " Attempting uninstall: nvidia-cudnn-cu12\n", + " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", + " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", + " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", + " Attempting uninstall: nvidia-cusolver-cu12\n", + " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", + " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", + " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", + "Successfully installed gymnasium-1.0.0 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 
nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 stable-baselines3-2.5.0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "gymnasium" + ] + }, + "id": "83027100988940df948a6b693e6748d8" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "v5mGp7Nj9UMT" + }, + "outputs": [], + "source": [ + "import gymnasium as gym\n", + "import panda_gym\n", + "from stable_baselines3 import A2C\n", + "from stable_baselines3.common.monitor import Monitor\n", + "from stable_baselines3.common.vec_env import DummyVecEnv\n", + "import wandb\n", + "from wandb.integration.sb3 import WandbCallback\n", + "from huggingface_hub import login\n", + "from huggingface_sb3 import push_to_hub\n" + ] + }, + { + "cell_type": "code", + "source": [ + "config = {\n", + " \"policy_type\": \"MultiInputPolicy\",\n", + " \"total_timesteps\": 500000,\n", + " \"env_name\": \"PandaReachJointsDense-v3\",\n", + " }\n", + "\n", + "wandb.login(key='xxx')\n", + "\n", + "run = wandb.init(\n", + " project=\"pandareach\",\n", + " config=config,\n", + " sync_tensorboard=True,\n", + " monitor_gym=True,\n", + " save_code=True,\n", + ")\n", + "\n", + "env = gym.make(config[\"env_name\"])\n", + "model = A2C(config[\"policy_type\"], env, verbose=1, tensorboard_log=f\"runs/{run.id}\")\n", + "model.learn(\n", + " total_timesteps=config[\"total_timesteps\"],\n", + " callback=WandbCallback(\n", + " )\n", + ")\n", + "\n", + "run.finish()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "collapsed": true, + "id": "wrqry5Vq-1xn", + "outputId": "7f7a2ac5-9e33-484e-f2c3-af2275ec4df0" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": 
"stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/notebook/utils.py:280: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " return LooseVersion(v) >= LooseVersion(check)\n", + "\u001B[34m\u001B[1mwandb\u001B[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n", + "\u001B[34m\u001B[1mwandb\u001B[0m: \u001B[33mWARNING\u001B[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n", + "\u001B[34m\u001B[1mwandb\u001B[0m: \u001B[33mWARNING\u001B[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n", + "\u001B[34m\u001B[1mwandb\u001B[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n", + "\u001B[34m\u001B[1mwandb\u001B[0m: Currently logged in as: \u001B[33mthomasdgr\u001B[0m (\u001B[33mthomasdgr-ecole-centrale-de-lyon\u001B[0m) to \u001B[32mhttps://api.wandb.ai\u001B[0m. 
Use \u001B[1m`wandb login --relogin`\u001B[0m to force relogin\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Tracking run with wandb version 0.19.8" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Run data is saved locally in <code>/content/wandb/run-20250312_160957-y39cy9ws</code>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Syncing run <strong><a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws' target=\"_blank\">brisk-fog-2</a></strong> to <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View project at <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run at <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001B[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.\u001B[0m\n", + "| std | 0.626 |\n", + "| value_loss | 0.00806 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.76 |\n", + "| ep_rew_mean | -0.537 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 73800 |\n", + "| time_elapsed | 1261 |\n", + "| total_timesteps | 369000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.22 |\n", + "| explained_variance | 0.391 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73799 |\n", + "| policy_loss | 0.919 |\n", + "| std | 0.626 |\n", + "| value_loss | 0.193 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.81 |\n", + "| ep_rew_mean | -0.756 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 73900 |\n", + "| time_elapsed | 1263 |\n", + "| total_timesteps | 369500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.19 |\n", + "| explained_variance | -0.708 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73899 |\n", + "| policy_loss | 3.03 |\n", + "| std | 0.624 |\n", + "| value_loss | 2.22 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.79 |\n", + "| ep_rew_mean | -0.689 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74000 |\n", + "| time_elapsed | 1264 |\n", + "| total_timesteps | 370000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.19 |\n", + "| explained_variance | 0.464 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73999 |\n", + "| policy_loss | -0.834 |\n", + "| std | 0.625 |\n", + "| value_loss | 0.0336 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.15 |\n", + "| ep_rew_mean | -0.809 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74100 |\n", + "| time_elapsed | 1266 |\n", + "| 
total_timesteps | 370500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.21 |\n", + "| explained_variance | 0.952 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74099 |\n", + "| policy_loss | -0.512 |\n", + "| std | 0.626 |\n", + "| value_loss | 0.0782 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.98 |\n", + "| ep_rew_mean | -0.937 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74200 |\n", + "| time_elapsed | 1268 |\n", + "| total_timesteps | 371000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.19 |\n", + "| explained_variance | 0.584 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74199 |\n", + "| policy_loss | -1.23 |\n", + "| std | 0.623 |\n", + "| value_loss | 0.036 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.93 |\n", + "| ep_rew_mean | -0.748 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74300 |\n", + "| time_elapsed | 1270 |\n", + "| total_timesteps | 371500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.2 |\n", + "| explained_variance | -1.43 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74299 |\n", + "| policy_loss | 2.67 |\n", + "| std | 0.626 |\n", + "| value_loss | 0.671 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.59 |\n", + "| ep_rew_mean | -0.563 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74400 |\n", + "| time_elapsed | 1272 |\n", + "| total_timesteps | 372000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.19 |\n", + "| explained_variance | -0.742 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74399 |\n", + "| policy_loss | 6.47 |\n", + "| std | 0.626 |\n", + "| value_loss | 1.18 |\n", + 
"------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.7 |\n", + "| ep_rew_mean | -0.507 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74500 |\n", + "| time_elapsed | 1274 |\n", + "| total_timesteps | 372500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.18 |\n", + "| explained_variance | 0.643 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74499 |\n", + "| policy_loss | 0.548 |\n", + "| std | 0.625 |\n", + "| value_loss | 0.0473 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.68 |\n", + "| ep_rew_mean | -0.537 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74600 |\n", + "| time_elapsed | 1275 |\n", + "| total_timesteps | 373000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.14 |\n", + "| explained_variance | -3.11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74599 |\n", + "| policy_loss | 1.45 |\n", + "| std | 0.622 |\n", + "| value_loss | 0.114 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.9 |\n", + "| ep_rew_mean | -0.558 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74700 |\n", + "| time_elapsed | 1277 |\n", + "| total_timesteps | 373500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.16 |\n", + "| explained_variance | 0.473 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74699 |\n", + "| policy_loss | -1.49 |\n", + "| std | 0.623 |\n", + "| value_loss | 0.11 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.53 |\n", + "| ep_rew_mean | -0.505 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74800 |\n", + "| 
time_elapsed | 1279 |\n", + "| total_timesteps | 374000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.15 |\n", + "| explained_variance | 0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74799 |\n", + "| policy_loss | -0.158 |\n", + "| std | 0.623 |\n", + "| value_loss | 0.000636 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.64 |\n", + "| ep_rew_mean | -0.526 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 74900 |\n", + "| time_elapsed | 1281 |\n", + "| total_timesteps | 374500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.13 |\n", + "| explained_variance | 0.663 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74899 |\n", + "| policy_loss | -0.532 |\n", + "| std | 0.623 |\n", + "| value_loss | 0.0172 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.93 |\n", + "| ep_rew_mean | -0.474 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75000 |\n", + "| time_elapsed | 1283 |\n", + "| total_timesteps | 375000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.11 |\n", + "| explained_variance | 0.965 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74999 |\n", + "| policy_loss | 0.147 |\n", + "| std | 0.621 |\n", + "| value_loss | 0.00133 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.65 |\n", + "| ep_rew_mean | -0.419 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75100 |\n", + "| time_elapsed | 1285 |\n", + "| total_timesteps | 375500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.11 |\n", + "| explained_variance | 0.884 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75099 |\n", + "| policy_loss | -0.948 |\n", + "| std | 0.62 |\n", 
+ "| value_loss | 0.0348 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.54 |\n", + "| ep_rew_mean | -0.379 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75200 |\n", + "| time_elapsed | 1286 |\n", + "| total_timesteps | 376000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.1 |\n", + "| explained_variance | 0.903 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75199 |\n", + "| policy_loss | 0.287 |\n", + "| std | 0.619 |\n", + "| value_loss | 0.00231 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.89 |\n", + "| ep_rew_mean | -0.432 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75300 |\n", + "| time_elapsed | 1288 |\n", + "| total_timesteps | 376500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.13 |\n", + "| explained_variance | 0.876 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75299 |\n", + "| policy_loss | 0.399 |\n", + "| std | 0.622 |\n", + "| value_loss | 0.00878 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.21 |\n", + "| ep_rew_mean | -0.472 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75400 |\n", + "| time_elapsed | 1290 |\n", + "| total_timesteps | 377000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.1 |\n", + "| explained_variance | 0.782 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75399 |\n", + "| policy_loss | -0.228 |\n", + "| std | 0.62 |\n", + "| value_loss | 0.00453 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.31 |\n", + "| ep_rew_mean | -0.469 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + 
"| iterations | 75500 |\n", + "| time_elapsed | 1291 |\n", + "| total_timesteps | 377500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.05 |\n", + "| explained_variance | 0.252 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75499 |\n", + "| policy_loss | 0.32 |\n", + "| std | 0.616 |\n", + "| value_loss | 0.00817 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.77 |\n", + "| ep_rew_mean | -0.415 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75600 |\n", + "| time_elapsed | 1293 |\n", + "| total_timesteps | 378000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.01 |\n", + "| explained_variance | 0.869 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75599 |\n", + "| policy_loss | -0.165 |\n", + "| std | 0.612 |\n", + "| value_loss | 0.00607 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.6 |\n", + "| ep_rew_mean | -0.38 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75700 |\n", + "| time_elapsed | 1295 |\n", + "| total_timesteps | 378500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.02 |\n", + "| explained_variance | 0.934 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75699 |\n", + "| policy_loss | 0.284 |\n", + "| std | 0.613 |\n", + "| value_loss | 0.00298 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.71 |\n", + "| ep_rew_mean | -0.414 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75800 |\n", + "| time_elapsed | 1297 |\n", + "| total_timesteps | 379000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.01 |\n", + "| explained_variance | -0.123 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75799 |\n", + "| policy_loss | -1.11 
|\n", + "| std | 0.613 |\n", + "| value_loss | 0.0601 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.46 |\n", + "| ep_rew_mean | -0.384 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 75900 |\n", + "| time_elapsed | 1299 |\n", + "| total_timesteps | 379500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.98 |\n", + "| explained_variance | 0.948 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75899 |\n", + "| policy_loss | -0.0621 |\n", + "| std | 0.609 |\n", + "| value_loss | 0.00387 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.48 |\n", + "| ep_rew_mean | -0.371 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76000 |\n", + "| time_elapsed | 1300 |\n", + "| total_timesteps | 380000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.97 |\n", + "| explained_variance | -0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75999 |\n", + "| policy_loss | -0.626 |\n", + "| std | 0.608 |\n", + "| value_loss | 0.0292 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.311 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76100 |\n", + "| time_elapsed | 1302 |\n", + "| total_timesteps | 380500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.91 |\n", + "| explained_variance | 0.688 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76099 |\n", + "| policy_loss | -0.221 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.00369 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.52 |\n", + "| ep_rew_mean | -0.375 |\n", + "| success_rate | 1 |\n", + "| 
time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76200 |\n", + "| time_elapsed | 1304 |\n", + "| total_timesteps | 381000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.91 |\n", + "| explained_variance | 0.31 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76199 |\n", + "| policy_loss | 0.367 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.0075 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.74 |\n", + "| ep_rew_mean | -0.407 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76300 |\n", + "| time_elapsed | 1305 |\n", + "| total_timesteps | 381500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.89 |\n", + "| explained_variance | 0.957 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76299 |\n", + "| policy_loss | -0.287 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.00325 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.7 |\n", + "| ep_rew_mean | -0.394 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76400 |\n", + "| time_elapsed | 1307 |\n", + "| total_timesteps | 382000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.87 |\n", + "| explained_variance | 0.847 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76399 |\n", + "| policy_loss | 0.207 |\n", + "| std | 0.598 |\n", + "| value_loss | 0.00257 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.32 |\n", + "| ep_rew_mean | -0.366 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76500 |\n", + "| time_elapsed | 1309 |\n", + "| total_timesteps | 382500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.88 |\n", + "| explained_variance | 0.883 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 
76499 |\n", + "| policy_loss | 0.161 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.00138 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.97 |\n", + "| ep_rew_mean | -0.312 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76600 |\n", + "| time_elapsed | 1311 |\n", + "| total_timesteps | 383000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.86 |\n", + "| explained_variance | 0.96 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76599 |\n", + "| policy_loss | 0.289 |\n", + "| std | 0.598 |\n", + "| value_loss | 0.00437 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.6 |\n", + "| ep_rew_mean | -0.399 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76700 |\n", + "| time_elapsed | 1313 |\n", + "| total_timesteps | 383500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.84 |\n", + "| explained_variance | 0.995 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76699 |\n", + "| policy_loss | 0.187 |\n", + "| std | 0.597 |\n", + "| value_loss | 0.00169 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.363 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76800 |\n", + "| time_elapsed | 1314 |\n", + "| total_timesteps | 384000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 |\n", + "| explained_variance | 0.858 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76799 |\n", + "| policy_loss | 0.153 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00414 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.379 |\n", + "| 
success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 76900 |\n", + "| time_elapsed | 1316 |\n", + "| total_timesteps | 384500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 |\n", + "| explained_variance | 0.979 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76899 |\n", + "| policy_loss | 0.231 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.00242 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.01 |\n", + "| ep_rew_mean | -0.326 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77000 |\n", + "| time_elapsed | 1317 |\n", + "| total_timesteps | 385000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.82 |\n", + "| explained_variance | 0.421 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76999 |\n", + "| policy_loss | -0.388 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.0049 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.41 |\n", + "| ep_rew_mean | -0.355 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77100 |\n", + "| time_elapsed | 1319 |\n", + "| total_timesteps | 385500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.79 |\n", + "| explained_variance | 0.863 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77099 |\n", + "| policy_loss | 0.458 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.0127 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.365 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77200 |\n", + "| time_elapsed | 1321 |\n", + "| total_timesteps | 386000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.79 |\n", + "| explained_variance | 0.89 |\n", + "| learning_rate | 
0.0007 |\n", + "| n_updates | 77199 |\n", + "| policy_loss | 0.224 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00354 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.68 |\n", + "| ep_rew_mean | -0.396 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 77300 |\n", + "| time_elapsed | 1323 |\n", + "| total_timesteps | 386500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 |\n", + "| explained_variance | 0.78 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77299 |\n", + "| policy_loss | -0.63 |\n", + "| std | 0.597 |\n", + "| value_loss | 0.0126 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.64 |\n", + "| ep_rew_mean | -0.378 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77400 |\n", + "| time_elapsed | 1325 |\n", + "| total_timesteps | 387000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.88 |\n", + "| explained_variance | 0.348 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77399 |\n", + "| policy_loss | 0.876 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.0334 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4 |\n", + "| ep_rew_mean | -0.334 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77500 |\n", + "| time_elapsed | 1326 |\n", + "| total_timesteps | 387500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.89 |\n", + "| explained_variance | 0.989 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77499 |\n", + "| policy_loss | 0.306 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.00283 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.33 |\n", + "| 
ep_rew_mean | -0.353 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77600 |\n", + "| time_elapsed | 1328 |\n", + "| total_timesteps | 388000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.88 |\n", + "| explained_variance | 0.48 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77599 |\n", + "| policy_loss | -0.698 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.0283 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77700 |\n", + "| time_elapsed | 1329 |\n", + "| total_timesteps | 388500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.88 |\n", + "| explained_variance | 0.799 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77699 |\n", + "| policy_loss | 0.242 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.00354 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.33 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77800 |\n", + "| time_elapsed | 1331 |\n", + "| total_timesteps | 389000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.85 |\n", + "| explained_variance | 0.834 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77799 |\n", + "| policy_loss | 0.241 |\n", + "| std | 0.597 |\n", + "| value_loss | 0.00375 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.31 |\n", + "| ep_rew_mean | -0.361 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 77900 |\n", + "| time_elapsed | 1333 |\n", + "| total_timesteps | 389500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 |\n", + "| explained_variance | 0.917 
|\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77899 |\n", + "| policy_loss | -0.0251 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00161 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.13 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78000 |\n", + "| time_elapsed | 1335 |\n", + "| total_timesteps | 390000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.79 |\n", + "| explained_variance | 0.82 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77999 |\n", + "| policy_loss | 0.254 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00322 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.64 |\n", + "| ep_rew_mean | -0.292 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78100 |\n", + "| time_elapsed | 1337 |\n", + "| total_timesteps | 390500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.79 |\n", + "| explained_variance | 0.993 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78099 |\n", + "| policy_loss | -0.183 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.00204 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.4 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78200 |\n", + "| time_elapsed | 1338 |\n", + "| total_timesteps | 391000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.8 |\n", + "| explained_variance | 0.928 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78199 |\n", + "| policy_loss | -0.351 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.00284 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| 
ep_len_mean | 3.93 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78300 |\n", + "| time_elapsed | 1340 |\n", + "| total_timesteps | 391500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.77 |\n", + "| explained_variance | 0.174 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78299 |\n", + "| policy_loss | 0.151 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00289 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.7 |\n", + "| ep_rew_mean | -0.295 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78400 |\n", + "| time_elapsed | 1341 |\n", + "| total_timesteps | 392000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.938 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78399 |\n", + "| policy_loss | -0.0736 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.000859 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78500 |\n", + "| time_elapsed | 1343 |\n", + "| total_timesteps | 392500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.77 |\n", + "| explained_variance | 0.963 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78499 |\n", + "| policy_loss | -0.00544 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.000619 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.04 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78600 |\n", + "| time_elapsed | 1345 |\n", + "| total_timesteps | 393000 |\n", + "| train/ | |\n", + "| entropy_loss | 
-5.75 |\n", + "| explained_variance | 0.95 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78599 |\n", + "| policy_loss | -0.152 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00185 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78700 |\n", + "| time_elapsed | 1346 |\n", + "| total_timesteps | 393500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78699 |\n", + "| policy_loss | 0.0537 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.000228 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.66 |\n", + "| ep_rew_mean | -0.299 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78800 |\n", + "| time_elapsed | 1348 |\n", + "| total_timesteps | 394000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.77 |\n", + "| explained_variance | 0.986 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78799 |\n", + "| policy_loss | 0.112 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.000503 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.54 |\n", + "| ep_rew_mean | -0.293 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 78900 |\n", + "| time_elapsed | 1350 |\n", + "| total_timesteps | 394500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.941 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78899 |\n", + "| policy_loss | 0.148 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00109 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.62 |\n", + "| ep_rew_mean | -0.29 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79000 |\n", + "| time_elapsed | 1352 |\n", + "| total_timesteps | 395000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.847 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78999 |\n", + "| policy_loss | -0.0882 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.000801 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.68 |\n", + "| ep_rew_mean | -0.29 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79100 |\n", + "| time_elapsed | 1353 |\n", + "| total_timesteps | 395500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.88 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79099 |\n", + "| policy_loss | 0.107 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00122 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.32 |\n", + "| ep_rew_mean | -0.358 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79200 |\n", + "| time_elapsed | 1355 |\n", + "| total_timesteps | 396000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.76 |\n", + "| explained_variance | 0.983 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79199 |\n", + "| policy_loss | -0.0174 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.000369 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79300 |\n", + "| time_elapsed | 1356 |\n", + "| 
total_timesteps | 396500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.76 |\n", + "| explained_variance | 0.865 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79299 |\n", + "| policy_loss | -0.246 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00269 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.359 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79400 |\n", + "| time_elapsed | 1358 |\n", + "| total_timesteps | 397000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.77 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79399 |\n", + "| policy_loss | 0.00824 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.000109 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.83 |\n", + "| ep_rew_mean | -0.311 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79500 |\n", + "| time_elapsed | 1360 |\n", + "| total_timesteps | 397500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79499 |\n", + "| policy_loss | -0.00668 |\n", + "| std | 0.593 |\n", + "| value_loss | 6.53e-05 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.93 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79600 |\n", + "| time_elapsed | 1362 |\n", + "| total_timesteps | 398000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.846 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79599 |\n", + "| policy_loss | -0.339 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.00599 
|\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.95 |\n", + "| ep_rew_mean | -0.326 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79700 |\n", + "| time_elapsed | 1364 |\n", + "| total_timesteps | 398500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.848 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79699 |\n", + "| policy_loss | 0.321 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00438 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79800 |\n", + "| time_elapsed | 1365 |\n", + "| total_timesteps | 399000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.865 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79799 |\n", + "| policy_loss | 0.114 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.000994 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.64 |\n", + "| ep_rew_mean | -0.288 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 79900 |\n", + "| time_elapsed | 1367 |\n", + "| total_timesteps | 399500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79899 |\n", + "| policy_loss | -0.0071 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00012 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.303 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 
80000 |\n", + "| time_elapsed | 1369 |\n", + "| total_timesteps | 400000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79999 |\n", + "| policy_loss | -0.4 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00803 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.357 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80100 |\n", + "| time_elapsed | 1370 |\n", + "| total_timesteps | 400500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.995 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80099 |\n", + "| policy_loss | -0.175 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00169 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.17 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80200 |\n", + "| time_elapsed | 1372 |\n", + "| total_timesteps | 401000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.98 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80199 |\n", + "| policy_loss | 0.0791 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00129 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.21 |\n", + "| ep_rew_mean | -0.437 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80300 |\n", + "| time_elapsed | 1374 |\n", + "| total_timesteps | 401500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.98 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80299 |\n", + "| policy_loss | -0.611 |\n", + "| std 
| 0.593 |\n", + "| value_loss | 0.0116 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.11 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80400 |\n", + "| time_elapsed | 1376 |\n", + "| total_timesteps | 402000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.972 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80399 |\n", + "| policy_loss | 0.102 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.000776 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.17 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80500 |\n", + "| time_elapsed | 1377 |\n", + "| total_timesteps | 402500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80499 |\n", + "| policy_loss | 0.249 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00297 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.355 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80600 |\n", + "| time_elapsed | 1379 |\n", + "| total_timesteps | 403000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.69 |\n", + "| explained_variance | 0.897 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80599 |\n", + "| policy_loss | -0.571 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.0145 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.49 |\n", + "| ep_rew_mean | -0.376 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| 
fps | 292 |\n", + "| iterations | 80700 |\n", + "| time_elapsed | 1381 |\n", + "| total_timesteps | 403500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.802 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80699 |\n", + "| policy_loss | 0.05 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.00319 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.22 |\n", + "| ep_rew_mean | -0.343 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80800 |\n", + "| time_elapsed | 1382 |\n", + "| total_timesteps | 404000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.759 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80799 |\n", + "| policy_loss | 0.0934 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.000847 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.33 |\n", + "| ep_rew_mean | -0.35 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 80900 |\n", + "| time_elapsed | 1384 |\n", + "| total_timesteps | 404500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.965 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80899 |\n", + "| policy_loss | 0.128 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00167 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.6 |\n", + "| ep_rew_mean | -0.373 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81000 |\n", + "| time_elapsed | 1385 |\n", + "| total_timesteps | 405000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.982 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80999 |\n", + "| 
policy_loss | 0.286 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.0024 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.313 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81100 |\n", + "| time_elapsed | 1387 |\n", + "| total_timesteps | 405500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.941 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81099 |\n", + "| policy_loss | 0.141 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00134 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81200 |\n", + "| time_elapsed | 1390 |\n", + "| total_timesteps | 406000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.774 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81199 |\n", + "| policy_loss | 0.00445 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.000498 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.309 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81300 |\n", + "| time_elapsed | 1391 |\n", + "| total_timesteps | 406500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.834 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81299 |\n", + "| policy_loss | 0.0849 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00068 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.31 |\n", + "| success_rate 
| 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81400 |\n", + "| time_elapsed | 1393 |\n", + "| total_timesteps | 407000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.85 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81399 |\n", + "| policy_loss | -0.114 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00289 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.95 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81500 |\n", + "| time_elapsed | 1394 |\n", + "| total_timesteps | 407500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.717 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81499 |\n", + "| policy_loss | 0.383 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.00841 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81600 |\n", + "| time_elapsed | 1396 |\n", + "| total_timesteps | 408000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.68 |\n", + "| explained_variance | 0.958 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81599 |\n", + "| policy_loss | 0.0597 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.0012 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.04 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81700 |\n", + "| time_elapsed | 1397 |\n", + "| total_timesteps | 408500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.986 |\n", + "| learning_rate | 0.0007 |\n", + 
"| n_updates | 81699 |\n", + "| policy_loss | -0.000123 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.00023 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.47 |\n", + "| ep_rew_mean | -0.367 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81800 |\n", + "| time_elapsed | 1399 |\n", + "| total_timesteps | 409000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.928 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81799 |\n", + "| policy_loss | 0.321 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00345 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.65 |\n", + "| ep_rew_mean | -0.391 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 81900 |\n", + "| time_elapsed | 1401 |\n", + "| total_timesteps | 409500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | -0.235 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81899 |\n", + "| policy_loss | -0.0123 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.00437 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.51 |\n", + "| ep_rew_mean | -0.373 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82000 |\n", + "| time_elapsed | 1403 |\n", + "| total_timesteps | 410000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.727 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81999 |\n", + "| policy_loss | -0.604 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.0221 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.49 |\n", + "| 
ep_rew_mean | -0.538 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82100 |\n", + "| time_elapsed | 1405 |\n", + "| total_timesteps | 410500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | -0.231 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82099 |\n", + "| policy_loss | 1.1 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.138 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.05 |\n", + "| ep_rew_mean | -0.397 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82200 |\n", + "| time_elapsed | 1407 |\n", + "| total_timesteps | 411000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.969 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82199 |\n", + "| policy_loss | 0.634 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00847 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.67 |\n", + "| ep_rew_mean | -0.299 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82300 |\n", + "| time_elapsed | 1408 |\n", + "| total_timesteps | 411500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.949 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82299 |\n", + "| policy_loss | 0.327 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00555 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82400 |\n", + "| time_elapsed | 1410 |\n", + "| total_timesteps | 412000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 
0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82399 |\n", + "| policy_loss | 0.147 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00121 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.5 |\n", + "| ep_rew_mean | -0.282 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82500 |\n", + "| time_elapsed | 1411 |\n", + "| total_timesteps | 412500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.829 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82499 |\n", + "| policy_loss | -0.338 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00608 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.85 |\n", + "| ep_rew_mean | -0.316 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82600 |\n", + "| time_elapsed | 1413 |\n", + "| total_timesteps | 413000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | -0.164 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82599 |\n", + "| policy_loss | 0.247 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00841 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.34 |\n", + "| ep_rew_mean | -0.273 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82700 |\n", + "| time_elapsed | 1416 |\n", + "| total_timesteps | 413500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.68 |\n", + "| explained_variance | 0.965 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82699 |\n", + "| policy_loss | 0.32 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.00267 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + 
"| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82800 |\n", + "| time_elapsed | 1417 |\n", + "| total_timesteps | 414000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.69 |\n", + "| explained_variance | 0.977 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82799 |\n", + "| policy_loss | -0.101 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.000629 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.21 |\n", + "| ep_rew_mean | -0.363 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 82900 |\n", + "| time_elapsed | 1419 |\n", + "| total_timesteps | 414500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.975 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82899 |\n", + "| policy_loss | 0.0751 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.000352 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.88 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83000 |\n", + "| time_elapsed | 1420 |\n", + "| total_timesteps | 415000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.69 |\n", + "| explained_variance | 0.841 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82999 |\n", + "| policy_loss | 0.213 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.00137 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.67 |\n", + "| ep_rew_mean | -0.295 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83100 |\n", + "| time_elapsed | 1422 |\n", + "| total_timesteps | 415500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.68 
|\n", + "| explained_variance | 0.966 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83099 |\n", + "| policy_loss | -0.237 |\n", + "| std | 0.589 |\n", + "| value_loss | 0.00289 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.52 |\n", + "| ep_rew_mean | -0.274 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83200 |\n", + "| time_elapsed | 1424 |\n", + "| total_timesteps | 416000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | -2.42 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83199 |\n", + "| policy_loss | 0.458 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0118 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.36 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83300 |\n", + "| time_elapsed | 1425 |\n", + "| total_timesteps | 416500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.68 |\n", + "| explained_variance | 0.987 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83299 |\n", + "| policy_loss | 0.107 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.0012 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.64 |\n", + "| ep_rew_mean | -0.299 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83400 |\n", + "| time_elapsed | 1428 |\n", + "| total_timesteps | 417000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | 0.936 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83399 |\n", + "| policy_loss | 0.136 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.00604 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 83500 |\n", + "| time_elapsed | 1429 |\n", + "| total_timesteps | 417500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83499 |\n", + "| policy_loss | -0.298 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.00216 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.73 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 83600 |\n", + "| time_elapsed | 1431 |\n", + "| total_timesteps | 418000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.774 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83599 |\n", + "| policy_loss | 0.0687 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.000266 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.45 |\n", + "| ep_rew_mean | -0.28 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83700 |\n", + "| time_elapsed | 1433 |\n", + "| total_timesteps | 418500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83699 |\n", + "| policy_loss | -0.146 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.000823 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83800 |\n", + "| time_elapsed | 1434 |\n", + "| 
total_timesteps | 419000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.914 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83799 |\n", + "| policy_loss | 0.443 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00823 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.3 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 83900 |\n", + "| time_elapsed | 1436 |\n", + "| total_timesteps | 419500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.614 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83899 |\n", + "| policy_loss | -0.0973 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00337 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.02 |\n", + "| ep_rew_mean | -0.332 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84000 |\n", + "| time_elapsed | 1437 |\n", + "| total_timesteps | 420000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.973 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83999 |\n", + "| policy_loss | 0.0913 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00151 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.17 |\n", + "| ep_rew_mean | -0.348 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84100 |\n", + "| time_elapsed | 1439 |\n", + "| total_timesteps | 420500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84099 |\n", + "| policy_loss | 0.041 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.000968 |\n", 
+ "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.85 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 84200 |\n", + "| time_elapsed | 1442 |\n", + "| total_timesteps | 421000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | 0.924 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84199 |\n", + "| policy_loss | -0.255 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.0018 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.92 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 84300 |\n", + "| time_elapsed | 1443 |\n", + "| total_timesteps | 421500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.69 |\n", + "| explained_variance | 0.942 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84299 |\n", + "| policy_loss | -0.291 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00266 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.4 |\n", + "| ep_rew_mean | -0.343 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 84400 |\n", + "| time_elapsed | 1445 |\n", + "| total_timesteps | 422000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.64 |\n", + "| explained_variance | 0.878 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84399 |\n", + "| policy_loss | 0.33 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.00649 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.295 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84500 |\n", + 
"| time_elapsed | 1446 |\n", + "| total_timesteps | 422500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.61 |\n", + "| explained_variance | 0.991 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84499 |\n", + "| policy_loss | -0.0852 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.000318 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.77 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84600 |\n", + "| time_elapsed | 1448 |\n", + "| total_timesteps | 423000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.59 |\n", + "| explained_variance | 0.819 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84599 |\n", + "| policy_loss | -0.25 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.00259 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.67 |\n", + "| ep_rew_mean | -0.292 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84700 |\n", + "| time_elapsed | 1449 |\n", + "| total_timesteps | 423500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.6 |\n", + "| explained_variance | -0.439 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84699 |\n", + "| policy_loss | -1.65 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.0698 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84800 |\n", + "| time_elapsed | 1451 |\n", + "| total_timesteps | 424000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.57 |\n", + "| explained_variance | 0.354 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84799 |\n", + "| policy_loss | 0.125 |\n", + "| std | 0.581 
|\n", + "| value_loss | 0.00793 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.311 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 84900 |\n", + "| time_elapsed | 1453 |\n", + "| total_timesteps | 424500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.55 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84899 |\n", + "| policy_loss | -0.29 |\n", + "| std | 0.579 |\n", + "| value_loss | 0.00248 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.26 |\n", + "| ep_rew_mean | -0.341 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 85000 |\n", + "| time_elapsed | 1455 |\n", + "| total_timesteps | 425000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.55 |\n", + "| explained_variance | 0.909 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84999 |\n", + "| policy_loss | -0.351 |\n", + "| std | 0.579 |\n", + "| value_loss | 0.00391 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.34 |\n", + "| ep_rew_mean | -0.464 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85100 |\n", + "| time_elapsed | 1457 |\n", + "| total_timesteps | 425500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.58 |\n", + "| explained_variance | -0.283 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85099 |\n", + "| policy_loss | 1.26 |\n", + "| std | 0.582 |\n", + "| value_loss | 0.184 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.8 |\n", + "| ep_rew_mean | -0.477 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 
292 |\n", + "| iterations | 85200 |\n", + "| time_elapsed | 1458 |\n", + "| total_timesteps | 426000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.55 |\n", + "| explained_variance | -0.125 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85199 |\n", + "| policy_loss | 0.00426 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.00751 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.77 |\n", + "| ep_rew_mean | -0.38 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85300 |\n", + "| time_elapsed | 1460 |\n", + "| total_timesteps | 426500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.57 |\n", + "| explained_variance | 0.705 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85299 |\n", + "| policy_loss | 0.509 |\n", + "| std | 0.582 |\n", + "| value_loss | 0.0211 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.2 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85400 |\n", + "| time_elapsed | 1461 |\n", + "| total_timesteps | 427000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.53 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85399 |\n", + "| policy_loss | 0.0383 |\n", + "| std | 0.579 |\n", + "| value_loss | 0.000396 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.04 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85500 |\n", + "| time_elapsed | 1463 |\n", + "| total_timesteps | 427500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.52 |\n", + "| explained_variance | 0.167 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85499 |\n", + "| 
policy_loss | -1.1 |\n", + "| std | 0.578 |\n", + "| value_loss | 0.0346 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.79 |\n", + "| ep_rew_mean | -0.451 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85600 |\n", + "| time_elapsed | 1464 |\n", + "| total_timesteps | 428000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.51 |\n", + "| explained_variance | 0.79 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85599 |\n", + "| policy_loss | 0.477 |\n", + "| std | 0.577 |\n", + "| value_loss | 0.0124 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.72 |\n", + "| ep_rew_mean | -0.536 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85700 |\n", + "| time_elapsed | 1466 |\n", + "| total_timesteps | 428500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.5 |\n", + "| explained_variance | 0.936 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85699 |\n", + "| policy_loss | -0.829 |\n", + "| std | 0.577 |\n", + "| value_loss | 0.0212 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.57 |\n", + "| ep_rew_mean | -0.283 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85800 |\n", + "| time_elapsed | 1469 |\n", + "| total_timesteps | 429000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.48 |\n", + "| explained_variance | 0.886 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85799 |\n", + "| policy_loss | -0.126 |\n", + "| std | 0.576 |\n", + "| value_loss | 0.0011 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.07 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate 
| 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 85900 |\n", + "| time_elapsed | 1470 |\n", + "| total_timesteps | 429500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.46 |\n", + "| explained_variance | 0.842 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85899 |\n", + "| policy_loss | -0.48 |\n", + "| std | 0.575 |\n", + "| value_loss | 0.00765 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.6 |\n", + "| ep_rew_mean | -0.374 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86000 |\n", + "| time_elapsed | 1472 |\n", + "| total_timesteps | 430000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.48 |\n", + "| explained_variance | 0.544 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85999 |\n", + "| policy_loss | 0.364 |\n", + "| std | 0.576 |\n", + "| value_loss | 0.00622 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86100 |\n", + "| time_elapsed | 1473 |\n", + "| total_timesteps | 430500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.46 |\n", + "| explained_variance | 0.959 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86099 |\n", + "| policy_loss | 0.149 |\n", + "| std | 0.574 |\n", + "| value_loss | 0.00123 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.19 |\n", + "| ep_rew_mean | -0.426 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86200 |\n", + "| time_elapsed | 1475 |\n", + "| total_timesteps | 431000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.46 |\n", + "| explained_variance | 0.908 |\n", + "| learning_rate | 0.0007 |\n", + 
"| n_updates | 86199 |\n", + "| policy_loss | -0.419 |\n", + "| std | 0.574 |\n", + "| value_loss | 0.00646 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.05 |\n", + "| ep_rew_mean | -0.587 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86300 |\n", + "| time_elapsed | 1476 |\n", + "| total_timesteps | 431500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.47 |\n", + "| explained_variance | -2.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86299 |\n", + "| policy_loss | -2.05 |\n", + "| std | 0.574 |\n", + "| value_loss | 0.226 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.41 |\n", + "| ep_rew_mean | -0.784 |\n", + "| success_rate | 0.94 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86400 |\n", + "| time_elapsed | 1478 |\n", + "| total_timesteps | 432000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.49 |\n", + "| explained_variance | 0.0145 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86399 |\n", + "| policy_loss | -1.71 |\n", + "| std | 0.575 |\n", + "| value_loss | 0.13 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.48 |\n", + "| ep_rew_mean | -0.769 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86500 |\n", + "| time_elapsed | 1480 |\n", + "| total_timesteps | 432500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.49 |\n", + "| explained_variance | -4.19 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86499 |\n", + "| policy_loss | 0.346 |\n", + "| std | 0.575 |\n", + "| value_loss | 0.0309 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.6 |\n", + "| 
ep_rew_mean | -0.696 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86600 |\n", + "| time_elapsed | 1482 |\n", + "| total_timesteps | 433000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.5 |\n", + "| explained_variance | -46.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86599 |\n", + "| policy_loss | 8.6 |\n", + "| std | 0.576 |\n", + "| value_loss | 1.75 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.05 |\n", + "| ep_rew_mean | -0.394 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86700 |\n", + "| time_elapsed | 1484 |\n", + "| total_timesteps | 433500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.49 |\n", + "| explained_variance | 0.913 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86699 |\n", + "| policy_loss | -1.02 |\n", + "| std | 0.575 |\n", + "| value_loss | 0.0308 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.43 |\n", + "| ep_rew_mean | -0.349 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86800 |\n", + "| time_elapsed | 1485 |\n", + "| total_timesteps | 434000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.47 |\n", + "| explained_variance | 0.896 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86799 |\n", + "| policy_loss | -0.2 |\n", + "| std | 0.574 |\n", + "| value_loss | 0.00323 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.9 |\n", + "| ep_rew_mean | -0.379 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 86900 |\n", + "| time_elapsed | 1487 |\n", + "| total_timesteps | 434500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.45 |\n", + "| explained_variance | 0.824 
|\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86899 |\n", + "| policy_loss | -0.86 |\n", + "| std | 0.573 |\n", + "| value_loss | 0.0525 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.01 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87000 |\n", + "| time_elapsed | 1489 |\n", + "| total_timesteps | 435000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.42 |\n", + "| explained_variance | 0.942 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86999 |\n", + "| policy_loss | 0.0193 |\n", + "| std | 0.571 |\n", + "| value_loss | 0.000476 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87100 |\n", + "| time_elapsed | 1490 |\n", + "| total_timesteps | 435500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.39 |\n", + "| explained_variance | 0.897 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87099 |\n", + "| policy_loss | 0.146 |\n", + "| std | 0.569 |\n", + "| value_loss | 0.00423 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.93 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87200 |\n", + "| time_elapsed | 1492 |\n", + "| total_timesteps | 436000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.37 |\n", + "| explained_variance | 0.868 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87199 |\n", + "| policy_loss | 0.147 |\n", + "| std | 0.568 |\n", + "| value_loss | 0.004 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| 
ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87300 |\n", + "| time_elapsed | 1494 |\n", + "| total_timesteps | 436500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.37 |\n", + "| explained_variance | 0.494 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87299 |\n", + "| policy_loss | 0.165 |\n", + "| std | 0.567 |\n", + "| value_loss | 0.004 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37 |\n", + "| ep_rew_mean | -0.369 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87400 |\n", + "| time_elapsed | 1496 |\n", + "| total_timesteps | 437000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.34 |\n", + "| explained_variance | 0.976 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87399 |\n", + "| policy_loss | 0.155 |\n", + "| std | 0.566 |\n", + "| value_loss | 0.00141 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.306 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87500 |\n", + "| time_elapsed | 1498 |\n", + "| total_timesteps | 437500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.35 |\n", + "| explained_variance | 0.924 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87499 |\n", + "| policy_loss | 0.0842 |\n", + "| std | 0.566 |\n", + "| value_loss | 0.00362 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.52 |\n", + "| ep_rew_mean | -0.365 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87600 |\n", + "| time_elapsed | 1499 |\n", + "| total_timesteps | 438000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.35 |\n", 
+ "| explained_variance | 0.57 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87599 |\n", + "| policy_loss | 0.215 |\n", + "| std | 0.566 |\n", + "| value_loss | 0.00175 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87700 |\n", + "| time_elapsed | 1501 |\n", + "| total_timesteps | 438500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.35 |\n", + "| explained_variance | 0.946 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87699 |\n", + "| policy_loss | 0.0643 |\n", + "| std | 0.567 |\n", + "| value_loss | 0.00121 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.85 |\n", + "| ep_rew_mean | -0.303 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87800 |\n", + "| time_elapsed | 1502 |\n", + "| total_timesteps | 439000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.33 |\n", + "| explained_variance | 0.981 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87799 |\n", + "| policy_loss | 0.233 |\n", + "| std | 0.564 |\n", + "| value_loss | 0.0028 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 87900 |\n", + "| time_elapsed | 1504 |\n", + "| total_timesteps | 439500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.33 |\n", + "| explained_variance | 0.726 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87899 |\n", + "| policy_loss | -0.0954 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.00221 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.306 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 88000 |\n", + "| time_elapsed | 1506 |\n", + "| total_timesteps | 440000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.35 |\n", + "| explained_variance | 0.856 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87999 |\n", + "| policy_loss | -0.739 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.0165 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.63 |\n", + "| ep_rew_mean | -0.289 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 88100 |\n", + "| time_elapsed | 1508 |\n", + "| total_timesteps | 440500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.35 |\n", + "| explained_variance | 0.862 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88099 |\n", + "| policy_loss | 0.227 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.00193 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.348 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 88200 |\n", + "| time_elapsed | 1510 |\n", + "| total_timesteps | 441000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.35 |\n", + "| explained_variance | 0.919 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88199 |\n", + "| policy_loss | 0.11 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.000803 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.31 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 88300 |\n", + "| time_elapsed | 1512 |\n", + "| 
total_timesteps | 441500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.36 |\n", + "| explained_variance | 0.856 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88299 |\n", + "| policy_loss | -0.059 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.000924 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 88400 |\n", + "| time_elapsed | 1513 |\n", + "| total_timesteps | 442000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.36 |\n", + "| explained_variance | 0.984 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88399 |\n", + "| policy_loss | 0.0521 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.000558 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.28 |\n", + "| ep_rew_mean | -0.355 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 88500 |\n", + "| time_elapsed | 1515 |\n", + "| total_timesteps | 442500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.37 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88499 |\n", + "| policy_loss | -0.156 |\n", + "| std | 0.566 |\n", + "| value_loss | 0.00142 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.51 |\n", + "| ep_rew_mean | -0.379 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 88600 |\n", + "| time_elapsed | 1516 |\n", + "| total_timesteps | 443000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.34 |\n", + "| explained_variance | 0.975 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88599 |\n", + "| policy_loss | 0.169 |\n", + "| std | 0.564 |\n", + "| value_loss | 0.0035 
|\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.47 |\n", + "| ep_rew_mean | -0.357 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 88700 |\n", + "| time_elapsed | 1518 |\n", + "| total_timesteps | 443500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.31 |\n", + "| explained_variance | 0.984 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88699 |\n", + "| policy_loss | -0.341 |\n", + "| std | 0.562 |\n", + "| value_loss | 0.00445 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.358 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 88800 |\n", + "| time_elapsed | 1520 |\n", + "| total_timesteps | 444000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.3 |\n", + "| explained_variance | -0.359 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88799 |\n", + "| policy_loss | -0.222 |\n", + "| std | 0.562 |\n", + "| value_loss | 0.00447 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.38 |\n", + "| ep_rew_mean | -0.36 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 88900 |\n", + "| time_elapsed | 1522 |\n", + "| total_timesteps | 444500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.28 |\n", + "| explained_variance | 0.844 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88899 |\n", + "| policy_loss | -0.251 |\n", + "| std | 0.56 |\n", + "| value_loss | 0.00396 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.44 |\n", + "| ep_rew_mean | -0.367 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89000 
|\n", + "| time_elapsed | 1524 |\n", + "| total_timesteps | 445000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.28 |\n", + "| explained_variance | 0.933 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88999 |\n", + "| policy_loss | 0.112 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.00269 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.28 |\n", + "| ep_rew_mean | -0.449 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89100 |\n", + "| time_elapsed | 1525 |\n", + "| total_timesteps | 445500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.28 |\n", + "| explained_variance | 0.741 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89099 |\n", + "| policy_loss | -0.725 |\n", + "| std | 0.559 |\n", + "| value_loss | 0.0419 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.84 |\n", + "| ep_rew_mean | -0.411 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89200 |\n", + "| time_elapsed | 1527 |\n", + "| total_timesteps | 446000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.29 |\n", + "| explained_variance | 0.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89199 |\n", + "| policy_loss | -0.18 |\n", + "| std | 0.561 |\n", + "| value_loss | 0.00599 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.99 |\n", + "| ep_rew_mean | -0.414 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 89300 |\n", + "| time_elapsed | 1529 |\n", + "| total_timesteps | 446500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.32 |\n", + "| explained_variance | 0.89 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89299 |\n", + "| policy_loss | 0.141 |\n", + "| std | 0.563 
|\n", + "| value_loss | 0.00322 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.34 |\n", + "| ep_rew_mean | -0.34 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 89400 |\n", + "| time_elapsed | 1530 |\n", + "| total_timesteps | 447000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.36 |\n", + "| explained_variance | 0.845 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89399 |\n", + "| policy_loss | -0.0196 |\n", + "| std | 0.566 |\n", + "| value_loss | 0.0014 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.15 |\n", + "| ep_rew_mean | -0.344 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89500 |\n", + "| time_elapsed | 1532 |\n", + "| total_timesteps | 447500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.34 |\n", + "| explained_variance | 0.378 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89499 |\n", + "| policy_loss | -0.317 |\n", + "| std | 0.565 |\n", + "| value_loss | 0.00366 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.18 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89600 |\n", + "| time_elapsed | 1534 |\n", + "| total_timesteps | 448000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.31 |\n", + "| explained_variance | 0.657 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89599 |\n", + "| policy_loss | 0.0751 |\n", + "| std | 0.561 |\n", + "| value_loss | 0.000955 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.44 |\n", + "| ep_rew_mean | -0.366 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 
291 |\n", + "| iterations | 89700 |\n", + "| time_elapsed | 1536 |\n", + "| total_timesteps | 448500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.27 |\n", + "| explained_variance | 0.95 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89699 |\n", + "| policy_loss | 0.0631 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.000703 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89800 |\n", + "| time_elapsed | 1538 |\n", + "| total_timesteps | 449000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.26 |\n", + "| explained_variance | -0.707 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89799 |\n", + "| policy_loss | 0.94 |\n", + "| std | 0.557 |\n", + "| value_loss | 0.046 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.333 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 89900 |\n", + "| time_elapsed | 1539 |\n", + "| total_timesteps | 449500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.26 |\n", + "| explained_variance | 0.993 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89899 |\n", + "| policy_loss | 0.22 |\n", + "| std | 0.557 |\n", + "| value_loss | 0.00141 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90000 |\n", + "| time_elapsed | 1541 |\n", + "| total_timesteps | 450000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.29 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89999 |\n", + "| 
policy_loss | 0.0163 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.00295 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.2 |\n", + "| ep_rew_mean | -0.344 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90100 |\n", + "| time_elapsed | 1542 |\n", + "| total_timesteps | 450500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.29 |\n", + "| explained_variance | 0.982 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90099 |\n", + "| policy_loss | 0.118 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.000764 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.49 |\n", + "| ep_rew_mean | -0.362 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 90200 |\n", + "| time_elapsed | 1544 |\n", + "| total_timesteps | 451000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.28 |\n", + "| explained_variance | 0.742 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90199 |\n", + "| policy_loss | 0.15 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.00237 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.7 |\n", + "| ep_rew_mean | -0.291 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90300 |\n", + "| time_elapsed | 1546 |\n", + "| total_timesteps | 451500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.27 |\n", + "| explained_variance | 0.765 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90299 |\n", + "| policy_loss | 0.112 |\n", + "| std | 0.556 |\n", + "| value_loss | 0.0024 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.95 |\n", + "| ep_rew_mean | -0.317 |\n", + "| success_rate | 
1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90400 |\n", + "| time_elapsed | 1548 |\n", + "| total_timesteps | 452000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.29 |\n", + "| explained_variance | 0.957 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90399 |\n", + "| policy_loss | 0.126 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.000852 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.323 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90500 |\n", + "| time_elapsed | 1550 |\n", + "| total_timesteps | 452500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.28 |\n", + "| explained_variance | 0.796 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90499 |\n", + "| policy_loss | -0.371 |\n", + "| std | 0.559 |\n", + "| value_loss | 0.0071 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.59 |\n", + "| ep_rew_mean | -0.282 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90600 |\n", + "| time_elapsed | 1551 |\n", + "| total_timesteps | 453000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.26 |\n", + "| explained_variance | 0.925 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90599 |\n", + "| policy_loss | 0.165 |\n", + "| std | 0.556 |\n", + "| value_loss | 0.00161 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.95 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90700 |\n", + "| time_elapsed | 1553 |\n", + "| total_timesteps | 453500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.27 |\n", + "| explained_variance | 0.883 |\n", + "| learning_rate | 0.0007 |\n", + 
"| n_updates | 90699 |\n", + "| policy_loss | 0.0258 |\n", + "| std | 0.558 |\n", + "| value_loss | 0.000999 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.317 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90800 |\n", + "| time_elapsed | 1555 |\n", + "| total_timesteps | 454000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.22 |\n", + "| explained_variance | 0.928 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90799 |\n", + "| policy_loss | -0.117 |\n", + "| std | 0.554 |\n", + "| value_loss | 0.00208 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.309 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 90900 |\n", + "| time_elapsed | 1556 |\n", + "| total_timesteps | 454500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.21 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90899 |\n", + "| policy_loss | -0.123 |\n", + "| std | 0.553 |\n", + "| value_loss | 0.000549 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.6 |\n", + "| ep_rew_mean | -0.287 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 91000 |\n", + "| time_elapsed | 1558 |\n", + "| total_timesteps | 455000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.19 |\n", + "| explained_variance | 0.989 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90999 |\n", + "| policy_loss | -0.136 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.000819 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.26 |\n", + "| 
ep_rew_mean | -0.34 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91100 |\n", + "| time_elapsed | 1560 |\n", + "| total_timesteps | 455500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.18 |\n", + "| explained_variance | 0.901 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91099 |\n", + "| policy_loss | 0.234 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.00392 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.4 |\n", + "| ep_rew_mean | -0.371 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91200 |\n", + "| time_elapsed | 1562 |\n", + "| total_timesteps | 456000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.16 |\n", + "| explained_variance | 0.927 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91199 |\n", + "| policy_loss | 0.0133 |\n", + "| std | 0.549 |\n", + "| value_loss | 0.000819 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91300 |\n", + "| time_elapsed | 1564 |\n", + "| total_timesteps | 456500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.15 |\n", + "| explained_variance | 0.968 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91299 |\n", + "| policy_loss | 0.262 |\n", + "| std | 0.547 |\n", + "| value_loss | 0.00341 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.63 |\n", + "| ep_rew_mean | -0.291 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91400 |\n", + "| time_elapsed | 1565 |\n", + "| total_timesteps | 457000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.13 |\n", + "| explained_variance | 
0.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91399 |\n", + "| policy_loss | 0.158 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.00105 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91500 |\n", + "| time_elapsed | 1567 |\n", + "| total_timesteps | 457500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.14 |\n", + "| explained_variance | 0.979 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91499 |\n", + "| policy_loss | -0.158 |\n", + "| std | 0.547 |\n", + "| value_loss | 0.00411 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.288 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91600 |\n", + "| time_elapsed | 1568 |\n", + "| total_timesteps | 458000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.13 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91599 |\n", + "| policy_loss | -0.00905 |\n", + "| std | 0.545 |\n", + "| value_loss | 0.000151 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.21 |\n", + "| ep_rew_mean | -0.26 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91700 |\n", + "| time_elapsed | 1570 |\n", + "| total_timesteps | 458500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.1 |\n", + "| explained_variance | 0.964 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91699 |\n", + "| policy_loss | -0.0771 |\n", + "| std | 0.544 |\n", + "| value_loss | 0.000724 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | 
|\n", + "| ep_len_mean | 3.41 |\n", + "| ep_rew_mean | -0.279 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91800 |\n", + "| time_elapsed | 1572 |\n", + "| total_timesteps | 459000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.14 |\n", + "| explained_variance | 0.952 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91799 |\n", + "| policy_loss | 0.214 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.00181 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.68 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 91900 |\n", + "| time_elapsed | 1574 |\n", + "| total_timesteps | 459500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.12 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91899 |\n", + "| policy_loss | -0.047 |\n", + "| std | 0.545 |\n", + "| value_loss | 0.000246 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.83 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92000 |\n", + "| time_elapsed | 1576 |\n", + "| total_timesteps | 460000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.13 |\n", + "| explained_variance | 0.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91999 |\n", + "| policy_loss | 0.0479 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.000446 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92100 |\n", + "| time_elapsed | 1577 |\n", + "| total_timesteps | 460500 |\n", + "| train/ | |\n", + "| 
entropy_loss | -5.18 |\n", + "| explained_variance | 0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92099 |\n", + "| policy_loss | 0.0181 |\n", + "| std | 0.549 |\n", + "| value_loss | 2.54e-05 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.34 |\n", + "| ep_rew_mean | -0.358 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92200 |\n", + "| time_elapsed | 1579 |\n", + "| total_timesteps | 461000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.16 |\n", + "| explained_variance | 0.675 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92199 |\n", + "| policy_loss | 0.102 |\n", + "| std | 0.548 |\n", + "| value_loss | 0.0023 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.46 |\n", + "| ep_rew_mean | -0.369 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92300 |\n", + "| time_elapsed | 1581 |\n", + "| total_timesteps | 461500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.15 |\n", + "| explained_variance | 0.359 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92299 |\n", + "| policy_loss | -1.15 |\n", + "| std | 0.547 |\n", + "| value_loss | 0.0583 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.53 |\n", + "| ep_rew_mean | -0.385 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92400 |\n", + "| time_elapsed | 1582 |\n", + "| total_timesteps | 462000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.16 |\n", + "| explained_variance | 0.953 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92399 |\n", + "| policy_loss | 0.187 |\n", + "| std | 0.548 |\n", + "| value_loss | 0.00202 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.12 |\n", + "| ep_rew_mean | -0.341 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92500 |\n", + "| time_elapsed | 1584 |\n", + "| total_timesteps | 462500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.16 |\n", + "| explained_variance | 0.323 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92499 |\n", + "| policy_loss | -0.142 |\n", + "| std | 0.548 |\n", + "| value_loss | 0.0019 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.66 |\n", + "| ep_rew_mean | -0.396 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92600 |\n", + "| time_elapsed | 1586 |\n", + "| total_timesteps | 463000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.17 |\n", + "| explained_variance | 0.633 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92599 |\n", + "| policy_loss | -0.19 |\n", + "| std | 0.549 |\n", + "| value_loss | 0.00488 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.83 |\n", + "| ep_rew_mean | -0.481 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92700 |\n", + "| time_elapsed | 1588 |\n", + "| total_timesteps | 463500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.17 |\n", + "| explained_variance | 0.173 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92699 |\n", + "| policy_loss | -1.09 |\n", + "| std | 0.549 |\n", + "| value_loss | 0.0262 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.82 |\n", + "| ep_rew_mean | -0.486 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92800 |\n", + "| time_elapsed | 1589 |\n", + "| 
total_timesteps | 464000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.19 |\n", + "| explained_variance | 0.193 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92799 |\n", + "| policy_loss | 0.189 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.00871 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.93 |\n", + "| ep_rew_mean | -0.407 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 92900 |\n", + "| time_elapsed | 1591 |\n", + "| total_timesteps | 464500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.17 |\n", + "| explained_variance | 0.925 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92899 |\n", + "| policy_loss | 0.402 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.00877 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.01 |\n", + "| ep_rew_mean | -0.413 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93000 |\n", + "| time_elapsed | 1593 |\n", + "| total_timesteps | 465000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.2 |\n", + "| explained_variance | 0.385 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92999 |\n", + "| policy_loss | -0.181 |\n", + "| std | 0.552 |\n", + "| value_loss | 0.0143 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.35 |\n", + "| ep_rew_mean | -0.42 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93100 |\n", + "| time_elapsed | 1594 |\n", + "| total_timesteps | 465500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.2 |\n", + "| explained_variance | 0.261 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93099 |\n", + "| policy_loss | 0.02 |\n", + "| std | 0.552 |\n", + "| value_loss | 0.0137 |\n", + 
"------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.38 |\n", + "| ep_rew_mean | -0.426 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93200 |\n", + "| time_elapsed | 1596 |\n", + "| total_timesteps | 466000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.22 |\n", + "| explained_variance | 0.522 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93199 |\n", + "| policy_loss | 0.0501 |\n", + "| std | 0.555 |\n", + "| value_loss | 0.0048 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.74 |\n", + "| ep_rew_mean | -0.386 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93300 |\n", + "| time_elapsed | 1597 |\n", + "| total_timesteps | 466500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.23 |\n", + "| explained_variance | 0.929 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93299 |\n", + "| policy_loss | -0.566 |\n", + "| std | 0.555 |\n", + "| value_loss | 0.0126 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.41 |\n", + "| ep_rew_mean | -0.433 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93400 |\n", + "| time_elapsed | 1600 |\n", + "| total_timesteps | 467000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.23 |\n", + "| explained_variance | 0.741 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93399 |\n", + "| policy_loss | -0.0202 |\n", + "| std | 0.555 |\n", + "| value_loss | 0.0059 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.51 |\n", + "| ep_rew_mean | -0.524 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93500 
|\n", + "| time_elapsed | 1601 |\n", + "| total_timesteps | 467500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.23 |\n", + "| explained_variance | -1.96 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93499 |\n", + "| policy_loss | -0.322 |\n", + "| std | 0.555 |\n", + "| value_loss | 0.0224 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.67 |\n", + "| ep_rew_mean | -0.468 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93600 |\n", + "| time_elapsed | 1603 |\n", + "| total_timesteps | 468000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.25 |\n", + "| explained_variance | -0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93599 |\n", + "| policy_loss | 1.93 |\n", + "| std | 0.557 |\n", + "| value_loss | 0.152 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.81 |\n", + "| ep_rew_mean | -0.401 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93700 |\n", + "| time_elapsed | 1605 |\n", + "| total_timesteps | 468500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.19 |\n", + "| explained_variance | 0.846 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93699 |\n", + "| policy_loss | 0.731 |\n", + "| std | 0.552 |\n", + "| value_loss | 0.0279 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.44 |\n", + "| ep_rew_mean | -0.463 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93800 |\n", + "| time_elapsed | 1606 |\n", + "| total_timesteps | 469000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.21 |\n", + "| explained_variance | -5.17 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93799 |\n", + "| policy_loss | -1.75 |\n", + "| std 
| 0.553 |\n", + "| value_loss | 0.187 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.54 |\n", + "| ep_rew_mean | -0.549 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 93900 |\n", + "| time_elapsed | 1608 |\n", + "| total_timesteps | 469500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.22 |\n", + "| explained_variance | 0.768 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93899 |\n", + "| policy_loss | -3.04 |\n", + "| std | 0.554 |\n", + "| value_loss | 0.181 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.49 |\n", + "| ep_rew_mean | -0.665 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94000 |\n", + "| time_elapsed | 1609 |\n", + "| total_timesteps | 470000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.26 |\n", + "| explained_variance | 0.486 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93999 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.557 |\n", + "| value_loss | 0.0548 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.95 |\n", + "| ep_rew_mean | -0.731 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94100 |\n", + "| time_elapsed | 1611 |\n", + "| total_timesteps | 470500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.25 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94099 |\n", + "| policy_loss | -2.14 |\n", + "| std | 0.556 |\n", + "| value_loss | 0.218 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.86 |\n", + "| ep_rew_mean | -0.816 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + 
"| fps | 291 |\n", + "| iterations | 94200 |\n", + "| time_elapsed | 1613 |\n", + "| total_timesteps | 471000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.23 |\n", + "| explained_variance | -5.15 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94199 |\n", + "| policy_loss | -0.979 |\n", + "| std | 0.554 |\n", + "| value_loss | 0.0471 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.19 |\n", + "| ep_rew_mean | -0.626 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94300 |\n", + "| time_elapsed | 1615 |\n", + "| total_timesteps | 471500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.22 |\n", + "| explained_variance | -6.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94299 |\n", + "| policy_loss | 2.25 |\n", + "| std | 0.554 |\n", + "| value_loss | 0.721 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.89 |\n", + "| ep_rew_mean | -0.404 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94400 |\n", + "| time_elapsed | 1617 |\n", + "| total_timesteps | 472000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.22 |\n", + "| explained_variance | -28.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94399 |\n", + "| policy_loss | 0.0877 |\n", + "| std | 0.552 |\n", + "| value_loss | 0.0975 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.46 |\n", + "| ep_rew_mean | -0.453 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94500 |\n", + "| time_elapsed | 1618 |\n", + "| total_timesteps | 472500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.21 |\n", + "| explained_variance | 0.714 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94499 |\n", + 
"| policy_loss | 0.184 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.0135 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.65 |\n", + "| ep_rew_mean | -0.47 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94600 |\n", + "| time_elapsed | 1620 |\n", + "| total_timesteps | 473000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.2 |\n", + "| explained_variance | 0.927 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94599 |\n", + "| policy_loss | -1.22 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.0714 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.53 |\n", + "| ep_rew_mean | -0.368 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94700 |\n", + "| time_elapsed | 1621 |\n", + "| total_timesteps | 473500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.22 |\n", + "| explained_variance | 0.758 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94699 |\n", + "| policy_loss | -1.27 |\n", + "| std | 0.551 |\n", + "| value_loss | 0.0714 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.87 |\n", + "| ep_rew_mean | -0.406 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94800 |\n", + "| time_elapsed | 1623 |\n", + "| total_timesteps | 474000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.2 |\n", + "| explained_variance | 0.604 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94799 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.549 |\n", + "| value_loss | 0.107 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.27 |\n", + "| ep_rew_mean | -0.442 |\n", + "| success_rate | 1 
|\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 94900 |\n", + "| time_elapsed | 1625 |\n", + "| total_timesteps | 474500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.19 |\n", + "| explained_variance | 0.875 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94899 |\n", + "| policy_loss | 0.334 |\n", + "| std | 0.549 |\n", + "| value_loss | 0.0181 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.352 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95000 |\n", + "| time_elapsed | 1627 |\n", + "| total_timesteps | 475000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.19 |\n", + "| explained_variance | -16.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94999 |\n", + "| policy_loss | -2.42 |\n", + "| std | 0.55 |\n", + "| value_loss | 0.328 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.71 |\n", + "| ep_rew_mean | -0.38 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95100 |\n", + "| time_elapsed | 1629 |\n", + "| total_timesteps | 475500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.18 |\n", + "| explained_variance | -4.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95099 |\n", + "| policy_loss | 0.787 |\n", + "| std | 0.55 |\n", + "| value_loss | 0.0919 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.88 |\n", + "| ep_rew_mean | -0.472 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95200 |\n", + "| time_elapsed | 1630 |\n", + "| total_timesteps | 476000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.17 |\n", + "| explained_variance | 0.917 |\n", + "| learning_rate | 0.0007 |\n", + "| 
n_updates | 95199 |\n", + "| policy_loss | 0.108 |\n", + "| std | 0.548 |\n", + "| value_loss | 0.00221 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.49 |\n", + "| ep_rew_mean | -0.366 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95300 |\n", + "| time_elapsed | 1632 |\n", + "| total_timesteps | 476500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.14 |\n", + "| explained_variance | 0.693 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95299 |\n", + "| policy_loss | 0.0213 |\n", + "| std | 0.547 |\n", + "| value_loss | 0.00328 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.18 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95400 |\n", + "| time_elapsed | 1633 |\n", + "| total_timesteps | 477000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.12 |\n", + "| explained_variance | -0.111 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95399 |\n", + "| policy_loss | 0.127 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.016 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.19 |\n", + "| ep_rew_mean | -0.326 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95500 |\n", + "| time_elapsed | 1635 |\n", + "| total_timesteps | 477500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.1 |\n", + "| explained_variance | 0.591 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95499 |\n", + "| policy_loss | 0.0447 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.0102 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.42 |\n", + "| ep_rew_mean | 
-0.356 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 95600 |\n", + "| time_elapsed | 1636 |\n", + "| total_timesteps | 478000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.08 |\n", + "| explained_variance | 0.922 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95599 |\n", + "| policy_loss | -0.0391 |\n", + "| std | 0.545 |\n", + "| value_loss | 0.00233 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.4 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95700 |\n", + "| time_elapsed | 1638 |\n", + "| total_timesteps | 478500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.11 |\n", + "| explained_variance | 0.736 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95699 |\n", + "| policy_loss | 0.19 |\n", + "| std | 0.547 |\n", + "| value_loss | 0.00221 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.18 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95800 |\n", + "| time_elapsed | 1641 |\n", + "| total_timesteps | 479000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.11 |\n", + "| explained_variance | 0.903 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95799 |\n", + "| policy_loss | -0.319 |\n", + "| std | 0.547 |\n", + "| value_loss | 0.00559 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.28 |\n", + "| ep_rew_mean | -0.346 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 95900 |\n", + "| time_elapsed | 1642 |\n", + "| total_timesteps | 479500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.07 |\n", + "| explained_variance | 0.175 |\n", + 
"| learning_rate | 0.0007 |\n", + "| n_updates | 95899 |\n", + "| policy_loss | 0.138 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.00413 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.67 |\n", + "| ep_rew_mean | -0.377 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96000 |\n", + "| time_elapsed | 1644 |\n", + "| total_timesteps | 480000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.09 |\n", + "| explained_variance | 0.888 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95999 |\n", + "| policy_loss | -0.0767 |\n", + "| std | 0.549 |\n", + "| value_loss | 0.00145 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.88 |\n", + "| ep_rew_mean | -0.472 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96100 |\n", + "| time_elapsed | 1645 |\n", + "| total_timesteps | 480500 |\n", + "| train/ | |\n", + "| entropy_loss | -5.05 |\n", + "| explained_variance | 0.952 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96099 |\n", + "| policy_loss | -0.475 |\n", + "| std | 0.546 |\n", + "| value_loss | 0.016 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.97 |\n", + "| ep_rew_mean | -0.602 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96200 |\n", + "| time_elapsed | 1647 |\n", + "| total_timesteps | 481000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.02 |\n", + "| explained_variance | -1.26 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96199 |\n", + "| policy_loss | -2.03 |\n", + "| std | 0.543 |\n", + "| value_loss | 0.243 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| 
ep_len_mean | 5.52 |\n", + "| ep_rew_mean | -0.482 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96300 |\n", + "| time_elapsed | 1649 |\n", + "| total_timesteps | 481500 |\n", + "| train/ | |\n", + "| entropy_loss | -5 |\n", + "| explained_variance | 0.558 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96299 |\n", + "| policy_loss | -1.29 |\n", + "| std | 0.542 |\n", + "| value_loss | 0.0935 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.48 |\n", + "| ep_rew_mean | -0.64 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 292 |\n", + "| iterations | 96400 |\n", + "| time_elapsed | 1650 |\n", + "| total_timesteps | 482000 |\n", + "| train/ | |\n", + "| entropy_loss | -5.01 |\n", + "| explained_variance | -1.73 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96399 |\n", + "| policy_loss | 0.695 |\n", + "| std | 0.542 |\n", + "| value_loss | 0.0338 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.99 |\n", + "| ep_rew_mean | -0.408 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96500 |\n", + "| time_elapsed | 1652 |\n", + "| total_timesteps | 482500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.98 |\n", + "| explained_variance | 0.653 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96499 |\n", + "| policy_loss | 0.972 |\n", + "| std | 0.54 |\n", + "| value_loss | 0.0399 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.32 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96600 |\n", + "| time_elapsed | 1654 |\n", + "| total_timesteps | 483000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.96 
|\n", + "| explained_variance | -1.03 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96599 |\n", + "| policy_loss | 0.438 |\n", + "| std | 0.54 |\n", + "| value_loss | 0.196 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.63 |\n", + "| ep_rew_mean | -0.443 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96700 |\n", + "| time_elapsed | 1656 |\n", + "| total_timesteps | 483500 |\n", + "| train/ | |\n", + "| entropy_loss | -5 |\n", + "| explained_variance | 0.886 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96699 |\n", + "| policy_loss | -0.0131 |\n", + "| std | 0.543 |\n", + "| value_loss | 0.00365 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.53 |\n", + "| ep_rew_mean | -0.641 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96800 |\n", + "| time_elapsed | 1658 |\n", + "| total_timesteps | 484000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.98 |\n", + "| explained_variance | -0.246 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96799 |\n", + "| policy_loss | -1.48 |\n", + "| std | 0.541 |\n", + "| value_loss | 0.0927 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.1 |\n", + "| ep_rew_mean | -0.936 |\n", + "| success_rate | 0.91 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 96900 |\n", + "| time_elapsed | 1659 |\n", + "| total_timesteps | 484500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.97 |\n", + "| explained_variance | -0.406 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96899 |\n", + "| policy_loss | -0.988 |\n", + "| std | 0.541 |\n", + "| value_loss | 0.05 |\n", + "------------------------------------\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.2 |\n", + "| ep_rew_mean | -1.15 |\n", + "| success_rate | 0.86 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97000 |\n", + "| time_elapsed | 1661 |\n", + "| total_timesteps | 485000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.98 |\n", + "| explained_variance | -707 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96999 |\n", + "| policy_loss | -6.14 |\n", + "| std | 0.542 |\n", + "| value_loss | 1.24 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 13 |\n", + "| ep_rew_mean | -1.23 |\n", + "| success_rate | 0.81 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97100 |\n", + "| time_elapsed | 1662 |\n", + "| total_timesteps | 485500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.94 |\n", + "| explained_variance | -13.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97099 |\n", + "| policy_loss | 0.328 |\n", + "| std | 0.54 |\n", + "| value_loss | 0.039 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12 |\n", + "| ep_rew_mean | -1.08 |\n", + "| success_rate | 0.85 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97200 |\n", + "| time_elapsed | 1664 |\n", + "| total_timesteps | 486000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.92 |\n", + "| explained_variance | -3.71 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97199 |\n", + "| policy_loss | 2.07 |\n", + "| std | 0.539 |\n", + "| value_loss | 0.19 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.6 |\n", + "| ep_rew_mean | -1.04 |\n", + "| success_rate | 0.88 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97300 |\n", + "| time_elapsed | 1667 |\n", + "| total_timesteps | 
486500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.9 |\n", + "| explained_variance | -3.21 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97299 |\n", + "| policy_loss | -11.8 |\n", + "| std | 0.537 |\n", + "| value_loss | 4.18 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.9 |\n", + "| ep_rew_mean | -1.17 |\n", + "| success_rate | 0.88 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97400 |\n", + "| time_elapsed | 1668 |\n", + "| total_timesteps | 487000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.91 |\n", + "| explained_variance | -43.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97399 |\n", + "| policy_loss | -0.773 |\n", + "| std | 0.538 |\n", + "| value_loss | 0.036 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.6 |\n", + "| ep_rew_mean | -1.01 |\n", + "| success_rate | 0.94 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97500 |\n", + "| time_elapsed | 1670 |\n", + "| total_timesteps | 487500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.92 |\n", + "| explained_variance | 0.662 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97499 |\n", + "| policy_loss | 3.37 |\n", + "| std | 0.54 |\n", + "| value_loss | 0.473 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.95 |\n", + "| ep_rew_mean | -0.846 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97600 |\n", + "| time_elapsed | 1671 |\n", + "| total_timesteps | 488000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.89 |\n", + "| explained_variance | -4.26 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97599 |\n", + "| policy_loss | 15 |\n", + "| std | 0.537 |\n", + "| value_loss | 26 |\n", + 
"------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.4 |\n", + "| ep_rew_mean | -0.882 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97700 |\n", + "| time_elapsed | 1673 |\n", + "| total_timesteps | 488500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.85 |\n", + "| explained_variance | -4.05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97699 |\n", + "| policy_loss | -0.48 |\n", + "| std | 0.535 |\n", + "| value_loss | 0.0188 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.2 |\n", + "| ep_rew_mean | -0.86 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97800 |\n", + "| time_elapsed | 1675 |\n", + "| total_timesteps | 489000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.87 |\n", + "| explained_variance | -7.07 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97799 |\n", + "| policy_loss | 4.96 |\n", + "| std | 0.536 |\n", + "| value_loss | 1.74 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.68 |\n", + "| ep_rew_mean | -0.463 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 97900 |\n", + "| time_elapsed | 1676 |\n", + "| total_timesteps | 489500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.88 |\n", + "| explained_variance | 0.305 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97899 |\n", + "| policy_loss | -6.96 |\n", + "| std | 0.538 |\n", + "| value_loss | 2.72 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.17 |\n", + "| ep_rew_mean | -0.434 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98000 |\n", + "| 
time_elapsed | 1678 |\n", + "| total_timesteps | 490000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.87 |\n", + "| explained_variance | 0.393 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97999 |\n", + "| policy_loss | 2.03 |\n", + "| std | 0.536 |\n", + "| value_loss | 0.483 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.46 |\n", + "| ep_rew_mean | -0.432 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98100 |\n", + "| time_elapsed | 1680 |\n", + "| total_timesteps | 490500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.85 |\n", + "| explained_variance | 0.0917 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98099 |\n", + "| policy_loss | -2.34 |\n", + "| std | 0.535 |\n", + "| value_loss | 0.522 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.6 |\n", + "| ep_rew_mean | -0.463 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98200 |\n", + "| time_elapsed | 1682 |\n", + "| total_timesteps | 491000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.84 |\n", + "| explained_variance | 0.72 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98199 |\n", + "| policy_loss | 2.01 |\n", + "| std | 0.535 |\n", + "| value_loss | 0.0987 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.47 |\n", + "| ep_rew_mean | -0.547 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98300 |\n", + "| time_elapsed | 1684 |\n", + "| total_timesteps | 491500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.84 |\n", + "| explained_variance | -4.35 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98299 |\n", + "| policy_loss | -1.36 |\n", + "| std | 0.534 |\n", + "| 
value_loss | 0.0759 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.57 |\n", + "| ep_rew_mean | -0.733 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98400 |\n", + "| time_elapsed | 1685 |\n", + "| total_timesteps | 492000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.85 |\n", + "| explained_variance | -11.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98399 |\n", + "| policy_loss | -0.732 |\n", + "| std | 0.535 |\n", + "| value_loss | 0.106 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.05 |\n", + "| ep_rew_mean | -0.768 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98500 |\n", + "| time_elapsed | 1687 |\n", + "| total_timesteps | 492500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.83 |\n", + "| explained_variance | -103 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98499 |\n", + "| policy_loss | -1.24 |\n", + "| std | 0.533 |\n", + "| value_loss | 0.163 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.82 |\n", + "| ep_rew_mean | -0.833 |\n", + "| success_rate | 0.94 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98600 |\n", + "| time_elapsed | 1689 |\n", + "| total_timesteps | 493000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.87 |\n", + "| explained_variance | -2.64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98599 |\n", + "| policy_loss | -1.38 |\n", + "| std | 0.536 |\n", + "| value_loss | 0.109 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.16 |\n", + "| ep_rew_mean | -0.792 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + "| fps | 291 |\n", 
+ "| iterations | 98700 |\n", + "| time_elapsed | 1690 |\n", + "| total_timesteps | 493500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.88 |\n", + "| explained_variance | -1.82 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98699 |\n", + "| policy_loss | 19.9 |\n", + "| std | 0.537 |\n", + "| value_loss | 21.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.74 |\n", + "| ep_rew_mean | -0.491 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98800 |\n", + "| time_elapsed | 1693 |\n", + "| total_timesteps | 494000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.86 |\n", + "| explained_variance | 0.539 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98799 |\n", + "| policy_loss | -0.219 |\n", + "| std | 0.535 |\n", + "| value_loss | 0.0812 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.8 |\n", + "| ep_rew_mean | -0.454 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 98900 |\n", + "| time_elapsed | 1694 |\n", + "| total_timesteps | 494500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.84 |\n", + "| explained_variance | -7.22 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98899 |\n", + "| policy_loss | 1.01 |\n", + "| std | 0.535 |\n", + "| value_loss | 0.123 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.79 |\n", + "| ep_rew_mean | -0.398 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99000 |\n", + "| time_elapsed | 1696 |\n", + "| total_timesteps | 495000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.8 |\n", + "| explained_variance | 0.704 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98999 |\n", + "| policy_loss | -0.619 
|\n", + "| std | 0.532 |\n", + "| value_loss | 0.0342 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.67 |\n", + "| ep_rew_mean | -0.381 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99100 |\n", + "| time_elapsed | 1698 |\n", + "| total_timesteps | 495500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.77 |\n", + "| explained_variance | -6.73 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99099 |\n", + "| policy_loss | -0.454 |\n", + "| std | 0.529 |\n", + "| value_loss | 0.129 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.58 |\n", + "| ep_rew_mean | -0.381 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99200 |\n", + "| time_elapsed | 1699 |\n", + "| total_timesteps | 496000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.79 |\n", + "| explained_variance | -4.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99199 |\n", + "| policy_loss | 0.0709 |\n", + "| std | 0.531 |\n", + "| value_loss | 0.02 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.33 |\n", + "| ep_rew_mean | -0.352 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99300 |\n", + "| time_elapsed | 1701 |\n", + "| total_timesteps | 496500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.78 |\n", + "| explained_variance | -0.18 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99299 |\n", + "| policy_loss | 0.445 |\n", + "| std | 0.53 |\n", + "| value_loss | 0.0119 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.2 |\n", + "| ep_rew_mean | -0.421 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", 
+ "| fps | 291 |\n", + "| iterations | 99400 |\n", + "| time_elapsed | 1702 |\n", + "| total_timesteps | 497000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.8 |\n", + "| explained_variance | -0.351 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99399 |\n", + "| policy_loss | 0.292 |\n", + "| std | 0.532 |\n", + "| value_loss | 0.0207 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.318 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99500 |\n", + "| time_elapsed | 1705 |\n", + "| total_timesteps | 497500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.78 |\n", + "| explained_variance | 0.481 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99499 |\n", + "| policy_loss | -0.367 |\n", + "| std | 0.529 |\n", + "| value_loss | 0.0192 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.64 |\n", + "| ep_rew_mean | -0.387 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99600 |\n", + "| time_elapsed | 1707 |\n", + "| total_timesteps | 498000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.76 |\n", + "| explained_variance | -0.395 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99599 |\n", + "| policy_loss | 0.939 |\n", + "| std | 0.527 |\n", + "| value_loss | 0.0312 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.05 |\n", + "| ep_rew_mean | -0.41 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99700 |\n", + "| time_elapsed | 1708 |\n", + "| total_timesteps | 498500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.74 |\n", + "| explained_variance | 0.941 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99699 |\n", + 
"| policy_loss | -0.00683 |\n", + "| std | 0.525 |\n", + "| value_loss | 0.0017 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.9 |\n", + "| ep_rew_mean | -0.406 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99800 |\n", + "| time_elapsed | 1710 |\n", + "| total_timesteps | 499000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.74 |\n", + "| explained_variance | 0.461 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99799 |\n", + "| policy_loss | 0.149 |\n", + "| std | 0.525 |\n", + "| value_loss | 0.00716 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.06 |\n", + "| ep_rew_mean | -0.414 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 99900 |\n", + "| time_elapsed | 1712 |\n", + "| total_timesteps | 499500 |\n", + "| train/ | |\n", + "| entropy_loss | -4.72 |\n", + "| explained_variance | 0.854 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99899 |\n", + "| policy_loss | -0.272 |\n", + "| std | 0.524 |\n", + "| value_loss | 0.00807 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.8 |\n", + "| ep_rew_mean | -0.397 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 291 |\n", + "| iterations | 100000 |\n", + "| time_elapsed | 1713 |\n", + "| total_timesteps | 500000 |\n", + "| train/ | |\n", + "| entropy_loss | -4.72 |\n", + "| explained_variance | 0.972 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99999 |\n", + "| policy_loss | 0.206 |\n", + "| std | 0.523 |\n", + "| value_loss | 0.00366 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [] + }, 
+ "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "<br> <style><br> .wandb-row {<br> display: flex;<br> flex-direction: row;<br> flex-wrap: wrap;<br> justify-content: flex-start;<br> width: 100%;<br> }<br> .wandb-col {<br> display: flex;<br> flex-direction: column;<br> flex-basis: 100%;<br> flex: 1;<br> padding: 10px;<br> }<br> </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████</td></tr><tr><td>rollout/ep_len_mean</td><td>█▆▆▆▆▆▃▄▄▄▃▂▅▆▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂</td></tr><tr><td>rollout/ep_rew_mean</td><td>▁▄▄▅▆▆▇▆████████████████████████████████</td></tr><tr><td>rollout/success_rate</td><td>▁▄▃▄▄▅▆██▇████████████████████████████▇█</td></tr><tr><td>time/fps</td><td>▁▂▄▂▅▆▇███▇▇▇▇▇▇▇███▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇</td></tr><tr><td>train/entropy_loss</td><td>▁▁▁▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇█</td></tr><tr><td>train/explained_variance</td><td>▁▇▆▃▃▅▇▆▇█▇██▇████████████▇█████▄█████▆█</td></tr><tr><td>train/learning_rate</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/policy_loss</td><td>▁▃▂▃▃▃▂▇█▂▁▃▃▃▃▁▂▂▂▂▂▃▂▃▃▂▃▂▂▂▃▂▃▃▃▂▃▃▃▃</td></tr><tr><td>train/std</td><td>█▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁</td></tr><tr><td>train/value_loss</td><td>▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table 
class=\"wandb\"><tr><td>global_step</td><td>500000</td></tr><tr><td>rollout/ep_len_mean</td><td>4.8</td></tr><tr><td>rollout/ep_rew_mean</td><td>-0.39746</td></tr><tr><td>rollout/success_rate</td><td>1</td></tr><tr><td>time/fps</td><td>291</td></tr><tr><td>train/entropy_loss</td><td>-4.71591</td></tr><tr><td>train/explained_variance</td><td>0.97199</td></tr><tr><td>train/learning_rate</td><td>0.0007</td></tr><tr><td>train/policy_loss</td><td>0.206</td></tr><tr><td>train/std</td><td>0.52343</td></tr><tr><td>train/value_loss</td><td>0.00366</td></tr></table><br/></div></div>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run <strong style=\"color:#cdcd00\">brisk-fog-2</strong> at: <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach/runs/y39cy9ws</a><br> View project at: <a href='https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach' target=\"_blank\">https://wandb.ai/thomasdgr-ecole-centrale-de-lyon/pandareach</a><br>Synced 5 W&B file(s), 0 media file(s), 3 artifact file(s) and 1 other file(s)" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Find logs at: <code>./wandb/run-20250312_160957-y39cy9ws/logs</code>" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "model.save(\"a2c_pandareach\")\n", + "\n", + "login(token=\"xxx\")\n", + "push_to_hub(\n", + " repo_id=\"Thomstr/A2C_PandaReach\",\n", + " filename=\"a2c_pandareach.zip\",\n", + " commit_message=\"Added A2C model for PandaReach with Stable Baselines3\",\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 173, + "referenced_widgets": [ + "4b24dddcdfc64f9d943f67e25675a8d1", + 
"7f8f84aaf0f34b12950cc66d078b2812", + "440f7f8102854ac4a38f5c7929446fe6", + "f92b16113ecf4cbf999208426529ccad", + "04966e7c2fff44d08a5d5d2083c4c36f", + "71a187c360684dfa8165f0da5a6bd84b", + "48aea33a04ec425291c36aba5afce22e", + "33217ae5bf0a484fb3ae7bbf17cf0fbc", + "d69510a7c2894695a6ac9d8f03daf543", + "6e384db209c243c88dbc91571a4418f7", + "e038b80524d4425a86421308d60c7445" + ] + }, + "id": "mFGEVNsW-2fq", + "outputId": "a40f2762-15fe-4f2b-d03e-ffd598e2cb5b" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001B[38;5;4mℹ Pushing repo Thomstr/A2C_PandaReach to the Hugging Face Hub\u001B[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "a2c_pandareach.zip: 0%| | 0.00/114k [00:00<?, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "4b24dddcdfc64f9d943f67e25675a8d1" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001B[38;5;2m✔ Your model has been uploaded to the Hub, you can find it here:\n", + "https://huggingface.co/Thomstr/A2C_PandaReach/tree/main/\u001B[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "CommitInfo(commit_url='https://huggingface.co/Thomstr/A2C_PandaReach/commit/62a9cd410bd1e266a040b6966191aa7deaf3eb62', commit_message='Added A2C model for PandaReach with Stable Baselines3', commit_description='', oid='62a9cd410bd1e266a040b6966191aa7deaf3eb62', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Thomstr/A2C_PandaReach', endpoint='https://huggingface.co', repo_type='model', repo_id='Thomstr/A2C_PandaReach'), pr_revision=None, pr_num=None)" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 4 + } + ] + } + ] +} diff --git a/a2c_sb3_panda_reach.py b/a2c_sb3_panda_reach.py index 
ec45523687c7c76469ed2df686b056272f80b405..cab34acd1ca2382a69975a98733a2ce0678a2edb 100644 --- a/a2c_sb3_panda_reach.py +++ b/a2c_sb3_panda_reach.py @@ -1,45 +1,46 @@ -import gym +import gymnasium as gym import panda_gym from stable_baselines3 import A2C from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.vec_env import DummyVecEnv import wandb from wandb.integration.sb3 import WandbCallback +from huggingface_hub import login +from huggingface_sb3 import push_to_hub -config = { - "policy_type": "MultiInputPolicy", - "total_timesteps": 500000, - "env_name": "PandaReachJointsDense-v3", -} +if __name__ == "__main__": + config = { + "policy_type": "MultiInputPolicy", + "total_timesteps": 500000, + "env_name": "PandaReachJointsDense-v3", + } -run = wandb.init( - project="pandareach", - config=config, - sync_tensorboard=True, - monitor_gym=True, - save_code=True, -) + wandb.login(key='xxx') + + run = wandb.init( + project="pandareach", + config=config, + sync_tensorboard=True, + monitor_gym=True, + save_code=True, + ) -def make_env(): env = gym.make(config["env_name"]) - env = Monitor(env) # record stats such as returns - return env - -env = DummyVecEnv([make_env]) -env = gym.make("PandaReachJointsDense-v3") -model = A2C(config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}") -model.learn( - total_timesteps=config["total_timesteps"], - callback=WandbCallback( + model = A2C(config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}") + model.learn( + total_timesteps=config["total_timesteps"], + callback=WandbCallback( + ) ) -) -run.finish() + run.finish() + + model.save("a2c_pandareach") -login(token="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") -push_to_hub( - repo_id="Thomstr/A2C_PandaReach", - filename="a2c_pandareach.zip", - commit_message="Added A2C model for PandaReach with Stable Baselines3", - ) \ No newline at end of file + login(token="xxx") + push_to_hub( + repo_id="Thomstr/A2C_PandaReach", + 
filename="a2c_pandareach.zip", + commit_message="Added A2C model for PandaReach with Stable Baselines3", + ) \ No newline at end of file