From 057fd9fe59ebd2cbc5fad59c0da6881f0c674a0c Mon Sep 17 00:00:00 2001 From: Benyahia Mohammed Oussama <mohammed.benyahia@etu.ec-lyon.fr> Date: Wed, 26 Feb 2025 15:26:15 +0000 Subject: [PATCH] Replace a2c_sb3_cartpole.ipynb --- a2c_sb3_cartpole.ipynb | 29108 +++++++++++++++++++++++++++++++-------- 1 file changed, 23053 insertions(+), 6055 deletions(-) diff --git a/a2c_sb3_cartpole.ipynb b/a2c_sb3_cartpole.ipynb index d86d8d9..c14c3fe 100644 --- a/a2c_sb3_cartpole.ipynb +++ b/a2c_sb3_cartpole.ipynb @@ -1,6456 +1,23454 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "a9046086ab104ad19085b9fe91797e36": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [], + "layout": "IPY_MODEL_ab0f8641ef2e47b6ada23a3197b29bdc" + } + }, + "700cf055d755454e8e3a514486c945e9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6047961d9cbf4e90a8b342dae97ccbb7", + "placeholder": "", + "style": "IPY_MODEL_ca1f49a4b5dd41399d3f1133c367eab1", + "value": "<center> 
<img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>" + } + }, + "298989225cd04e4c9273d3d57c3f8577": { + "model_module": "@jupyter-widgets/controls", + "model_name": "PasswordModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "PasswordModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "PasswordView", + "continuous_update": true, + "description": "Token:", + "description_tooltip": null, + "disabled": false, + "layout": "IPY_MODEL_0f3330b587364ddb8185244b3cc8e302", + "placeholder": "", + "style": "IPY_MODEL_e2a14cac37924445890727e1bb99f5da", + "value": "" + } + }, + "8ea627d85bcb4948bc27b5d83296f967": { + "model_module": "@jupyter-widgets/controls", + "model_name": "CheckboxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "CheckboxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "CheckboxView", + "description": "Add token as git credential?", + "description_tooltip": null, + "disabled": false, + "indent": true, + "layout": "IPY_MODEL_a880a16471de4055bf3fca0f0a927131", + "style": "IPY_MODEL_b5db27274d8c4f50bfcd343813e292e6", + "value": true + } + }, + "40e3f08692c34f9e9f6ff74e7d246d93": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ButtonView", + "button_style": "", + "description": "Login", + "disabled": false, + "icon": "", + "layout": "IPY_MODEL_52f7c233d0284066a29dd45ac2e9ca55", + "style": "IPY_MODEL_1dd9cf2b47d64b928b346406c83bea40", + "tooltip": "" + } + }, + "522db5f67c6040538f2ba2ac24ab1546": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_781969fcfc19418d9a3a8ba92c4b9f20", + "placeholder": "", + "style": "IPY_MODEL_207341f7cd094f5ab882a733ff055742", + "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>" + } + }, + "ab0f8641ef2e47b6ada23a3197b29bdc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": "center", + "align_self": null, + "border": null, + "bottom": null, + "display": "flex", + "flex": null, + "flex_flow": "column", + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": "50%" + } + }, + "6047961d9cbf4e90a8b342dae97ccbb7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ca1f49a4b5dd41399d3f1133c367eab1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0f3330b587364ddb8185244b3cc8e302": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2a14cac37924445890727e1bb99f5da": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a880a16471de4055bf3fca0f0a927131": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b5db27274d8c4f50bfcd343813e292e6": { + 
"model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "52f7c233d0284066a29dd45ac2e9ca55": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1dd9cf2b47d64b928b346406c83bea40": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ButtonStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": null, + "font_weight": "" + } + }, + "781969fcfc19418d9a3a8ba92c4b9f20": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "207341f7cd094f5ab882a733ff055742": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "84265be849b24979bb5977dc4c89aad7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40a53988f49247359d0a72f079f78da2", + "placeholder": "", + "style": "IPY_MODEL_13bf4b6de6ff4a78943d3b713a88461b", + "value": "Connecting..." + } + }, + "40a53988f49247359d0a72f079f78da2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "13bf4b6de6ff4a78943d3b713a88461b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c78c5ec0063b416289dce2fe89e9b0ad": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7fd552a5e11744049800633bae0113eb", + "IPY_MODEL_ee4ec4e591294599a359719edbd8e683", + "IPY_MODEL_619f5cf36b8045728c25441e5dbcd87e" + ], + "layout": "IPY_MODEL_39cd3e39b77544e784129cf10345b7d1" + } + }, + "7fd552a5e11744049800633bae0113eb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4c6da74c843748b9b72512619a2b8ff2", + "placeholder": "", + "style": "IPY_MODEL_c3560a282d7947b0a0fc8c8859be2b4c", + "value": "policy.optimizer.pth: 100%" + } + }, + "ee4ec4e591294599a359719edbd8e683": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_eef69609139c477f88dcf0c72f485493", + "max": 49224, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5b56561bb7e4406dbfc1765ebf073c8d", + "value": 49224 + } + }, + "619f5cf36b8045728c25441e5dbcd87e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_283a3a4a6ad34970b1f9354cfc061fe8", + "placeholder": "", + "style": "IPY_MODEL_418ee5dd7b8b4a5c80f723c26994020c", + "value": " 49.2k/49.2k [00:00<00:00, 77.3kB/s]" + } + }, + "39cd3e39b77544e784129cf10345b7d1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": 
null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4c6da74c843748b9b72512619a2b8ff2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c3560a282d7947b0a0fc8c8859be2b4c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" 
+ } + }, + "eef69609139c477f88dcf0c72f485493": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5b56561bb7e4406dbfc1765ebf073c8d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "283a3a4a6ad34970b1f9354cfc061fe8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "418ee5dd7b8b4a5c80f723c26994020c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "adbdabe296864d4bb49cd7bf8a6eaa81": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_2ca21c3b56de4cdfbabf69e62f2c5b5d", + 
"IPY_MODEL_4122230c2ca94b98b392556b403ea576", + "IPY_MODEL_75aac76a1e3443d38b045f75e8229d04" + ], + "layout": "IPY_MODEL_45e2d80723ac44a88112c71c1b44d324" + } + }, + "2ca21c3b56de4cdfbabf69e62f2c5b5d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_10dd75ccb3ce49c7a9d430d427adebd8", + "placeholder": "", + "style": "IPY_MODEL_88092a58a99b430ca62a488e5f5378b5", + "value": "Upload 3 LFS files: 100%" + } + }, + "4122230c2ca94b98b392556b403ea576": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_38f181cc724a4771b977da55d73fad3a", + "max": 3, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c4e56a18ccdc42c8bd2f3837f69dec0c", + "value": 3 + } + }, + "75aac76a1e3443d38b045f75e8229d04": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0b740bf50fa643898ef910e59c3975cd", + "placeholder": "", + "style": "IPY_MODEL_543cae745d054c6ea92d37575a92b556", + "value": " 3/3 [00:00<00:00, 2.49it/s]" + } + }, + "45e2d80723ac44a88112c71c1b44d324": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "10dd75ccb3ce49c7a9d430d427adebd8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "88092a58a99b430ca62a488e5f5378b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "38f181cc724a4771b977da55d73fad3a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c4e56a18ccdc42c8bd2f3837f69dec0c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0b740bf50fa643898ef910e59c3975cd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "543cae745d054c6ea92d37575a92b556": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "af001b75d30f48c7a5a02293afa17ba5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8a7ed8d444d14ff2bda905fba1e6f2c0", + "IPY_MODEL_b07139fd2f8b470a940d9f07afdaa0f9", + "IPY_MODEL_5c6278e79f274f2e96a1e2e323b61b17" + ], + "layout": "IPY_MODEL_3c75e898bfdb45329da491debd7804a6" + } + }, + "8a7ed8d444d14ff2bda905fba1e6f2c0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d36d8e3f912e46cbae23854960e6e43e", + "placeholder": "", + "style": "IPY_MODEL_71b3536b80c64ae59f9b32ad865d6e55", + "value": "policy.pth: 100%" + } + }, + 
"b07139fd2f8b470a940d9f07afdaa0f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2be2284cce3b462192b40b5ce90b98a8", + "max": 47343, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6f5334bfb2404cb89c5ce0479592a454", + "value": 47343 + } + }, + "5c6278e79f274f2e96a1e2e323b61b17": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_12d43c87ba1d484d900c33a50c398391", + "placeholder": "", + "style": "IPY_MODEL_f6b77041e8054281acf81d3cd6809d84", + "value": " 47.3k/47.3k [00:00<00:00, 65.7kB/s]" + } + }, + "3c75e898bfdb45329da491debd7804a6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, 
+ "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d36d8e3f912e46cbae23854960e6e43e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71b3536b80c64ae59f9b32ad865d6e55": { + "model_module": "@jupyter-widgets/controls", + 
"model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2be2284cce3b462192b40b5ce90b98a8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6f5334bfb2404cb89c5ce0479592a454": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "12d43c87ba1d484d900c33a50c398391": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f6b77041e8054281acf81d3cd6809d84": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fc348c31afbf470a84acde08f7f4142c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4112157198ea406bb57cddbcef95e55d", + "IPY_MODEL_8296fa13df80418ab2d12b4856f564c8", + "IPY_MODEL_a8937b021ecb421986877e2d852516c0" + ], + "layout": "IPY_MODEL_08c8924685c648f9aa0b78b86e1af506" + } + }, + "4112157198ea406bb57cddbcef95e55d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_452d21dd58bd4b1396ec5f7e78ebed31", + "placeholder": "", + "style": "IPY_MODEL_2e7a1e4df0e243329d7abe148a54ad27", + "value": "a2c-panda-reach.zip: 100%" + } + }, + "8296fa13df80418ab2d12b4856f564c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9f7e7d0bb83c4ae08bb1dc907a6c66dd", + "max": 113710, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_30208c255d674ae3b908e538406f82ee", + "value": 113710 + } + }, + "a8937b021ecb421986877e2d852516c0": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_04b3bb74aa08413cb5b4225a2f21bca7", + "placeholder": "", + "style": "IPY_MODEL_96818b1f5fd546868d830b0bc1610a9c", + "value": " 114k/114k [00:00<00:00, 89.4kB/s]" + } + }, + "08c8924685c648f9aa0b78b86e1af506": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "452d21dd58bd4b1396ec5f7e78ebed31": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2e7a1e4df0e243329d7abe148a54ad27": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9f7e7d0bb83c4ae08bb1dc907a6c66dd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", 
+ "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "30208c255d674ae3b908e538406f82ee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "04b3bb74aa08413cb5b4225a2f21bca7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "96818b1f5fd546868d830b0bc1610a9c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2nCK-hBUz1ho", + "outputId": "b3226991-5b8b-49da-bc3b-14c29756d0d9" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting panda-gym==3.0.7\n", + " Downloading panda_gym-3.0.7-py3-none-any.whl.metadata (4.3 kB)\n", + "Collecting stable-baselines3\n", + " Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)\n", + "Requirement already satisfied: wandb in /usr/local/lib/python3.11/dist-packages (0.19.6)\n", + "Collecting huggingface_sb3\n", + " Downloading huggingface_sb3-3.0-py3-none-any.whl.metadata (6.3 kB)\n", + "Requirement already satisfied: gymnasium>=0.26 in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (1.0.0)\n", + "Collecting pybullet (from panda-gym==3.0.7)\n", + " Downloading 
pybullet-3.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (1.26.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (1.13.1)\n", + "Requirement already satisfied: torch<3.0,>=2.3 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (2.5.1+cu124)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (3.1.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (2.2.2)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (3.10.0)\n", + "Requirement already satisfied: click!=8.0.0,>=7.1 in /usr/local/lib/python3.11/dist-packages (from wandb) (8.1.8)\n", + "Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (0.4.0)\n", + "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (3.1.44)\n", + "Requirement already satisfied: platformdirs in /usr/local/lib/python3.11/dist-packages (from wandb) (4.3.6)\n", + "Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.25.6)\n", + "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (5.9.5)\n", + "Requirement already satisfied: pydantic<3,>=2.6 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.10.6)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from wandb) (6.0.2)\n", + "Requirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.32.3)\n", + "Requirement already satisfied: sentry-sdk>=2.0.0 in 
/usr/local/lib/python3.11/dist-packages (from wandb) (2.22.0)\n", + "Requirement already satisfied: setproctitle in /usr/local/lib/python3.11/dist-packages (from wandb) (1.3.4)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from wandb) (75.1.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.4 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.12.2)\n", + "Requirement already satisfied: huggingface-hub~=0.8 in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (0.28.1)\n", + "Requirement already satisfied: wasabi in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (1.1.3)\n", + "Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.17.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym==3.0.7) (0.0.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (3.17.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (24.2)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.67.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=2.6->wandb) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=2.6->wandb) (2.27.2)\n", 
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2025.1.31)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.1.5)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 
kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", + " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0,>=2.3->stable-baselines3) (1.3.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (4.56.0)\n", + 
"Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (1.4.8)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (11.1.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (3.2.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3) (2025.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3) (2025.1)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch<3.0,>=2.3->stable-baselines3) (3.0.2)\n", + "Downloading panda_gym-3.0.7-py3-none-any.whl (23 kB)\n", + "Downloading stable_baselines3-2.5.0-py3-none-any.whl (183 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)\n", + "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 
MB\u001b[0m \u001b[31m53.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m42.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m24.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m61.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pybullet-3.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.2/103.2 MB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pybullet, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, panda-gym, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, huggingface_sb3, stable-baselines3\n", + " Attempting uninstall: nvidia-nvjitlink-cu12\n", + " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", + " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", + " Attempting uninstall: nvidia-curand-cu12\n", + " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", + " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", + " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", + " Attempting uninstall: nvidia-cufft-cu12\n", + " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", + " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", + " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", + " Attempting uninstall: nvidia-cuda-runtime-cu12\n", + " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", + " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", + " 
Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-cupti-cu12\n", + " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cublas-cu12\n", + " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", + " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", + " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", + " Attempting uninstall: nvidia-cusparse-cu12\n", + " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", + " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", + " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", + " Attempting uninstall: nvidia-cudnn-cu12\n", + " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", + " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", + " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", + " Attempting uninstall: nvidia-cusolver-cu12\n", + " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", + " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", + " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", + "Successfully installed huggingface_sb3-3.0 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 panda-gym-3.0.7 pybullet-3.2.7 stable-baselines3-2.5.0\n", + "Requirement already satisfied: panda-gym==3.0.7 in /usr/local/lib/python3.11/dist-packages (3.0.7)\n", + "Requirement already satisfied: gymnasium>=0.26 in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (1.0.0)\n", + "Requirement already satisfied: pybullet in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (3.2.7)\n", + "Requirement 
already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (1.26.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from panda-gym==3.0.7) (1.13.1)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym==3.0.7) (3.1.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym==3.0.7) (4.12.2)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium>=0.26->panda-gym==3.0.7) (0.0.4)\n" + ] + } + ], + "source": [ + "!pip install panda-gym==3.0.7 stable-baselines3 wandb huggingface_sb3\n", + "! pip install --upgrade panda-gym==3.0.7\n" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install wandb -qU\n", + "# API key redacted - revoke the leaked key; supply a new one via WANDB_API_KEY or the login prompt\n", + "! wandb login --relogin" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NiIjvtLfasLj", + "outputId": "2313253d-a77f-4ac2-e314-e537d46725d3" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.8/20.8 MB\u001b[0m \u001b[31m44.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. 
(Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import wandb\n", + "wandb.login()" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "k-Am822Jb3rY", - "outputId": "27dae454-cdd8-4f9f-f8a2-619d2fe7400d" - }, - "outputs": [ + "id": "c2HWR0VVay1J", + "outputId": "84954610-515f-4d22-ddeb-1297a30dae77" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mbenyahiamohammedoussama\u001b[0m (\u001b[33mbenyahiamohammedoussama-ecole-central-lyon\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Initialize a new run\n", + "wandb.init(project=\"panda-gym\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 125 + }, + "id": "aYyAj3xmay39", + "outputId": "96c2f77a-68f0-4d88-ed06-1786bcd720d5" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Tracking run with wandb version 0.19.7" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Run data is saved locally in <code>/content/wandb/run-20250226_101509-h4i7sifx</code>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Syncing run <strong><a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/h4i7sifx' target=\"_blank\">light-snowflake-25</a></strong> to <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View project at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ 
+ " View run at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/h4i7sifx' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/h4i7sifx</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/h4i7sifx?jupyter=true' style='border:none;width:100%;height:420px;display:none;'></iframe>" + ], + "text/plain": [ + "<wandb.sdk.wandb_run.Run at 0x7a55d7011e50>" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from huggingface_hub import notebook_login\n", + "\n", + "notebook_login()\n", + "# Token redacted - revoke the leaked token; paste a new one into the login widget or set HF_TOKEN" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17, + "referenced_widgets": [ + "a9046086ab104ad19085b9fe91797e36", + "700cf055d755454e8e3a514486c945e9", + "298989225cd04e4c9273d3d57c3f8577", + "8ea627d85bcb4948bc27b5d83296f967", + "40e3f08692c34f9e9f6ff74e7d246d93", + "522db5f67c6040538f2ba2ac24ab1546", + "ab0f8641ef2e47b6ada23a3197b29bdc", + "6047961d9cbf4e90a8b342dae97ccbb7", + "ca1f49a4b5dd41399d3f1133c367eab1", + "0f3330b587364ddb8185244b3cc8e302", + "e2a14cac37924445890727e1bb99f5da", + "a880a16471de4055bf3fca0f0a927131", + "b5db27274d8c4f50bfcd343813e292e6", + "52f7c233d0284066a29dd45ac2e9ca55", + "1dd9cf2b47d64b928b346406c83bea40", + "781969fcfc19418d9a3a8ba92c4b9f20", + "207341f7cd094f5ab882a733ff055742", + "84265be849b24979bb5977dc4c89aad7", + "40a53988f49247359d0a72f079f78da2", + "13bf4b6de6ff4a78943d3b713a88461b" + ] + }, + "id": "ja19EsqZaWF8", + "outputId": "a8efb7e5-4c75-4d0e-d52e-2a8a55be19a3" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + 
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "a9046086ab104ad19085b9fe91797e36" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "import gymnasium as gym\n", + "from stable_baselines3 import A2C\n", + "from stable_baselines3.common.monitor import Monitor\n", + "from stable_baselines3.common.vec_env import DummyVecEnv\n", + "from stable_baselines3.common.evaluation import evaluate_policy\n", + "import wandb\n", + "import panda_gym\n", + "from wandb.integration.sb3 import WandbCallback\n", + "from huggingface_hub import notebook_login\n", + "from huggingface_sb3 import package_to_hub\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Configuration dictionary for the RL agent\n", + "config = {\n", + " \"policy_type\": \"MultiInputPolicy\",\n", + " \"total_timesteps\": 500000,\n", + " \"env_name\": \"PandaReachJointsDense-v3\",\n", + " \"num_episodes\": 600,\n", + "}\n", + "\n", + "# Initialize a new wandb run\n", + "run = wandb.init(\n", + " project=\"panda-gym\",\n", + " config=config,\n", + " sync_tensorboard=True, # Auto-upload\n", + " monitor_gym=True, # Auto-upload the videos of agents playing the game\n", + " save_code=True, # Save the code (optional)\n", + ")\n", + "\n", + "def make_env():\n", + "\n", + " env = gym.make(config[\"env_name\"])\n", + " env = Monitor(env) # Record stats such as returns\n", + " return env\n", + "\n", + "# Create the environment and model\n", + "env = DummyVecEnv([make_env])\n", + "model = A2C(config[\"policy_type\"], env, verbose=1, tensorboard_log=f\"runs/{run.id}\")\n", + "\n", + "# Train the model for a fixed number of episodes\n", + "episode_rewards = []\n", + "timesteps_per_episode = config[\"total_timesteps\"] // config[\"num_episodes\"]\n", + "\n", + "for episode in range(config[\"num_episodes\"]):\n", + " obs 
= env.reset()\n", + " total_reward = 0\n", + " done = False\n", + " while not done:\n", + " action, _states = model.predict(obs, deterministic=True)\n", + " obs, reward, done, info = env.step(action)\n", + " total_reward += reward[0]\n", + "\n", + " episode_rewards.append(total_reward)\n", + " print(f\"Episode {episode+1}/{config['num_episodes']}: Total Reward = {total_reward:.2f}\")\n", + "\n", + " # Train the model incrementally\n", + " model.learn(total_timesteps=timesteps_per_episode, reset_num_timesteps=False)\n", + "\n", + "# Save the model\n", + "model.save(\"a2c_panda_reach\")\n", + "wandb.log({\"model_saved\": True})\n", + "\n", + "# Evaluate the model\n", + "eval_env = DummyVecEnv([lambda: gym.make(\"PandaReachJointsDense-v3\")])\n", + "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n", + "wandb.log({\"mean_reward\": mean_reward, \"std_reward\": std_reward})\n", + "print(f\"Evaluation: mean_reward={mean_reward:.2f} +/- {std_reward:.2f}\")\n", + "\n", + "# Plot Total Reward per Episode\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(episode_rewards, label=\"Episode Reward\")\n", + "plt.xlabel(\"Episode\")\n", + "plt.ylabel(\"Total Reward\")\n", + "plt.title(\"Total Reward per Episode\")\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.show()\n", + "\n", + "# Upload the model to the Hugging Face Hub\n", + "package_to_hub(\n", + " model=model,\n", + " model_name=\"a2c-panda-reach\",\n", + " model_architecture=\"A2C\",\n", + " env_id=\"PandaReachJointsDense-v3\",\n", + " eval_env=eval_env,\n", + " repo_id=\"oussamab2n/a2c-panda-reach\",\n", + " commit_message=\"Optimized model upload with evaluation\"\n", + ")\n", + "\n", + "# Finish the wandb run\n", + "wandb.finish()\n", + "\n", + "print(\"Modèle entraîné sur 500 épisodes, évalué, sauvegardé et visualisé avec succès !\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + 
"c78c5ec0063b416289dce2fe89e9b0ad", + "7fd552a5e11744049800633bae0113eb", + "ee4ec4e591294599a359719edbd8e683", + "619f5cf36b8045728c25441e5dbcd87e", + "39cd3e39b77544e784129cf10345b7d1", + "4c6da74c843748b9b72512619a2b8ff2", + "c3560a282d7947b0a0fc8c8859be2b4c", + "eef69609139c477f88dcf0c72f485493", + "5b56561bb7e4406dbfc1765ebf073c8d", + "283a3a4a6ad34970b1f9354cfc061fe8", + "418ee5dd7b8b4a5c80f723c26994020c", + "adbdabe296864d4bb49cd7bf8a6eaa81", + "2ca21c3b56de4cdfbabf69e62f2c5b5d", + "4122230c2ca94b98b392556b403ea576", + "75aac76a1e3443d38b045f75e8229d04", + "45e2d80723ac44a88112c71c1b44d324", + "10dd75ccb3ce49c7a9d430d427adebd8", + "88092a58a99b430ca62a488e5f5378b5", + "38f181cc724a4771b977da55d73fad3a", + "c4e56a18ccdc42c8bd2f3837f69dec0c", + "0b740bf50fa643898ef910e59c3975cd", + "543cae745d054c6ea92d37575a92b556", + "af001b75d30f48c7a5a02293afa17ba5", + "8a7ed8d444d14ff2bda905fba1e6f2c0", + "b07139fd2f8b470a940d9f07afdaa0f9", + "5c6278e79f274f2e96a1e2e323b61b17", + "3c75e898bfdb45329da491debd7804a6", + "d36d8e3f912e46cbae23854960e6e43e", + "71b3536b80c64ae59f9b32ad865d6e55", + "2be2284cce3b462192b40b5ce90b98a8", + "6f5334bfb2404cb89c5ce0479592a454", + "12d43c87ba1d484d900c33a50c398391", + "f6b77041e8054281acf81d3cd6809d84", + "fc348c31afbf470a84acde08f7f4142c", + "4112157198ea406bb57cddbcef95e55d", + "8296fa13df80418ab2d12b4856f564c8", + "a8937b021ecb421986877e2d852516c0", + "08c8924685c648f9aa0b78b86e1af506", + "452d21dd58bd4b1396ec5f7e78ebed31", + "2e7a1e4df0e243329d7abe148a54ad27", + "9f7e7d0bb83c4ae08bb1dc907a6c66dd", + "30208c255d674ae3b908e538406f82ee", + "04b3bb74aa08413cb5b4225a2f21bca7", + "96818b1f5fd546868d830b0bc1610a9c" + ] + }, + "id": "CB_jCjFuLzKZ", + "outputId": "204eada6-0a9f-4ade-9b9a-2a99a412913e" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Tracking run with wandb version 0.19.7" + ] + }, + "metadata": {} + 
}, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Run data is saved locally in <code>/content/wandb/run-20250226_140257-aqrdlwti</code>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Syncing run <strong><a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/aqrdlwti' target=\"_blank\">sweet-pyramid-32</a></strong> to <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View project at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/aqrdlwti' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/aqrdlwti</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using cpu device\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 1/600: Total Reward = -8.30\n", + "Logging to 
runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.8 |\n", + "| ep_rew_mean | -10.3 |\n", + "| success_rate | 0.25 |\n", + "| time/ | |\n", + "| fps | 223 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.9 |\n", + "| explained_variance | 0.473 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99 |\n", + "| policy_loss | -5.6 |\n", + "| std | 0.996 |\n", + "| value_loss | 0.388 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 2/600: Total Reward = -4.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 43.8 |\n", + "| ep_rew_mean | -11.3 |\n", + "| success_rate | 0.138 |\n", + "| time/ | |\n", + "| fps | 217 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 1335 |\n", + "| train/ | |\n", + "| entropy_loss | -9.93 |\n", + "| explained_variance | -0.162 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 266 |\n", + "| policy_loss | 2.27 |\n", + "| std | 1 |\n", + "| value_loss | 0.0702 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 3/600: Total Reward = -12.06\n", + "Logging to runs/aqrdlwti/A2C_0\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 44.5 |\n", + "| ep_rew_mean | -10.5 |\n", + "| success_rate | 0.128 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 2170 |\n", + "| train/ | |\n", + "| entropy_loss | -9.96 |\n", + "| explained_variance | -3.14 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 433 |\n", + "| policy_loss | 3.12 |\n", + "| std | 1 |\n", + "| value_loss | 0.083 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 4/600: Total Reward = -9.18\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 44.4 |\n", + "| ep_rew_mean | -10.6 |\n", + "| success_rate | 0.136 |\n", + "| time/ | |\n", + "| fps | 181 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 3005 |\n", + "| train/ | |\n", + "| entropy_loss | -9.96 |\n", + "| explained_variance | -0.125 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 600 |\n", + "| policy_loss | 3.9 |\n", + "| std | 1 |\n", + "| value_loss | 0.177 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 5/600: Total Reward = -5.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| 
rollout/ | |\n", + "| ep_len_mean | 44.4 |\n", + "| ep_rew_mean | -10.4 |\n", + "| success_rate | 0.145 |\n", + "| time/ | |\n", + "| fps | 163 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 3840 |\n", + "| train/ | |\n", + "| entropy_loss | -9.96 |\n", + "| explained_variance | -0.149 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 767 |\n", + "| policy_loss | -3.72 |\n", + "| std | 1 |\n", + "| value_loss | 0.173 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 6/600: Total Reward = -9.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 45 |\n", + "| ep_rew_mean | -10.4 |\n", + "| success_rate | 0.13 |\n", + "| time/ | |\n", + "| fps | 197 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 4675 |\n", + "| train/ | |\n", + "| entropy_loss | -9.97 |\n", + "| explained_variance | -16.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 934 |\n", + "| policy_loss | 1.7 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0856 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 7/600: Total Reward = -6.01\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 46.2 
|\n", + "| ep_rew_mean | -10.2 |\n", + "| success_rate | 0.11 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 5510 |\n", + "| train/ | |\n", + "| entropy_loss | -9.98 |\n", + "| explained_variance | -0.0124 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1101 |\n", + "| policy_loss | -2.58 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0868 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 8/600: Total Reward = -11.52\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 46.4 |\n", + "| ep_rew_mean | -9.76 |\n", + "| success_rate | 0.11 |\n", + "| time/ | |\n", + "| fps | 231 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 6345 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| explained_variance | -1.49 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1268 |\n", + "| policy_loss | -1.21 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0253 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 9/600: Total Reward = -5.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 46.5 |\n", + "| ep_rew_mean | -9.71 |\n", 
+ "| success_rate | 0.12 |\n", + "| time/ | |\n", + "| fps | 245 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 7180 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| explained_variance | -12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1435 |\n", + "| policy_loss | -0.157 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.025 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 10/600: Total Reward = -22.55\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 44 |\n", + "| ep_rew_mean | -8.76 |\n", + "| success_rate | 0.17 |\n", + "| time/ | |\n", + "| fps | 180 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 8015 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| explained_variance | -0.479 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1602 |\n", + "| policy_loss | -2.11 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0749 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 11/600: Total Reward = -12.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 43.5 |\n", + "| ep_rew_mean | -8.45 |\n", + "| success_rate | 0.19 |\n", + "| 
time/ | |\n", + "| fps | 232 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 8850 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| explained_variance | 0.465 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1769 |\n", + "| policy_loss | 3.85 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.136 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 12/600: Total Reward = -12.57\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 43.1 |\n", + "| ep_rew_mean | -8.56 |\n", + "| success_rate | 0.19 |\n", + "| time/ | |\n", + "| fps | 235 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 9685 |\n", + "| train/ | |\n", + "| entropy_loss | -9.99 |\n", + "| explained_variance | -10.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1936 |\n", + "| policy_loss | -0.244 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.00153 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 13/600: Total Reward = -13.70\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.6 |\n", + "| ep_rew_mean | -8.35 |\n", + "| success_rate | 0.23 |\n", + "| time/ | |\n", + "| fps | 170 |\n", 
+ "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 10520 |\n", + "| train/ | |\n", + "| entropy_loss | -9.97 |\n", + "| explained_variance | 0.699 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2103 |\n", + "| policy_loss | -0.271 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.00199 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 14/600: Total Reward = -4.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 42 |\n", + "| ep_rew_mean | -8.37 |\n", + "| success_rate | 0.21 |\n", + "| time/ | |\n", + "| fps | 236 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 11355 |\n", + "| train/ | |\n", + "| entropy_loss | -9.99 |\n", + "| explained_variance | 0.573 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2270 |\n", + "| policy_loss | -1.8 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0347 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 15/600: Total Reward = -11.69\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 42.3 |\n", + "| ep_rew_mean | -8.42 |\n", + "| success_rate | 0.2 |\n", + "| time/ | |\n", + "| fps | 159 |\n", + "| iterations | 100 |\n", + "| 
time_elapsed | 3 |\n", + "| total_timesteps | 12190 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| explained_variance | -0.75 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2437 |\n", + "| policy_loss | -1.89 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0585 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 16/600: Total Reward = -8.44\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 42 |\n", + "| ep_rew_mean | -8.2 |\n", + "| success_rate | 0.22 |\n", + "| time/ | |\n", + "| fps | 175 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 13025 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| explained_variance | -0.949 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2604 |\n", + "| policy_loss | -0.545 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0118 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 17/600: Total Reward = -7.67\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 42.2 |\n", + "| ep_rew_mean | -8.15 |\n", + "| success_rate | 0.23 |\n", + "| time/ | |\n", + "| fps | 235 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| 
total_timesteps | 13860 |\n", + "| train/ | |\n", + "| entropy_loss | -9.98 |\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2771 |\n", + "| policy_loss | 2.49 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0515 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 18/600: Total Reward = -12.78\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.7 |\n", + "| ep_rew_mean | -7.84 |\n", + "| success_rate | 0.26 |\n", + "| time/ | |\n", + "| fps | 231 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 14695 |\n", + "| train/ | |\n", + "| entropy_loss | -9.99 |\n", + "| explained_variance | -0.839 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 2938 |\n", + "| policy_loss | -2.51 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0825 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 19/600: Total Reward = -8.54\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.4 |\n", + "| ep_rew_mean | -7.62 |\n", + "| success_rate | 0.3 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 15530 |\n", + 
"| train/ | |\n", + "| entropy_loss | -9.99 |\n", + "| explained_variance | 0.683 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3105 |\n", + "| policy_loss | 0.00647 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.00259 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 20/600: Total Reward = -8.93\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.5 |\n", + "| ep_rew_mean | -7.9 |\n", + "| success_rate | 0.28 |\n", + "| time/ | |\n", + "| fps | 209 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 16365 |\n", + "| train/ | |\n", + "| entropy_loss | -9.98 |\n", + "| explained_variance | -8.86 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3272 |\n", + "| policy_loss | -1.84 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.063 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 21/600: Total Reward = -7.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.9 |\n", + "| ep_rew_mean | -8.04 |\n", + "| success_rate | 0.27 |\n", + "| time/ | |\n", + "| fps | 238 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 17200 |\n", + "| train/ | |\n", + "| 
entropy_loss | -9.99 |\n", + "| explained_variance | 0.192 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3439 |\n", + "| policy_loss | 0.266 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.00794 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 22/600: Total Reward = -9.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41 |\n", + "| ep_rew_mean | -7.9 |\n", + "| success_rate | 0.3 |\n", + "| time/ | |\n", + "| fps | 239 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 18035 |\n", + "| train/ | |\n", + "| entropy_loss | -9.97 |\n", + "| explained_variance | -2.31 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3606 |\n", + "| policy_loss | -0.138 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.00485 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 23/600: Total Reward = -3.62\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.4 |\n", + "| ep_rew_mean | -7.9 |\n", + "| success_rate | 0.31 |\n", + "| time/ | |\n", + "| fps | 144 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 18870 |\n", + "| train/ | |\n", + "| entropy_loss | -10 |\n", + "| 
explained_variance | -657 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3773 |\n", + "| policy_loss | -2.89 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.179 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 24/600: Total Reward = -8.73\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.6 |\n", + "| ep_rew_mean | -7.67 |\n", + "| success_rate | 0.31 |\n", + "| time/ | |\n", + "| fps | 240 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 19705 |\n", + "| train/ | |\n", + "| entropy_loss | -9.98 |\n", + "| explained_variance | 0.536 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 3940 |\n", + "| policy_loss | -3.37 |\n", + "| std | 1.01 |\n", + "| value_loss | 0.0991 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 25/600: Total Reward = -12.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.2 |\n", + "| ep_rew_mean | -7.42 |\n", + "| success_rate | 0.33 |\n", + "| time/ | |\n", + "| fps | 240 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 20540 |\n", + "| train/ | |\n", + "| entropy_loss | -9.94 |\n", + "| explained_variance | -3.64 |\n", + 
"| learning_rate | 0.0007 |\n", + "| n_updates | 4107 |\n", + "| policy_loss | -0.102 |\n", + "| std | 1 |\n", + "| value_loss | 0.00501 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 26/600: Total Reward = -0.68\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.7 |\n", + "| ep_rew_mean | -7.26 |\n", + "| success_rate | 0.31 |\n", + "| time/ | |\n", + "| fps | 197 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 21375 |\n", + "| train/ | |\n", + "| entropy_loss | -9.92 |\n", + "| explained_variance | -7.71 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4274 |\n", + "| policy_loss | -4.1 |\n", + "| std | 0.999 |\n", + "| value_loss | 0.17 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 27/600: Total Reward = -3.81\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40 |\n", + "| ep_rew_mean | -7.08 |\n", + "| success_rate | 0.33 |\n", + "| time/ | |\n", + "| fps | 177 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 22210 |\n", + "| train/ | |\n", + "| entropy_loss | -9.91 |\n", + "| explained_variance | 0.521 |\n", + "| learning_rate | 0.0007 |\n", + "| 
n_updates | 4441 |\n", + "| policy_loss | -0.318 |\n", + "| std | 0.998 |\n", + "| value_loss | 0.0032 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 28/600: Total Reward = -3.63\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.8 |\n", + "| ep_rew_mean | -7.22 |\n", + "| success_rate | 0.3 |\n", + "| time/ | |\n", + "| fps | 232 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 23045 |\n", + "| train/ | |\n", + "| entropy_loss | -9.89 |\n", + "| explained_variance | -1.39 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4608 |\n", + "| policy_loss | 2.2 |\n", + "| std | 0.994 |\n", + "| value_loss | 0.0657 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 29/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.2 |\n", + "| ep_rew_mean | -7.28 |\n", + "| success_rate | 0.31 |\n", + "| time/ | |\n", + "| fps | 233 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 23880 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | -7.95 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4775 |\n", + "| 
policy_loss | -1.82 |\n", + "| std | 0.993 |\n", + "| value_loss | 0.0515 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 30/600: Total Reward = -6.81\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.5 |\n", + "| ep_rew_mean | -6.96 |\n", + "| success_rate | 0.33 |\n", + "| time/ | |\n", + "| fps | 127 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 24715 |\n", + "| train/ | |\n", + "| entropy_loss | -9.89 |\n", + "| explained_variance | 0.863 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 4942 |\n", + "| policy_loss | 0.265 |\n", + "| std | 0.994 |\n", + "| value_loss | 0.00245 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 31/600: Total Reward = -11.96\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.3 |\n", + "| ep_rew_mean | -6.89 |\n", + "| success_rate | 0.34 |\n", + "| time/ | |\n", + "| fps | 238 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 25550 |\n", + "| train/ | |\n", + "| entropy_loss | -9.87 |\n", + "| explained_variance | 0.152 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5109 |\n", + "| policy_loss | 0.859 |\n", + 
"| std | 0.991 |\n", + "| value_loss | 0.0129 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 32/600: Total Reward = -1.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.8 |\n", + "| ep_rew_mean | -6.83 |\n", + "| success_rate | 0.31 |\n", + "| time/ | |\n", + "| fps | 222 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 26385 |\n", + "| train/ | |\n", + "| entropy_loss | -9.9 |\n", + "| explained_variance | 0.512 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5276 |\n", + "| policy_loss | -2.22 |\n", + "| std | 0.996 |\n", + "| value_loss | 0.0743 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 33/600: Total Reward = -7.95\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.1 |\n", + "| ep_rew_mean | -6.14 |\n", + "| success_rate | 0.39 |\n", + "| time/ | |\n", + "| fps | 194 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 27220 |\n", + "| train/ | |\n", + "| entropy_loss | -9.85 |\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5443 |\n", + "| policy_loss | -0.454 |\n", + "| std | 0.99 |\n", + "| 
value_loss | 0.00744 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 34/600: Total Reward = -8.72\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.9 |\n", + "| ep_rew_mean | -6.14 |\n", + "| success_rate | 0.39 |\n", + "| time/ | |\n", + "| fps | 227 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 28055 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | -2.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5610 |\n", + "| policy_loss | 0.448 |\n", + "| std | 0.993 |\n", + "| value_loss | 0.00644 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 35/600: Total Reward = -6.96\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.3 |\n", + "| ep_rew_mean | -5.67 |\n", + "| success_rate | 0.44 |\n", + "| time/ | |\n", + "| fps | 233 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 28890 |\n", + "| train/ | |\n", + "| entropy_loss | -9.89 |\n", + "| explained_variance | -0.185 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5777 |\n", + "| policy_loss | -1.8 |\n", + "| std | 0.995 |\n", + "| value_loss | 0.0397 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 36/600: Total Reward = -8.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.7 |\n", + "| ep_rew_mean | -5.32 |\n", + "| success_rate | 0.47 |\n", + "| time/ | |\n", + "| fps | 168 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 29725 |\n", + "| train/ | |\n", + "| entropy_loss | -9.89 |\n", + "| explained_variance | 0.331 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 5944 |\n", + "| policy_loss | -0.00233 |\n", + "| std | 0.994 |\n", + "| value_loss | 0.000924 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 37/600: Total Reward = -9.06\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.5 |\n", + "| ep_rew_mean | -5.21 |\n", + "| success_rate | 0.45 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 30560 |\n", + "| train/ | |\n", + "| entropy_loss | -9.91 |\n", + "| explained_variance | 0.367 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6111 |\n", + "| policy_loss | -2.74 |\n", + "| std | 0.998 |\n", + "| value_loss | 0.102 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 38/600: Total Reward = -0.85\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 35.5 |\n", + "| ep_rew_mean | -5.02 |\n", + "| success_rate | 0.43 |\n", + "| time/ | |\n", + "| fps | 228 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 31395 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | -1.54 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6278 |\n", + "| policy_loss | -0.46 |\n", + "| std | 0.993 |\n", + "| value_loss | 0.00275 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 39/600: Total Reward = -0.77\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.2 |\n", + "| ep_rew_mean | -4.6 |\n", + "| success_rate | 0.47 |\n", + "| time/ | |\n", + "| fps | 178 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 32230 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | -103 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6445 |\n", + "| policy_loss | -0.649 |\n", + "| std | 0.993 |\n", + "| value_loss | 0.0219 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 40/600: Total Reward = -12.44\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.2 |\n", + "| ep_rew_mean | -4.59 |\n", + "| success_rate | 0.48 |\n", + "| time/ | |\n", + "| fps | 237 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 33065 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | -4.01 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6612 |\n", + "| policy_loss | 0.486 |\n", + "| std | 0.993 |\n", + "| value_loss | 0.00241 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 41/600: Total Reward = -5.94\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.5 |\n", + "| ep_rew_mean | -4.39 |\n", + "| success_rate | 0.52 |\n", + "| time/ | |\n", + "| fps | 237 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 33900 |\n", + "| train/ | |\n", + "| entropy_loss | -9.86 |\n", + "| explained_variance | -0.602 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6779 |\n", + "| policy_loss | 0.925 |\n", + "| std | 0.99 |\n", + "| value_loss | 0.0106 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 42/600: Total Reward = -7.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.9 |\n", + "| ep_rew_mean | -4.6 |\n", + "| success_rate | 0.5 |\n", + "| time/ | |\n", + "| fps | 228 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 34735 |\n", + "| train/ | |\n", + "| entropy_loss | -9.9 |\n", + "| explained_variance | 0.526 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 6946 |\n", + "| policy_loss | 0.21 |\n", + "| std | 0.996 |\n", + "| value_loss | 0.000394 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 43/600: Total Reward = -6.08\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.9 |\n", + "| ep_rew_mean | -4.97 |\n", + "| success_rate | 0.45 |\n", + "| time/ | |\n", + "| fps | 206 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 35570 |\n", + "| train/ | |\n", + "| entropy_loss | -9.92 |\n", + "| explained_variance | -14.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7113 |\n", + "| policy_loss | -1.46 |\n", + "| std | 0.999 |\n", + "| value_loss | 0.0234 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 44/600: Total Reward = -5.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.7 |\n", + "| ep_rew_mean | -5.33 |\n", + "| success_rate | 0.38 |\n", + "| time/ | |\n", + "| fps | 153 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 36405 |\n", + "| train/ | |\n", + "| entropy_loss | -9.89 |\n", + "| explained_variance | 0.34 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7280 |\n", + "| policy_loss | 0.0546 |\n", + "| std | 0.994 |\n", + "| value_loss | 0.00271 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 45/600: Total Reward = -5.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.7 |\n", + "| ep_rew_mean | -5.32 |\n", + "| success_rate | 0.34 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 37240 |\n", + "| train/ | |\n", + "| entropy_loss | -9.89 |\n", + "| explained_variance | 0.186 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7447 |\n", + "| policy_loss | -3.88 |\n", + "| std | 0.994 |\n", + "| value_loss | 0.144 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 46/600: Total Reward = -7.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.2 |\n", + "| ep_rew_mean | -5.23 |\n", + "| success_rate | 0.36 |\n", + "| time/ | |\n", + "| fps | 211 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 38075 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7614 |\n", + "| policy_loss | -1.11 |\n", + "| std | 0.992 |\n", + "| value_loss | 0.0117 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 47/600: Total Reward = -8.51\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.5 |\n", + "| ep_rew_mean | -5.22 |\n", + "| success_rate | 0.36 |\n", + "| time/ | |\n", + "| fps | 234 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 38910 |\n", + "| train/ | |\n", + "| entropy_loss | -9.86 |\n", + "| explained_variance | 0.519 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7781 |\n", + "| policy_loss | 57 |\n", + "| std | 0.99 |\n", + "| value_loss | 58.3 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 48/600: Total Reward = -7.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37 |\n", + "| ep_rew_mean | -5.03 |\n", + "| success_rate | 0.37 |\n", + "| time/ | |\n", + "| fps | 236 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 39745 |\n", + "| train/ | |\n", + "| entropy_loss | -9.88 |\n", + "| explained_variance | 0.875 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 7948 |\n", + "| policy_loss | 1.43 |\n", + "| std | 0.993 |\n", + "| value_loss | 0.0167 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 49/600: Total Reward = -16.26\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38 |\n", + "| ep_rew_mean | -5.2 |\n", + "| success_rate | 0.35 |\n", + "| time/ | |\n", + "| fps | 162 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 40580 |\n", + "| train/ | |\n", + "| entropy_loss | -9.87 |\n", + "| explained_variance | 1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8115 |\n", + "| policy_loss | -0.126 |\n", + "| std | 0.992 |\n", + "| value_loss | 0.000681 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 50/600: Total Reward = -7.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.7 |\n", + "| ep_rew_mean | -4.95 |\n", + "| success_rate | 0.4 |\n", + "| time/ | |\n", + "| fps | 234 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 41415 |\n", + "| train/ | |\n", + "| entropy_loss | -9.86 |\n", + "| explained_variance | -1.06 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8282 |\n", + "| policy_loss | -1.11 |\n", + "| std | 0.99 |\n", + "| value_loss | 0.0147 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 51/600: Total Reward = -4.06\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.4 |\n", + "| ep_rew_mean | -4.08 |\n", + "| success_rate | 0.51 |\n", + "| time/ | |\n", + "| fps | 223 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 42250 |\n", + "| train/ | |\n", + "| entropy_loss | -9.87 |\n", + "| explained_variance | -11.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8449 |\n", + "| policy_loss | 0.723 |\n", + "| std | 0.992 |\n", + "| value_loss | 0.00806 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 52/600: Total Reward = -7.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.4 |\n", + "| ep_rew_mean | -3.99 |\n", + "| success_rate | 0.56 |\n", + "| time/ | |\n", + "| fps | 174 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 43085 |\n", + "| train/ | |\n", + "| entropy_loss | -9.85 |\n", + "| explained_variance | -2.12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8616 |\n", + "| policy_loss | -0.147 |\n", + "| std | 0.989 |\n", + "| value_loss | 0.000449 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 53/600: Total Reward = -9.85\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.8 |\n", + "| ep_rew_mean | -3.78 |\n", + "| success_rate | 0.61 |\n", + "| time/ | |\n", + "| fps | 230 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 43920 |\n", + "| train/ | |\n", + "| entropy_loss | -9.83 |\n", + "| explained_variance | -8.53 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8783 |\n", + "| policy_loss | -0.556 |\n", + "| std | 0.986 |\n", + "| value_loss | 0.00337 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 54/600: Total Reward = -6.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.6 |\n", + "| ep_rew_mean | -3.65 |\n", + "| success_rate | 0.64 |\n", + "| time/ | |\n", + "| fps | 235 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 44755 |\n", + "| train/ | |\n", + "| entropy_loss | -9.81 |\n", + "| explained_variance | -1.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 8950 |\n", + "| policy_loss | 0.0117 |\n", + "| std | 0.984 |\n", + "| value_loss | 0.00293 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 55/600: Total Reward = -10.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.6 |\n", + "| ep_rew_mean | -3.36 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 45590 |\n", + "| train/ | |\n", + "| entropy_loss | -9.77 |\n", + "| explained_variance | -1.68 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9117 |\n", + "| policy_loss | 1.44 |\n", + "| std | 0.978 |\n", + "| value_loss | 0.0361 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 56/600: Total Reward = -9.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.3 |\n", + "| ep_rew_mean | -3.31 |\n", + "| success_rate | 0.68 |\n", + "| time/ | |\n", + "| fps | 179 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 46425 |\n", + "| train/ | |\n", + "| entropy_loss | -9.76 |\n", + "| explained_variance | -12.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9284 |\n", + "| policy_loss | -1.32 |\n", + "| std | 0.977 |\n", + "| value_loss | 0.0275 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 57/600: Total Reward = -8.75\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.4 |\n", + "| ep_rew_mean | -3.78 |\n", + "| success_rate | 0.62 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 47260 |\n", + "| train/ | |\n", + "| entropy_loss | -9.8 |\n", + "| explained_variance | -2.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9451 |\n", + "| policy_loss | 0.601 |\n", + "| std | 0.983 |\n", + "| value_loss | 0.00651 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 58/600: Total Reward = -11.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.5 |\n", + "| ep_rew_mean | -3.99 |\n", + "| success_rate | 0.57 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 48095 |\n", + "| train/ | |\n", + "| entropy_loss | -9.78 |\n", + "| explained_variance | 0.517 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9618 |\n", + "| policy_loss | -1.1 |\n", + "| std | 0.98 |\n", + "| value_loss | 0.015 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 59/600: Total Reward = -4.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.9 |\n", + "| ep_rew_mean | -4.24 |\n", + "| success_rate | 0.54 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 48930 |\n", + "| train/ | |\n", + "| entropy_loss | -9.74 |\n", + "| explained_variance | 0.243 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9785 |\n", + "| policy_loss | -0.871 |\n", + "| std | 0.975 |\n", + "| value_loss | 0.00997 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 60/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.4 |\n", + "| ep_rew_mean | -4.13 |\n", + "| success_rate | 0.54 |\n", + "| time/ | |\n", + "| fps | 228 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 49765 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | 0.225 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 9952 |\n", + "| policy_loss | -0.35 |\n", + "| std | 0.974 |\n", + "| value_loss | 0.00356 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 61/600: Total Reward = -3.88\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.1 |\n", + "| ep_rew_mean | -3.45 |\n", + "| success_rate | 0.65 |\n", + "| time/ | |\n", + "| fps | 226 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 50600 |\n", + "| train/ | |\n", + "| entropy_loss | -9.71 |\n", + "| explained_variance | -4.07 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10119 |\n", + "| policy_loss | 0.973 |\n", + "| std | 0.971 |\n", + "| value_loss | 0.0167 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 62/600: Total Reward = -5.93\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.6 |\n", + "| ep_rew_mean | -3.06 |\n", + "| success_rate | 0.69 |\n", + "| time/ | |\n", + "| fps | 153 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 51435 |\n", + "| train/ | |\n", + "| entropy_loss | -9.72 |\n", + "| explained_variance | -112 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10286 |\n", + "| policy_loss | 0.703 |\n", + "| std | 0.971 |\n", + "| value_loss | 0.00935 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 63/600: Total Reward = -0.94\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.7 |\n", + "| ep_rew_mean | -3.14 |\n", + "| success_rate | 0.7 |\n", + "| time/ | |\n", + "| fps | 225 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 52270 |\n", + "| train/ | |\n", + "| entropy_loss | -9.71 |\n", + "| explained_variance | 0.621 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10453 |\n", + "| policy_loss | 1.11 |\n", + "| std | 0.971 |\n", + "| value_loss | 0.0136 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 64/600: Total Reward = -0.56\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24 |\n", + "| ep_rew_mean | -3.15 |\n", + "| success_rate | 0.69 |\n", + "| time/ | |\n", + "| fps | 227 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 53105 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | -147 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10620 |\n", + "| policy_loss | -6.91 |\n", + "| std | 0.974 |\n", + "| value_loss | 0.579 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 65/600: Total Reward = -13.02\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.1 |\n", + "| ep_rew_mean | -3.83 |\n", + "| success_rate | 0.59 |\n", + "| time/ | |\n", + "| fps | 188 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 53940 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | 0.142 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10787 |\n", + "| policy_loss | 0.0339 |\n", + "| std | 0.974 |\n", + "| value_loss | 0.00341 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 66/600: Total Reward = -6.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.1 |\n", + "| ep_rew_mean | -4.17 |\n", + "| success_rate | 0.51 |\n", + "| time/ | |\n", + "| fps | 226 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 54775 |\n", + "| train/ | |\n", + "| entropy_loss | -9.72 |\n", + "| explained_variance | 0.194 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 10954 |\n", + "| policy_loss | 0.813 |\n", + "| std | 0.972 |\n", + "| value_loss | 0.00733 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 67/600: Total Reward = -7.57\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.2 |\n", + "| ep_rew_mean | -3.72 |\n", + "| success_rate | 0.56 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 55610 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | -3.01 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11121 |\n", + "| policy_loss | -2.35 |\n", + "| std | 0.973 |\n", + "| value_loss | 0.066 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 68/600: Total Reward = -6.49\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.2 |\n", + "| ep_rew_mean | -4.56 |\n", + "| success_rate | 0.45 |\n", + "| time/ | |\n", + "| fps | 228 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 56445 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | 0.815 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11288 |\n", + "| policy_loss | -0.783 |\n", + "| std | 0.974 |\n", + "| value_loss | 0.0633 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 69/600: Total Reward = -7.57\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.4 |\n", + "| ep_rew_mean | -4.31 |\n", + "| success_rate | 0.54 |\n", + "| time/ | |\n", + "| fps | 187 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 57280 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | -0.832 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11455 |\n", + "| policy_loss | 0.835 |\n", + "| std | 0.973 |\n", + "| value_loss | 0.0142 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 70/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.7 |\n", + "| ep_rew_mean | -3.95 |\n", + "| success_rate | 0.62 |\n", + "| time/ | |\n", + "| fps | 234 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 58115 |\n", + "| train/ | |\n", + "| entropy_loss | -9.74 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11622 |\n", + "| policy_loss | 0.801 |\n", + "| std | 0.974 |\n", + "| value_loss | 0.0175 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 71/600: Total Reward = -5.61\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.4 |\n", + "| ep_rew_mean | -3.83 |\n", + "| success_rate | 0.61 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 58950 |\n", + "| train/ | |\n", + "| entropy_loss | -9.73 |\n", + "| explained_variance | -14.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11789 |\n", + "| policy_loss | 1.69 |\n", + "| std | 0.973 |\n", + "| value_loss | 0.0434 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 72/600: Total Reward = -8.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.5 |\n", + "| ep_rew_mean | -3.01 |\n", + "| success_rate | 0.7 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 59785 |\n", + "| train/ | |\n", + "| entropy_loss | -9.71 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 11956 |\n", + "| policy_loss | 0.835 |\n", + "| std | 0.971 |\n", + "| value_loss | 0.0141 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 73/600: Total Reward = -0.54\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.9 |\n", + "| ep_rew_mean | -3.45 |\n", + "| success_rate | 0.62 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 60620 |\n", + "| train/ | |\n", + "| entropy_loss | -9.7 |\n", + "| explained_variance | 0.465 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12123 |\n", + "| policy_loss | -1.22 |\n", + "| std | 0.97 |\n", + "| value_loss | 0.0189 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 74/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.6 |\n", + "| ep_rew_mean | -3.85 |\n", + "| success_rate | 0.57 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 61455 |\n", + "| train/ | |\n", + "| entropy_loss | -9.69 |\n", + "| explained_variance | -2.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12290 |\n", + "| policy_loss | 0.824 |\n", + "| std | 0.968 |\n", + "| value_loss | 0.0122 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 75/600: Total Reward = -2.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.9 |\n", + "| ep_rew_mean | -3.57 |\n", + "| success_rate | 0.61 |\n", + "| time/ | |\n", + "| fps | 168 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 62290 |\n", + "| train/ | |\n", + "| entropy_loss | -9.69 |\n", + "| explained_variance | 0.508 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12457 |\n", + "| policy_loss | -0.331 |\n", + "| std | 0.969 |\n", + "| value_loss | 0.00465 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 76/600: Total Reward = -1.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.1 |\n", + "| ep_rew_mean | -3.79 |\n", + "| success_rate | 0.57 |\n", + "| time/ | |\n", + "| fps | 226 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 63125 |\n", + "| train/ | |\n", + "| entropy_loss | -9.69 |\n", + "| explained_variance | -0.0705 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12624 |\n", + "| policy_loss | -0.565 |\n", + "| std | 0.97 |\n", + "| value_loss | 0.00712 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 77/600: Total Reward = -2.60\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.9 |\n", + "| ep_rew_mean | -2.95 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 229 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 63960 |\n", + "| train/ | |\n", + "| entropy_loss | -9.71 |\n", + "| explained_variance | 0.0542 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12791 |\n", + "| policy_loss | -0.791 |\n", + "| std | 0.972 |\n", + "| value_loss | 0.0103 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 78/600: Total Reward = -1.56\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.3 |\n", + "| ep_rew_mean | -3.14 |\n", + "| success_rate | 0.65 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 64795 |\n", + "| train/ | |\n", + "| entropy_loss | -9.68 |\n", + "| explained_variance | 0.774 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 12958 |\n", + "| policy_loss | -0.193 |\n", + "| std | 0.969 |\n", + "| value_loss | 0.00103 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 79/600: Total Reward = -6.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.5 |\n", + "| ep_rew_mean | -2.96 |\n", + "| success_rate | 0.7 |\n", + "| time/ | |\n", + "| fps | 193 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 65630 |\n", + "| train/ | |\n", + "| entropy_loss | -9.67 |\n", + "| explained_variance | -6.94 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13125 |\n", + "| policy_loss | -0.14 |\n", + "| std | 0.968 |\n", + "| value_loss | 0.00135 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 80/600: Total Reward = -5.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.8 |\n", + "| ep_rew_mean | -2.93 |\n", + "| success_rate | 0.75 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 66465 |\n", + "| train/ | |\n", + "| entropy_loss | -9.68 |\n", + "| explained_variance | 0.467 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13292 |\n", + "| policy_loss | 0.508 |\n", + "| std | 0.969 |\n", + "| value_loss | 0.00341 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 81/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.3 |\n", + "| ep_rew_mean | -2.76 |\n", + "| success_rate | 0.76 |\n", + "| time/ | |\n", + "| fps | 225 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 67300 |\n", + "| train/ | |\n", + "| entropy_loss | -9.69 |\n", + "| explained_variance | 0.0985 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13459 |\n", + "| policy_loss | 28.4 |\n", + "| std | 0.969 |\n", + "| value_loss | 25.9 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 82/600: Total Reward = -6.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.8 |\n", + "| ep_rew_mean | -2.74 |\n", + "| success_rate | 0.74 |\n", + "| time/ | |\n", + "| fps | 157 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 68135 |\n", + "| train/ | |\n", + "| entropy_loss | -9.66 |\n", + "| explained_variance | -26.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13626 |\n", + "| policy_loss | 0.416 |\n", + "| std | 0.966 |\n", + "| value_loss | 0.00239 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 83/600: Total Reward = -0.31\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.6 |\n", + "| ep_rew_mean | -2.45 |\n", + "| success_rate | 0.78 |\n", + "| time/ | |\n", + "| fps | 222 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 68970 |\n", + "| train/ | |\n", + "| entropy_loss | -9.66 |\n", + "| explained_variance | -2.01 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13793 |\n", + "| policy_loss | -0.698 |\n", + "| std | 0.965 |\n", + "| value_loss | 0.0192 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 84/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.3 |\n", + "| ep_rew_mean | -2.47 |\n", + "| success_rate | 0.82 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 69805 |\n", + "| train/ | |\n", + "| entropy_loss | -9.63 |\n", + "| explained_variance | 0.793 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 13960 |\n", + "| policy_loss | -0.269 |\n", + "| std | 0.961 |\n", + "| value_loss | 0.00125 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 85/600: Total Reward = -0.63\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.3 |\n", + "| ep_rew_mean | -2.2 |\n", + "| success_rate | 0.86 |\n", + "| time/ | |\n", + "| fps | 167 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 70640 |\n", + "| train/ | |\n", + "| entropy_loss | -9.6 |\n", + "| explained_variance | -1.22 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14127 |\n", + "| policy_loss | -1.09 |\n", + "| std | 0.958 |\n", + "| value_loss | 0.0224 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 86/600: Total Reward = -0.85\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 15.3 |\n", + "| ep_rew_mean | -1.51 |\n", + "| success_rate | 0.91 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 71475 |\n", + "| train/ | |\n", + "| entropy_loss | -9.57 |\n", + "| explained_variance | -0.149 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14294 |\n", + "| policy_loss | -2.14 |\n", + "| std | 0.954 |\n", + "| value_loss | 0.064 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 87/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.4 |\n", + "| ep_rew_mean | -1.05 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 223 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 72310 |\n", + "| train/ | |\n", + "| entropy_loss | -9.53 |\n", + "| explained_variance | -1.36 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14461 |\n", + "| policy_loss | -3.07 |\n", + "| std | 0.948 |\n", + "| value_loss | 0.141 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 88/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.1 |\n", + "| ep_rew_mean | -0.956 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 209 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 73145 |\n", + "| train/ | |\n", + "| entropy_loss | -9.44 |\n", + "| explained_variance | -3.26 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14628 |\n", + "| policy_loss | -1.73 |\n", + "| std | 0.937 |\n", + "| value_loss | 0.0614 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 89/600: Total Reward = -0.77\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.83 |\n", + "| ep_rew_mean | -0.816 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 190 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 73980 |\n", + "| train/ | |\n", + "| entropy_loss | -9.42 |\n", + "| explained_variance | -20.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14795 |\n", + "| policy_loss | -2.99 |\n", + "| std | 0.934 |\n", + "| value_loss | 0.204 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 90/600: Total Reward = -0.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.3 |\n", + "| ep_rew_mean | -1.12 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 227 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 74815 |\n", + "| train/ | |\n", + "| entropy_loss | -9.4 |\n", + "| explained_variance | -3.13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 14962 |\n", + "| policy_loss | -3.65 |\n", + "| std | 0.933 |\n", + "| value_loss | 0.176 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 91/600: Total Reward = -2.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.6 |\n", + "| ep_rew_mean | -1.4 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 75650 |\n", + "| train/ | |\n", + "| entropy_loss | -9.37 |\n", + "| explained_variance | -0.164 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15129 |\n", + "| policy_loss | 30.2 |\n", + "| std | 0.928 |\n", + "| value_loss | 15.5 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 92/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.15 |\n", + "| ep_rew_mean | -1.01 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 152 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 76485 |\n", + "| train/ | |\n", + "| entropy_loss | -9.34 |\n", + "| explained_variance | -13.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15296 |\n", + "| policy_loss | -5.69 |\n", + "| std | 0.925 |\n", + "| value_loss | 0.437 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 93/600: Total Reward = -0.52\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.71 |\n", + "| ep_rew_mean | -1.39 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 229 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 77320 |\n", + "| train/ | |\n", + "| entropy_loss | -9.32 |\n", + "| explained_variance | 0.305 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15463 |\n", + "| policy_loss | 39.2 |\n", + "| std | 0.922 |\n", + "| value_loss | 21 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 94/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.2 |\n", + "| ep_rew_mean | -1.52 |\n", + "| success_rate | 0.9 |\n", + "| time/ | |\n", + "| fps | 220 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 78155 |\n", + "| train/ | |\n", + "| entropy_loss | -9.36 |\n", + "| explained_variance | 0.0849 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15630 |\n", + "| policy_loss | 37.2 |\n", + "| std | 0.927 |\n", + "| value_loss | 29.9 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 95/600: Total Reward = -0.88\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.9 |\n", + "| ep_rew_mean | -1.52 |\n", + "| success_rate | 0.89 |\n", + "| time/ | |\n", + "| fps | 170 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 78990 |\n", + "| train/ | |\n", + "| entropy_loss | -9.32 |\n", + "| explained_variance | -190 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15797 |\n", + "| policy_loss | -3.64 |\n", + "| std | 0.921 |\n", + "| value_loss | 0.187 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 96/600: Total Reward = -0.05\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.6 |\n", + "| ep_rew_mean | -0.933 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 222 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 79825 |\n", + "| train/ | |\n", + "| entropy_loss | -9.28 |\n", + "| explained_variance | -4.48 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 15964 |\n", + "| policy_loss | 20.6 |\n", + "| std | 0.916 |\n", + "| value_loss | 11.3 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 97/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.35 |\n", + "| ep_rew_mean | -1.1 |\n", + "| success_rate | 0.94 |\n", + "| time/ | |\n", + "| fps | 220 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 80660 |\n", + "| train/ | |\n", + "| entropy_loss | -9.25 |\n", + "| explained_variance | -0.0488 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16131 |\n", + "| policy_loss | 38.2 |\n", + "| std | 0.912 |\n", + "| value_loss | 27.9 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 98/600: Total Reward = -0.91\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 13.1 |\n", + "| ep_rew_mean | -1.96 |\n", + "| success_rate | 0.87 |\n", + "| time/ | |\n", + "| fps | 201 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 81495 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | -23.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16298 |\n", + "| policy_loss | 2.97 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.114 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 99/600: Total Reward = -1.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.5 |\n", + "| ep_rew_mean | -1.6 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 189 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 82330 |\n", + "| train/ | |\n", + "| entropy_loss | -9.27 |\n", + "| explained_variance | -25.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16465 |\n", + "| policy_loss | 1.41 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.0301 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 100/600: Total Reward = -0.64\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.4 |\n", + "| ep_rew_mean | -1.8 |\n", + "| success_rate | 0.88 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 83165 |\n", + "| train/ | |\n", + "| entropy_loss | -9.25 |\n", + "| explained_variance | -31 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16632 |\n", + "| policy_loss | 1.97 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0536 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 101/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.63 |\n", + "| ep_rew_mean | -1.1 |\n", + "| success_rate | 0.95 |\n", + "| time/ | |\n", + "| fps | 218 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 84000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | -0.314 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16799 |\n", + "| policy_loss | -14.1 |\n", + "| std | 0.91 |\n", + "| value_loss | 2.76 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 102/600: Total Reward = -22.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 14.1 |\n", + "| ep_rew_mean | -3.03 |\n", + "| success_rate | 0.83 |\n", + "| time/ | |\n", + "| fps | 157 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 84835 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | -323 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 16966 |\n", + "| policy_loss | 3.84 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.189 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 103/600: Total Reward = -21.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19.2 |\n", + "| ep_rew_mean | -4.51 |\n", + "| success_rate | 0.7 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 85670 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | -81.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17133 |\n", + "| policy_loss | 11.7 |\n", + "| std | 0.914 |\n", + "| value_loss | 2.31 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 104/600: Total Reward = -0.76\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.4 |\n", + "| ep_rew_mean | -5.75 |\n", + "| success_rate | 0.57 |\n", + "| time/ | |\n", + "| fps | 209 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 86505 |\n", + "| train/ | |\n", + "| entropy_loss | -9.25 |\n", + "| explained_variance | -727 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17300 |\n", + "| policy_loss | 18.1 |\n", + "| std | 0.912 |\n", + "| value_loss | 3.57 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 105/600: Total Reward = -10.08\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.7 |\n", + "| ep_rew_mean | -5.75 |\n", + "| success_rate | 0.52 |\n", + "| time/ | |\n", + "| fps | 143 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 87340 |\n", + "| train/ | |\n", + "| entropy_loss | -9.27 |\n", + "| explained_variance | -1.62 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17467 |\n", + "| policy_loss | -5.18 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.613 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 106/600: Total Reward = -9.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31 |\n", + "| ep_rew_mean | -6.68 |\n", + "| success_rate | 0.44 |\n", + "| time/ | |\n", + "| fps | 199 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 88175 |\n", + "| train/ | |\n", + "| entropy_loss | -9.31 |\n", + "| explained_variance | -2.89 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17634 |\n", + "| policy_loss | -2.37 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.0789 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 107/600: Total Reward = -16.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.9 |\n", + "| ep_rew_mean | -8.13 |\n", + "| success_rate | 0.31 |\n", + "| time/ | |\n", + "| fps | 194 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 89010 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -4.52 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17801 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.0888 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 108/600: Total Reward = -15.78\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.5 |\n", + "| ep_rew_mean | -8.26 |\n", + "| success_rate | 0.27 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 89845 |\n", + "| train/ | |\n", + "| entropy_loss | -9.31 |\n", + "| explained_variance | -24.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 17968 |\n", + "| policy_loss | -3.16 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.146 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 109/600: Total Reward = -5.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.8 |\n", + "| ep_rew_mean | -8.57 |\n", + "| success_rate | 0.23 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 90680 |\n", + "| train/ | |\n", + "| entropy_loss | -9.25 |\n", + "| explained_variance | -0.514 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18135 |\n", + "| policy_loss | 1.27 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.0225 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 110/600: Total Reward = -4.73\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.4 |\n", + "| ep_rew_mean | -8.5 |\n", + "| success_rate | 0.2 |\n", + "| time/ | |\n", + "| fps | 210 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 91515 |\n", + "| train/ | |\n", + "| entropy_loss | -9.27 |\n", + "| explained_variance | 0.00535 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18302 |\n", + "| policy_loss | 1.43 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.032 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 111/600: Total Reward = -17.72\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 42.1 |\n", + "| ep_rew_mean | -8.4 |\n", + "| success_rate | 0.19 |\n", + "| time/ | |\n", + "| fps | 146 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 92350 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | -64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18469 |\n", + "| policy_loss | -4.4 |\n", + "| std | 0.919 |\n", + "| value_loss | 0.282 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 112/600: Total Reward = -10.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.7 |\n", + "| ep_rew_mean | -7.8 |\n", + "| success_rate | 0.25 |\n", + "| time/ | |\n", + "| fps | 209 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 93185 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | 0.844 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18636 |\n", + "| policy_loss | -2.51 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.511 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 113/600: Total Reward = -9.46\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.4 |\n", + "| ep_rew_mean | -7.56 |\n", + "| success_rate | 0.27 |\n", + "| time/ | |\n", + "| fps | 203 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 94020 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | -6.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18803 |\n", + "| policy_loss | -1.71 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.141 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 114/600: Total Reward = -14.84\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.6 |\n", + "| ep_rew_mean | -7.52 |\n", + "| success_rate | 0.29 |\n", + "| time/ | |\n", + "| fps | 138 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 94855 |\n", + "| train/ | |\n", + "| entropy_loss | -9.28 |\n", + "| explained_variance | 0.371 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 18970 |\n", + "| policy_loss | 1.7 |\n", + "| std | 0.918 |\n", + "| value_loss | 0.0702 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 115/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39 |\n", + "| ep_rew_mean | -7.79 |\n", + "| success_rate | 0.27 |\n", + "| time/ | |\n", + "| fps | 202 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 95690 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -71.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19137 |\n", + "| policy_loss | 0.609 |\n", + "| std | 0.922 |\n", + "| value_loss | 0.0187 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 116/600: Total Reward = -13.73\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39 |\n", + "| ep_rew_mean | -7.77 |\n", + "| success_rate | 0.27 |\n", + "| time/ | |\n", + "| fps | 198 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 96525 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | 0.616 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19304 |\n", + "| policy_loss | 0.522 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.00361 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 117/600: Total Reward = -10.46\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.3 |\n", + "| ep_rew_mean | -7.75 |\n", + "| success_rate | 0.26 |\n", + "| time/ | |\n", + "| fps | 144 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 97360 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | -3.86 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19471 |\n", + "| policy_loss | 5.16 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.395 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 118/600: Total Reward = -10.80\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.2 |\n", + "| ep_rew_mean | -7.88 |\n", + "| success_rate | 0.23 |\n", + "| time/ | |\n", + "| fps | 210 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 98195 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -217 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19638 |\n", + "| policy_loss | 0.245 |\n", + "| std | 0.921 |\n", + "| value_loss | 0.00902 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 119/600: Total Reward = -10.71\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.6 |\n", + "| ep_rew_mean | -7.91 |\n", + "| success_rate | 0.23 |\n", + "| time/ | |\n", + "| fps | 208 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 99030 |\n", + "| train/ | |\n", + "| entropy_loss | -9.32 |\n", + "| explained_variance | -3.08 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19805 |\n", + "| policy_loss | -6.18 |\n", + "| std | 0.924 |\n", + "| value_loss | 0.401 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 120/600: Total Reward = -1.00\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.1 |\n", + "| ep_rew_mean | -7.84 |\n", + "| success_rate | 0.23 |\n", + "| time/ | |\n", + "| fps | 139 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 99865 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | 0.511 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 19972 |\n", + "| policy_loss | 22 |\n", + "| std | 0.921 |\n", + "| value_loss | 47.4 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 121/600: Total Reward = -13.90\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40 |\n", + "| ep_rew_mean | -7.45 |\n", + "| success_rate | 0.26 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 100700 |\n", + "| train/ | |\n", + "| entropy_loss | -9.34 |\n", + "| explained_variance | 0.547 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20139 |\n", + "| policy_loss | 3.18 |\n", + "| std | 0.925 |\n", + "| value_loss | 0.112 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 122/600: Total Reward = -9.89\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.3 |\n", + "| ep_rew_mean | -7.26 |\n", + "| success_rate | 0.27 |\n", + "| time/ | |\n", + "| fps | 217 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 101535 |\n", + "| train/ | |\n", + "| entropy_loss | -9.35 |\n", + "| explained_variance | -8.36 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20306 |\n", + "| policy_loss | 4.07 |\n", + "| std | 0.927 |\n", + "| value_loss | 0.185 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 123/600: Total Reward = -4.50\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 41.1 |\n", + "| ep_rew_mean | -7.27 |\n", + "| success_rate | 0.26 |\n", + "| time/ | |\n", + "| fps | 151 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 102370 |\n", + "| train/ | |\n", + "| entropy_loss | -9.33 |\n", + "| explained_variance | -39.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20473 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.924 |\n", + "| value_loss | 0.0324 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 124/600: Total Reward = -0.05\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 39.9 |\n", + "| ep_rew_mean | -6.73 |\n", + "| success_rate | 0.29 |\n", + "| time/ | |\n", + "| fps | 222 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 103205 |\n", + "| train/ | |\n", + "| entropy_loss | -9.31 |\n", + "| explained_variance | 0.054 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20640 |\n", + "| policy_loss | 0.389 |\n", + "| std | 0.922 |\n", + "| value_loss | 0.022 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 125/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 35.8 |\n", + "| ep_rew_mean | -5.59 |\n", + "| success_rate | 0.38 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 104040 |\n", + "| train/ | |\n", + "| entropy_loss | -9.31 |\n", + "| explained_variance | -19.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20807 |\n", + "| policy_loss | -3.6 |\n", + "| std | 0.922 |\n", + "| value_loss | 0.206 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 126/600: Total Reward = -1.52\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.2 |\n", + "| ep_rew_mean | -4 |\n", + "| success_rate | 0.54 |\n", + "| time/ | |\n", + "| fps | 184 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 104875 |\n", + "| train/ | |\n", + "| entropy_loss | -9.32 |\n", + "| explained_variance | -24.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 20974 |\n", + "| policy_loss | 156 |\n", + "| std | 0.923 |\n", + "| value_loss | 332 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 127/600: Total Reward = -7.57\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.4 |\n", + "| ep_rew_mean | -3.74 |\n", + "| success_rate | 0.57 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 105710 |\n", + "| train/ | |\n", + "| entropy_loss | -9.28 |\n", + "| explained_variance | 0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21141 |\n", + "| policy_loss | -1.53 |\n", + "| std | 0.917 |\n", + "| value_loss | 0.0379 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 128/600: Total Reward = -6.84\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.8 |\n", + "| ep_rew_mean | -4.13 |\n", + "| success_rate | 0.54 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 106545 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | 0.406 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21308 |\n", + "| policy_loss | -0.0132 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.00891 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 129/600: Total Reward = -8.64\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.9 |\n", + "| ep_rew_mean | -4.37 |\n", + "| success_rate | 0.53 |\n", + "| time/ | |\n", + "| fps | 141 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 107380 |\n", + "| train/ | |\n", + "| entropy_loss | -9.31 |\n", + "| explained_variance | -165 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21475 |\n", + "| policy_loss | -4.64 |\n", + "| std | 0.922 |\n", + "| value_loss | 0.326 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 130/600: Total Reward = -0.61\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.4 |\n", + "| ep_rew_mean | -4.94 |\n", + "| success_rate | 0.45 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 108215 |\n", + "| train/ | |\n", + "| entropy_loss | -9.34 |\n", + "| explained_variance | -6.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21642 |\n", + "| policy_loss | -1.27 |\n", + "| std | 0.926 |\n", + "| value_loss | 0.0248 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 131/600: Total Reward = -5.58\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.8 |\n", + "| ep_rew_mean | -5 |\n", + "| success_rate | 0.46 |\n", + "| time/ | |\n", + "| fps | 220 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 109050 |\n", + "| train/ | |\n", + "| entropy_loss | -9.35 |\n", + "| explained_variance | 0.822 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21809 |\n", + "| policy_loss | 0.48 |\n", + "| std | 0.928 |\n", + "| value_loss | 0.00381 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 132/600: Total Reward = -0.55\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29 |\n", + "| ep_rew_mean | -4 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 172 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 109885 |\n", + "| train/ | |\n", + "| entropy_loss | -9.4 |\n", + "| explained_variance | -1.83 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 21976 |\n", + "| policy_loss | -2.9 |\n", + "| std | 0.933 |\n", + "| value_loss | 0.206 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 133/600: Total Reward = -4.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.4 |\n", + "| ep_rew_mean | -3.36 |\n", + "| success_rate | 0.66 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 110720 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -0.973 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22143 |\n", + "| policy_loss | -1.01 |\n", + "| std | 0.93 |\n", + "| value_loss | 0.0261 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 134/600: Total Reward = -0.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.1 |\n", + "| ep_rew_mean | -3.27 |\n", + "| success_rate | 0.65 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 111555 |\n", + "| train/ | |\n", + "| entropy_loss | -9.36 |\n", + "| explained_variance | -1.43 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22310 |\n", + "| policy_loss | -0.895 |\n", + "| std | 0.928 |\n", + "| value_loss | 0.0205 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 135/600: Total Reward = -5.93\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.4 |\n", + "| ep_rew_mean | -3.42 |\n", + "| success_rate | 0.61 |\n", + "| time/ | |\n", + "| fps | 181 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 112390 |\n", + "| train/ | |\n", + "| entropy_loss | -9.35 |\n", + "| explained_variance | -1.23 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22477 |\n", + "| policy_loss | 3.9 |\n", + "| std | 0.926 |\n", + "| value_loss | 0.184 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 136/600: Total Reward = -7.18\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.2 |\n", + "| ep_rew_mean | -3.74 |\n", + "| success_rate | 0.56 |\n", + "| time/ | |\n", + "| fps | 208 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 113225 |\n", + "| train/ | |\n", + "| entropy_loss | -9.32 |\n", + "| explained_variance | -7.25 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22644 |\n", + "| policy_loss | 2.34 |\n", + "| std | 0.923 |\n", + "| value_loss | 0.122 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 137/600: Total Reward = -1.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.9 |\n", + "| ep_rew_mean | -3.72 |\n", + "| success_rate | 0.56 |\n", + "| time/ | |\n", + "| fps | 223 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 114060 |\n", + "| train/ | |\n", + "| entropy_loss | -9.33 |\n", + "| explained_variance | -156 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22811 |\n", + "| policy_loss | 0.454 |\n", + "| std | 0.923 |\n", + "| value_loss | 0.021 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 138/600: Total Reward = -4.70\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.4 |\n", + "| ep_rew_mean | -3.36 |\n", + "| success_rate | 0.63 |\n", + "| time/ | |\n", + "| fps | 222 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 114895 |\n", + "| train/ | |\n", + "| entropy_loss | -9.28 |\n", + "| explained_variance | -285 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22978 |\n", + "| policy_loss | -4.06 |\n", + "| std | 0.917 |\n", + "| value_loss | 0.48 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 139/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.7 |\n", + "| ep_rew_mean | -3.24 |\n", + "| success_rate | 0.66 |\n", + "| time/ | |\n", + "| fps | 161 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 115730 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | -15 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23145 |\n", + "| policy_loss | 3.55 |\n", + "| std | 0.918 |\n", + "| value_loss | 0.196 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 140/600: Total Reward = -1.74\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.2 |\n", + "| ep_rew_mean | -3.04 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 224 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 116565 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | -1.83 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23312 |\n", + "| policy_loss | 119 |\n", + "| std | 0.919 |\n", + "| value_loss | 137 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 141/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.3 |\n", + "| ep_rew_mean | -3.01 |\n", + "| success_rate | 0.68 |\n", + "| time/ | |\n", + "| fps | 223 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 117400 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | -1.11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23479 |\n", + "| policy_loss | 0.561 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.00464 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 142/600: Total Reward = -0.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.7 |\n", + "| ep_rew_mean | -2.75 |\n", + "| success_rate | 0.7 |\n", + "| time/ | |\n", + "| fps | 167 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 118235 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -18.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23646 |\n", + "| policy_loss | 0.984 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.0111 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 143/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.1 |\n", + "| ep_rew_mean | -2.8 |\n", + "| success_rate | 0.72 |\n", + "| time/ | |\n", + "| fps | 229 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 119070 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | -5.45 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23813 |\n", + "| policy_loss | -3.3 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.165 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 144/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.4 |\n", + "| ep_rew_mean | -3.13 |\n", + "| success_rate | 0.65 |\n", + "| time/ | |\n", + "| fps | 229 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 119905 |\n", + "| train/ | |\n", + "| entropy_loss | -9.24 |\n", + "| explained_variance | -196 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23980 |\n", + "| policy_loss | -3.71 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.179 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 145/600: Total Reward = -9.82\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.4 |\n", + "| ep_rew_mean | -3.39 |\n", + "| success_rate | 0.61 |\n", + "| time/ | |\n", + "| fps | 217 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 120740 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | 0.293 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24147 |\n", + "| policy_loss | -0.371 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0107 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 146/600: Total Reward = -3.93\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.6 |\n", + "| ep_rew_mean | -3.8 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 170 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 121575 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -2.33 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24314 |\n", + "| policy_loss | 140 |\n", + "| std | 0.91 |\n", + "| value_loss | 242 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 147/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.5 |\n", + "| ep_rew_mean | -3.85 |\n", + "| success_rate | 0.59 |\n", + "| time/ | |\n", + "| fps | 218 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 122410 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -32.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24481 |\n", + "| policy_loss | -1.25 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.0207 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 148/600: Total Reward = -8.44\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.8 |\n", + "| ep_rew_mean | -3.81 |\n", + "| success_rate | 0.62 |\n", + "| time/ | |\n", + "| fps | 217 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 123245 |\n", + "| train/ | |\n", + "| entropy_loss | -9.22 |\n", + "| explained_variance | -29.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24648 |\n", + "| policy_loss | -1.45 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.0323 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 149/600: Total Reward = -5.01\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.4 |\n", + "| ep_rew_mean | -3.93 |\n", + "| success_rate | 0.6 |\n", + "| time/ | |\n", + "| fps | 149 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 124080 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -46.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24815 |\n", + "| policy_loss | -0.0429 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.0296 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 150/600: Total Reward = -3.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.3 |\n", + "| ep_rew_mean | -3.83 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 218 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 124915 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -7.55 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24982 |\n", + "| policy_loss | 2.35 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0923 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 151/600: Total Reward = -0.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30 |\n", + "| ep_rew_mean | -3.76 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 125750 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | 0.649 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25149 |\n", + "| policy_loss | -0.44 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.00379 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 152/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.5 |\n", + "| ep_rew_mean | -3.6 |\n", + "| success_rate | 0.59 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 126585 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -13.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25316 |\n", + "| policy_loss | 0.31 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.036 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 153/600: Total Reward = -5.73\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.1 |\n", + "| ep_rew_mean | -3.73 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 211 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 127420 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | 0.611 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25483 |\n", + "| policy_loss | -3.52 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.136 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 154/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.4 |\n", + "| ep_rew_mean | -3.1 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 128255 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -0.636 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25650 |\n", + "| policy_loss | -0.749 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.0138 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 155/600: Total Reward = -6.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.2 |\n", + "| ep_rew_mean | -3.21 |\n", + "| success_rate | 0.66 |\n", + "| time/ | |\n", + "| fps | 170 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 129090 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -23 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25817 |\n", + "| policy_loss | -1.96 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.05 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 156/600: Total Reward = -0.39\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.6 |\n", + "| ep_rew_mean | -3.27 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 225 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 129925 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -223 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25984 |\n", + "| policy_loss | 0.92 |\n", + "| std | 0.903 |\n", + "| value_loss | 0.0542 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 157/600: Total Reward = -6.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.1 |\n", + "| ep_rew_mean | -3.8 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 130760 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | 0.319 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26151 |\n", + "| policy_loss | -0.288 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.00271 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 158/600: Total Reward = -5.47\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.1 |\n", + "| ep_rew_mean | -4.05 |\n", + "| success_rate | 0.57 |\n", + "| time/ | |\n", + "| fps | 223 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 131595 |\n", + "| train/ | |\n", + "| entropy_loss | -9.14 |\n", + "| explained_variance | -30.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26318 |\n", + "| policy_loss | 0.394 |\n", + "| std | 0.9 |\n", + "| value_loss | 0.0195 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 159/600: Total Reward = -4.66\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.4 |\n", + "| ep_rew_mean | -4.35 |\n", + "| success_rate | 0.51 |\n", + "| time/ | |\n", + "| fps | 164 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 132430 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -32.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26485 |\n", + "| policy_loss | -1.42 |\n", + "| std | 0.903 |\n", + "| value_loss | 0.0771 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 160/600: Total Reward = -10.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.4 |\n", + "| ep_rew_mean | -4.15 |\n", + "| success_rate | 0.55 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 133265 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -2.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26652 |\n", + "| policy_loss | -0.983 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0579 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 161/600: Total Reward = -11.57\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.5 |\n", + "| ep_rew_mean | -4.11 |\n", + "| success_rate | 0.55 |\n", + "| time/ | |\n", + "| fps | 218 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 134100 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -17 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26819 |\n", + "| policy_loss | 1.21 |\n", + "| std | 0.903 |\n", + "| value_loss | 0.0383 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 162/600: Total Reward = -8.97\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.7 |\n", + "| ep_rew_mean | -4.94 |\n", + "| success_rate | 0.43 |\n", + "| time/ | |\n", + "| fps | 149 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 134935 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -185 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26986 |\n", + "| policy_loss | 2.11 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0897 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 163/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.7 |\n", + "| ep_rew_mean | -4.66 |\n", + "| success_rate | 0.46 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 135770 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -1.72 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27153 |\n", + "| policy_loss | -1.94 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0464 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 164/600: Total Reward = -9.47\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.2 |\n", + "| ep_rew_mean | -4.87 |\n", + "| success_rate | 0.41 |\n", + "| time/ | |\n", + "| fps | 204 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 136605 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -1.86 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27320 |\n", + "| policy_loss | 3.43 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.143 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 165/600: Total Reward = -3.94\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.6 |\n", + "| ep_rew_mean | -4.7 |\n", + "| success_rate | 0.42 |\n", + "| time/ | |\n", + "| fps | 133 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 137440 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | 0.91 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27487 |\n", + "| policy_loss | -1.86 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.0335 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 166/600: Total Reward = -8.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.6 |\n", + "| ep_rew_mean | -4.28 |\n", + "| success_rate | 0.49 |\n", + "| time/ | |\n", + "| fps | 209 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 138275 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -110 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27654 |\n", + "| policy_loss | -2.94 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.145 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 167/600: Total Reward = -0.54\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.8 |\n", + "| ep_rew_mean | -3.8 |\n", + "| success_rate | 0.55 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 139110 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -9.12 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27821 |\n", + "| policy_loss | 0.831 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0164 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 168/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.9 |\n", + "| ep_rew_mean | -3.96 |\n", + "| success_rate | 0.5 |\n", + "| time/ | |\n", + "| fps | 143 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 139945 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -0.0492 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27988 |\n", + "| policy_loss | -0.601 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.00722 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 169/600: Total Reward = -5.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34 |\n", + "| ep_rew_mean | -4.02 |\n", + "| success_rate | 0.49 |\n", + "| time/ | |\n", + "| fps | 206 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 140780 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -24.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28155 |\n", + "| policy_loss | -0.982 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.015 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 170/600: Total Reward = -7.87\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.7 |\n", + "| ep_rew_mean | -3.88 |\n", + "| success_rate | 0.51 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 141615 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | 0.0474 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28322 |\n", + "| policy_loss | 19.4 |\n", + "| std | 0.908 |\n", + "| value_loss | 31.3 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 171/600: Total Reward = -7.94\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.4 |\n", + "| ep_rew_mean | -4.23 |\n", + "| success_rate | 0.45 |\n", + "| time/ | |\n", + "| fps | 151 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 142450 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | 0.521 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28489 |\n", + "| policy_loss | 0.0945 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.000956 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 172/600: Total Reward = -0.49\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.5 |\n", + "| ep_rew_mean | -4.6 |\n", + "| success_rate | 0.42 |\n", + "| time/ | |\n", + "| fps | 219 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 143285 |\n", + "| train/ | |\n", + "| entropy_loss | -9.25 |\n", + "| explained_variance | -0.887 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28656 |\n", + "| policy_loss | 0.259 |\n", + "| std | 0.917 |\n", + "| value_loss | 0.00196 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 173/600: Total Reward = -5.48\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.9 |\n", + "| ep_rew_mean | -4.29 |\n", + "| success_rate | 0.47 |\n", + "| time/ | |\n", + "| fps | 203 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 144120 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -1.25 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28823 |\n", + "| policy_loss | -0.0338 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.00284 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 174/600: Total Reward = -4.49\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33 |\n", + "| ep_rew_mean | -4.23 |\n", + "| success_rate | 0.45 |\n", + "| time/ | |\n", + "| fps | 164 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 144955 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -25.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28990 |\n", + "| policy_loss | -0.531 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.00659 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 175/600: Total Reward = -5.97\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.9 |\n", + "| ep_rew_mean | -4.17 |\n", + "| success_rate | 0.46 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 145790 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -5.56 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29157 |\n", + "| policy_loss | -0.733 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.00726 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 176/600: Total Reward = -6.69\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.2 |\n", + "| ep_rew_mean | -3.78 |\n", + "| success_rate | 0.49 |\n", + "| time/ | |\n", + "| fps | 221 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 146625 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -0.676 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29324 |\n", + "| policy_loss | 0.691 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.00542 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 177/600: Total Reward = -4.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30 |\n", + "| ep_rew_mean | -3.63 |\n", + "| success_rate | 0.53 |\n", + "| time/ | |\n", + "| fps | 198 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 147460 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | 0.752 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29491 |\n", + "| policy_loss | 0.533 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.00556 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 178/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.2 |\n", + "| ep_rew_mean | -3.42 |\n", + "| success_rate | 0.59 |\n", + "| time/ | |\n", + "| fps | 175 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 148295 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -0.436 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29658 |\n", + "| policy_loss | 1.04 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.0126 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 179/600: Total Reward = -6.74\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29 |\n", + "| ep_rew_mean | -3.63 |\n", + "| success_rate | 0.55 |\n", + "| time/ | |\n", + "| fps | 207 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 149130 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -1.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29825 |\n", + "| policy_loss | 0.507 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.00912 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 180/600: Total Reward = -0.80\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.7 |\n", + "| ep_rew_mean | -3.52 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 160 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 149965 |\n", + "| train/ | |\n", + "| entropy_loss | -9.13 |\n", + "| explained_variance | -0.169 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29992 |\n", + "| policy_loss | 0.41 |\n", + "| std | 0.901 |\n", + "| value_loss | 0.00248 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 181/600: Total Reward = -6.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.7 |\n", + "| ep_rew_mean | -3.8 |\n", + "| success_rate | 0.55 |\n", + "| time/ | |\n", + "| fps | 170 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 150800 |\n", + "| train/ | |\n", + "| entropy_loss | -9.12 |\n", + "| explained_variance | 0.993 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30159 |\n", + "| policy_loss | -0.0252 |\n", + "| std | 0.901 |\n", + "| value_loss | 0.0135 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 182/600: Total Reward = -7.46\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.5 |\n", + "| ep_rew_mean | -3.83 |\n", + "| success_rate | 0.54 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 151635 |\n", + "| train/ | |\n", + "| entropy_loss | -9.11 |\n", + "| explained_variance | -16.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30326 |\n", + "| policy_loss | -1.08 |\n", + "| std | 0.899 |\n", + "| value_loss | 0.0222 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 183/600: Total Reward = -5.67\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30 |\n", + "| ep_rew_mean | -3.66 |\n", + "| success_rate | 0.55 |\n", + "| time/ | |\n", + "| fps | 203 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 152470 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | 0.257 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30493 |\n", + "| policy_loss | -0.514 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.00442 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 184/600: Total Reward = -2.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.4 |\n", + "| ep_rew_mean | -3.87 |\n", + "| success_rate | 0.5 |\n", + "| time/ | |\n", + "| fps | 158 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 153305 |\n", + "| train/ | |\n", + "| entropy_loss | -9.14 |\n", + "| explained_variance | -122 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30660 |\n", + "| policy_loss | -1.05 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.0233 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 185/600: Total Reward = -0.91\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.9 |\n", + "| ep_rew_mean | -3.71 |\n", + "| success_rate | 0.52 |\n", + "| time/ | |\n", + "| fps | 152 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 154140 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | 0.943 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30827 |\n", + "| policy_loss | -0.321 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.00713 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 186/600: Total Reward = -1.05\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32 |\n", + "| ep_rew_mean | -3.82 |\n", + "| success_rate | 0.5 |\n", + "| time/ | |\n", + "| fps | 193 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 154975 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -13.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30994 |\n", + "| policy_loss | 0.985 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0176 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 187/600: Total Reward = -1.71\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.8 |\n", + "| ep_rew_mean | -3.52 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 167 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 155810 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | 0.218 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31161 |\n", + "| policy_loss | 0.242 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.00146 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 188/600: Total Reward = -8.53\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.4 |\n", + "| ep_rew_mean | -3.39 |\n", + "| success_rate | 0.59 |\n", + "| time/ | |\n", + "| fps | 209 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 156645 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -2.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31328 |\n", + "| policy_loss | -1.3 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.019 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 189/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.1 |\n", + "| ep_rew_mean | -2.97 |\n", + "| success_rate | 0.66 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 157480 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -6.81 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31495 |\n", + "| policy_loss | 0.956 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.0172 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 190/600: Total Reward = -0.82\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.1 |\n", + "| ep_rew_mean | -3.07 |\n", + "| success_rate | 0.66 |\n", + "| time/ | |\n", + "| fps | 147 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 158315 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -5.06 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31662 |\n", + "| policy_loss | 1.43 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.029 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 191/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.2 |\n", + "| ep_rew_mean | -3.17 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 207 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 159150 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | 0.105 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31829 |\n", + "| policy_loss | -0.962 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.0161 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 192/600: Total Reward = -3.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26 |\n", + "| ep_rew_mean | -3.57 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 159985 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | 0.715 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31996 |\n", + "| policy_loss | 2 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0532 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 193/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.6 |\n", + "| ep_rew_mean | -3.14 |\n", + "| success_rate | 0.71 |\n", + "| time/ | |\n", + "| fps | 133 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 160820 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | 0.0889 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32163 |\n", + "| policy_loss | -5.43 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.34 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 194/600: Total Reward = -0.89\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.9 |\n", + "| ep_rew_mean | -2.79 |\n", + "| success_rate | 0.71 |\n", + "| time/ | |\n", + "| fps | 204 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 161655 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | -1.19 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32330 |\n", + "| policy_loss | -0.728 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.0132 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 195/600: Total Reward = -5.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.6 |\n", + "| ep_rew_mean | -2.29 |\n", + "| success_rate | 0.74 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 162490 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -0.883 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32497 |\n", + "| policy_loss | -1.26 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.0244 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 196/600: Total Reward = -7.71\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.7 |\n", + "| ep_rew_mean | -2.51 |\n", + "| success_rate | 0.69 |\n", + "| time/ | |\n", + "| fps | 143 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 163325 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | -1.13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32664 |\n", + "| policy_loss | 0.423 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.00275 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 197/600: Total Reward = -2.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.7 |\n", + "| ep_rew_mean | -3.06 |\n", + "| success_rate | 0.6 |\n", + "| time/ | |\n", + "| fps | 208 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 164160 |\n", + "| train/ | |\n", + "| entropy_loss | -9.22 |\n", + "| explained_variance | -1.64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32831 |\n", + "| policy_loss | -0.102 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.000892 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 198/600: Total Reward = -0.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.9 |\n", + "| ep_rew_mean | -3.19 |\n", + "| success_rate | 0.59 |\n", + "| time/ | |\n", + "| fps | 211 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 164995 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -7.11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32998 |\n", + "| policy_loss | -0.192 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.00255 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 199/600: Total Reward = -0.80\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.9 |\n", + "| ep_rew_mean | -3.11 |\n", + "| success_rate | 0.6 |\n", + "| time/ | |\n", + "| fps | 158 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 165830 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | -3.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33165 |\n", + "| policy_loss | -0.2 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.00918 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 200/600: Total Reward = -4.66\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.4 |\n", + "| ep_rew_mean | -2.25 |\n", + "| success_rate | 0.75 |\n", + "| time/ | |\n", + "| fps | 198 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 166665 |\n", + "| train/ | |\n", + "| entropy_loss | -9.24 |\n", + "| explained_variance | -2.04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33332 |\n", + "| policy_loss | -0.322 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.00257 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 201/600: Total Reward = -6.81\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.1 |\n", + "| ep_rew_mean | -2.33 |\n", + "| success_rate | 0.74 |\n", + "| time/ | |\n", + "| fps | 187 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 167500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.23 |\n", + "| explained_variance | 0.986 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33499 |\n", + "| policy_loss | 0.575 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.0189 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 202/600: Total Reward = -3.75\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.6 |\n", + "| ep_rew_mean | -2.26 |\n", + "| success_rate | 0.72 |\n", + "| time/ | |\n", + "| fps | 154 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 168335 |\n", + "| train/ | |\n", + "| entropy_loss | -9.22 |\n", + "| explained_variance | 0.952 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33666 |\n", + "| policy_loss | 0.714 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.0098 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 203/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.4 |\n", + "| ep_rew_mean | -2.62 |\n", + "| success_rate | 0.67 |\n", + "| time/ | |\n", + "| fps | 196 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 169170 |\n", + "| train/ | |\n", + "| entropy_loss | -9.22 |\n", + "| explained_variance | -1.15 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33833 |\n", + "| policy_loss | 0.0607 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.00175 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 204/600: Total Reward = -6.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.6 |\n", + "| ep_rew_mean | -2.56 |\n", + "| success_rate | 0.7 |\n", + "| time/ | |\n", + "| fps | 204 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 170005 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -3.51 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34000 |\n", + "| policy_loss | 0.171 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.00235 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 205/600: Total Reward = -1.71\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.3 |\n", + "| ep_rew_mean | -2.43 |\n", + "| success_rate | 0.76 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 170840 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -22.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34167 |\n", + "| policy_loss | 0.67 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.00704 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 206/600: Total Reward = -0.18\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.4 |\n", + "| ep_rew_mean | -2 |\n", + "| success_rate | 0.83 |\n", + "| time/ | |\n", + "| fps | 200 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 171675 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -5.58 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34334 |\n", + "| policy_loss | -0.165 |\n", + "| std | 0.902 |\n", + "| value_loss | 0.00268 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 207/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19.1 |\n", + "| ep_rew_mean | -1.79 |\n", + "| success_rate | 0.85 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 172510 |\n", + "| train/ | |\n", + "| entropy_loss | -9.12 |\n", + "| explained_variance | -2.41 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34501 |\n", + "| policy_loss | -0.146 |\n", + "| std | 0.899 |\n", + "| value_loss | 0.00391 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 208/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19 |\n", + "| ep_rew_mean | -1.81 |\n", + "| success_rate | 0.85 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 173345 |\n", + "| train/ | |\n", + "| entropy_loss | -9.1 |\n", + "| explained_variance | 0.374 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34668 |\n", + "| policy_loss | 13.3 |\n", + "| std | 0.898 |\n", + "| value_loss | 4.35 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 209/600: Total Reward = -0.48\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 14.1 |\n", + "| ep_rew_mean | -1.28 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 203 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 174180 |\n", + "| train/ | |\n", + "| entropy_loss | -9.08 |\n", + "| explained_variance | -1.16 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34835 |\n", + "| policy_loss | -1.22 |\n", + "| std | 0.894 |\n", + "| value_loss | 0.0206 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 210/600: Total Reward = -0.93\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 15.1 |\n", + "| ep_rew_mean | -1.34 |\n", + "| success_rate | 0.93 |\n", + "| time/ | |\n", + "| fps | 197 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 175015 |\n", + "| train/ | |\n", + "| entropy_loss | -9.08 |\n", + "| explained_variance | 0.668 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35002 |\n", + "| policy_loss | 25.6 |\n", + "| std | 0.895 |\n", + "| value_loss | 7.78 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 211/600: Total Reward = -5.91\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.7 |\n", + "| ep_rew_mean | -1.04 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 147 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 175850 |\n", + "| train/ | |\n", + "| entropy_loss | -9.08 |\n", + "| explained_variance | -0.525 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35169 |\n", + "| policy_loss | -0.53 |\n", + "| std | 0.893 |\n", + "| value_loss | 0.0172 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 212/600: Total Reward = -0.94\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.4 |\n", + "| ep_rew_mean | -1.11 |\n", + "| success_rate | 0.93 |\n", + "| time/ | |\n", + "| fps | 203 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 176685 |\n", + "| train/ | |\n", + "| entropy_loss | -9.06 |\n", + "| explained_variance | -5.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35336 |\n", + "| policy_loss | -2.93 |\n", + "| std | 0.891 |\n", + "| value_loss | 0.103 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 213/600: Total Reward = -0.78\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.1 |\n", + "| ep_rew_mean | -1.12 |\n", + "| success_rate | 0.92 |\n", + "| time/ | |\n", + "| fps | 201 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 177520 |\n", + "| train/ | |\n", + "| entropy_loss | -9.1 |\n", + "| explained_variance | -30.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35503 |\n", + "| policy_loss | 1.19 |\n", + "| std | 0.896 |\n", + "| value_loss | 0.0404 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 214/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 15.5 |\n", + "| ep_rew_mean | -1.67 |\n", + "| success_rate | 0.82 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 178355 |\n", + "| train/ | |\n", + "| entropy_loss | -9.1 |\n", + "| explained_variance | -1.47 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35670 |\n", + "| policy_loss | 0.116 |\n", + "| std | 0.896 |\n", + "| value_loss | 0.00293 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 215/600: Total Reward = -6.69\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.6 |\n", + "| ep_rew_mean | -2.62 |\n", + "| success_rate | 0.69 |\n", + "| time/ | |\n", + "| fps | 204 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 179190 |\n", + "| train/ | |\n", + "| entropy_loss | -9.02 |\n", + "| explained_variance | -61.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35837 |\n", + "| policy_loss | -2.72 |\n", + "| std | 0.885 |\n", + "| value_loss | 0.244 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 216/600: Total Reward = -1.50\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.9 |\n", + "| ep_rew_mean | -2.94 |\n", + "| success_rate | 0.65 |\n", + "| time/ | |\n", + "| fps | 187 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 180025 |\n", + "| train/ | |\n", + "| entropy_loss | -9 |\n", + "| explained_variance | -0.788 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36004 |\n", + "| policy_loss | -4.95 |\n", + "| std | 0.883 |\n", + "| value_loss | 0.366 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 217/600: Total Reward = -6.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28 |\n", + "| ep_rew_mean | -3.39 |\n", + "| success_rate | 0.58 |\n", + "| time/ | |\n", + "| fps | 103 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 180860 |\n", + "| train/ | |\n", + "| entropy_loss | -8.99 |\n", + "| explained_variance | 0.762 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36171 |\n", + "| policy_loss | 1.48 |\n", + "| std | 0.882 |\n", + "| value_loss | 0.0365 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 218/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19.5 |\n", + "| ep_rew_mean | -2.06 |\n", + "| success_rate | 0.78 |\n", + "| time/ | |\n", + "| fps | 185 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 181695 |\n", + "| train/ | |\n", + "| entropy_loss | -9.02 |\n", + "| explained_variance | -0.553 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36338 |\n", + "| policy_loss | -0.95 |\n", + "| std | 0.884 |\n", + "| value_loss | 0.011 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 219/600: Total Reward = -2.48\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10 |\n", + "| ep_rew_mean | -0.886 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 187 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 182530 |\n", + "| train/ | |\n", + "| entropy_loss | -8.99 |\n", + "| explained_variance | -0.422 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36505 |\n", + "| policy_loss | 0.301 |\n", + "| std | 0.882 |\n", + "| value_loss | 0.00999 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 220/600: Total Reward = -1.54\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.1 |\n", + "| ep_rew_mean | -0.964 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 116 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 183365 |\n", + "| train/ | |\n", + "| entropy_loss | -8.97 |\n", + "| explained_variance | -0.815 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36672 |\n", + "| policy_loss | -0.32 |\n", + "| std | 0.878 |\n", + "| value_loss | 0.00754 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 221/600: Total Reward = -2.05\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.31 |\n", + "| ep_rew_mean | -0.706 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 193 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 184200 |\n", + "| train/ | |\n", + "| entropy_loss | -8.93 |\n", + "| explained_variance | -0.773 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36839 |\n", + "| policy_loss | -0.766 |\n", + "| std | 0.874 |\n", + "| value_loss | 0.0114 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 222/600: Total Reward = -1.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.38 |\n", + "| ep_rew_mean | -0.704 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 179 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 185035 |\n", + "| train/ | |\n", + "| entropy_loss | -8.91 |\n", + "| explained_variance | -0.0423 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37006 |\n", + "| policy_loss | 3.96 |\n", + "| std | 0.872 |\n", + "| value_loss | 0.205 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 223/600: Total Reward = -6.93\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.74 |\n", + "| ep_rew_mean | -0.774 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 127 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 185870 |\n", + "| train/ | |\n", + "| entropy_loss | -8.92 |\n", + "| explained_variance | 0.631 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37173 |\n", + "| policy_loss | 18.8 |\n", + "| std | 0.873 |\n", + "| value_loss | 5.73 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 224/600: Total Reward = -0.48\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.3 |\n", + "| ep_rew_mean | -0.934 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 192 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 186705 |\n", + "| train/ | |\n", + "| entropy_loss | -8.89 |\n", + "| explained_variance | -3.52 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37340 |\n", + "| policy_loss | -1.82 |\n", + "| std | 0.87 |\n", + "| value_loss | 0.0568 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 225/600: Total Reward = -0.79\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.46 |\n", + "| ep_rew_mean | -0.642 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 185 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 187540 |\n", + "| train/ | |\n", + "| entropy_loss | -8.88 |\n", + "| explained_variance | -4.44 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37507 |\n", + "| policy_loss | -6.81 |\n", + "| std | 0.867 |\n", + "| value_loss | 0.506 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 226/600: Total Reward = -1.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.86 |\n", + "| ep_rew_mean | -0.488 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 138 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 188375 |\n", + "| train/ | |\n", + "| entropy_loss | -8.81 |\n", + "| explained_variance | -1.85 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37674 |\n", + "| policy_loss | -0.846 |\n", + "| std | 0.86 |\n", + "| value_loss | 0.0386 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 227/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.11 |\n", + "| ep_rew_mean | -0.511 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 190 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 189210 |\n", + "| train/ | |\n", + "| entropy_loss | -8.82 |\n", + "| explained_variance | -1.58 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37841 |\n", + "| policy_loss | -1.86 |\n", + "| std | 0.861 |\n", + "| value_loss | 0.0434 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 228/600: Total Reward = -0.70\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.29 |\n", + "| ep_rew_mean | -0.697 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 182 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 190045 |\n", + "| train/ | |\n", + "| entropy_loss | -8.81 |\n", + "| explained_variance | -30.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38008 |\n", + "| policy_loss | -5.1 |\n", + "| std | 0.861 |\n", + "| value_loss | 0.516 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 229/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.6 |\n", + "| ep_rew_mean | -0.449 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 144 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 190880 |\n", + "| train/ | |\n", + "| entropy_loss | -8.81 |\n", + "| explained_variance | 0.568 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38175 |\n", + "| policy_loss | -0.317 |\n", + "| std | 0.861 |\n", + "| value_loss | 0.0281 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 230/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.15 |\n", + "| ep_rew_mean | -0.449 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 188 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 191715 |\n", + "| train/ | |\n", + "| entropy_loss | -8.78 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38342 |\n", + "| policy_loss | 2.54 |\n", + "| std | 0.857 |\n", + "| value_loss | 0.0869 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 231/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.98 |\n", + "| ep_rew_mean | -0.404 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 146 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 192550 |\n", + "| train/ | |\n", + "| entropy_loss | -8.76 |\n", + "| explained_variance | 0.21 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38509 |\n", + "| policy_loss | -1.08 |\n", + "| std | 0.854 |\n", + "| value_loss | 0.0193 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 232/600: Total Reward = -0.50\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.76 |\n", + "| ep_rew_mean | -0.491 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 182 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 193385 |\n", + "| train/ | |\n", + "| entropy_loss | -8.76 |\n", + "| explained_variance | -1.11 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38676 |\n", + "| policy_loss | 0.435 |\n", + "| std | 0.854 |\n", + "| value_loss | 0.0292 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 233/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.3 |\n", + "| ep_rew_mean | -0.449 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 185 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 194220 |\n", + "| train/ | |\n", + "| entropy_loss | -8.7 |\n", + "| explained_variance | 0.0392 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38843 |\n", + "| policy_loss | 1.2 |\n", + "| std | 0.847 |\n", + "| value_loss | 0.0312 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 234/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.44 |\n", + "| ep_rew_mean | -0.467 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 132 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 195055 |\n", + "| train/ | |\n", + "| entropy_loss | -8.66 |\n", + "| explained_variance | 0.893 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39010 |\n", + "| policy_loss | -0.816 |\n", + "| std | 0.841 |\n", + "| value_loss | 0.0264 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 235/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.98 |\n", + "| ep_rew_mean | -0.409 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 184 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 195890 |\n", + "| train/ | |\n", + "| entropy_loss | -8.63 |\n", + "| explained_variance | -0.695 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39177 |\n", + "| policy_loss | 1.33 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.0371 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 236/600: Total Reward = -0.63\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.19 |\n", + "| ep_rew_mean | -0.537 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 196725 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | -1.72 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39344 |\n", + "| policy_loss | -1.76 |\n", + "| std | 0.835 |\n", + "| value_loss | 0.0591 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 237/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.17 |\n", + "| ep_rew_mean | -0.436 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 115 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 197560 |\n", + "| train/ | |\n", + "| entropy_loss | -8.56 |\n", + "| explained_variance | 0.976 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39511 |\n", + "| policy_loss | 0.653 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.00584 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 238/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.35 |\n", + "| ep_rew_mean | -0.539 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 178 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 198395 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | 0.193 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39678 |\n", + "| policy_loss | 0.89 |\n", + "| std | 0.827 |\n", + "| value_loss | 0.0406 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 239/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.32 |\n", + "| ep_rew_mean | -0.706 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 183 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 199230 |\n", + "| train/ | |\n", + "| entropy_loss | -8.56 |\n", + "| explained_variance | -2.36 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39845 |\n", + "| policy_loss | 0.779 |\n", + "| std | 0.831 |\n", + "| value_loss | 0.0213 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 240/600: Total Reward = -0.84\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.29 |\n", + "| ep_rew_mean | -0.448 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 200065 |\n", + "| train/ | |\n", + "| entropy_loss | -8.58 |\n", + "| explained_variance | 0.524 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40012 |\n", + "| policy_loss | 1.72 |\n", + "| std | 0.833 |\n", + "| value_loss | 0.0653 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 241/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.48 |\n", + "| ep_rew_mean | -0.463 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 179 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 200900 |\n", + "| train/ | |\n", + "| entropy_loss | -8.6 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40179 |\n", + "| policy_loss | 3.08 |\n", + "| std | 0.836 |\n", + "| value_loss | 0.0946 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 242/600: Total Reward = -0.26\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.12 |\n", + "| ep_rew_mean | -0.441 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 176 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 201735 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | -1.18 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40346 |\n", + "| policy_loss | -0.844 |\n", + "| std | 0.836 |\n", + "| value_loss | 0.0323 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 243/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.99 |\n", + "| ep_rew_mean | -0.411 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 144 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 202570 |\n", + "| train/ | |\n", + "| entropy_loss | -8.62 |\n", + "| explained_variance | 0.904 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40513 |\n", + "| policy_loss | 0.779 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.0117 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 244/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.26 |\n", + "| ep_rew_mean | -0.349 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 182 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 203405 |\n", + "| train/ | |\n", + "| entropy_loss | -8.64 |\n", + "| explained_variance | -0.423 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40680 |\n", + "| policy_loss | 2.78 |\n", + "| std | 0.841 |\n", + "| value_loss | 0.209 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 245/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.19 |\n", + "| ep_rew_mean | -0.462 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 170 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 204240 |\n", + "| train/ | |\n", + "| entropy_loss | -8.61 |\n", + "| explained_variance | 0.492 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40847 |\n", + "| policy_loss | 1.46 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.038 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 246/600: Total Reward = -0.66\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.66 |\n", + "| ep_rew_mean | -0.487 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 161 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 205075 |\n", + "| train/ | |\n", + "| entropy_loss | -8.61 |\n", + "| explained_variance | 0.502 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41014 |\n", + "| policy_loss | 3.12 |\n", + "| std | 0.836 |\n", + "| value_loss | 0.262 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 247/600: Total Reward = -0.78\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.41 |\n", + "| ep_rew_mean | -0.359 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 181 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 205910 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | 0.237 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41181 |\n", + "| policy_loss | -1.45 |\n", + "| std | 0.833 |\n", + "| value_loss | 0.0288 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 248/600: Total Reward = -0.58\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.07 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 141 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 206745 |\n", + "| train/ | |\n", + "| entropy_loss | -8.56 |\n", + "| explained_variance | 0.931 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41348 |\n", + "| policy_loss | -0.0757 |\n", + "| std | 0.831 |\n", + "| value_loss | 0.002 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 249/600: Total Reward = -0.50\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.72 |\n", + "| ep_rew_mean | -0.389 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 181 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 207580 |\n", + "| train/ | |\n", + "| entropy_loss | -8.58 |\n", + "| explained_variance | 0.937 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41515 |\n", + "| policy_loss | -0.357 |\n", + "| std | 0.833 |\n", + "| value_loss | 0.00565 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 250/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.18 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 180 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 208415 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | -2.82 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41682 |\n", + "| policy_loss | 0.889 |\n", + "| std | 0.826 |\n", + "| value_loss | 0.0212 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 251/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.06 |\n", + "| ep_rew_mean | -0.327 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 209250 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | 0.543 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41849 |\n", + "| policy_loss | -1.31 |\n", + "| std | 0.821 |\n", + "| value_loss | 0.0259 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 252/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.67 |\n", + "| ep_rew_mean | -0.397 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 186 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 210085 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -0.409 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42016 |\n", + "| policy_loss | -3.29 |\n", + "| std | 0.822 |\n", + "| value_loss | 0.144 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 253/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.65 |\n", + "| ep_rew_mean | -0.384 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 180 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 210920 |\n", + "| train/ | |\n", + "| entropy_loss | -8.48 |\n", + "| explained_variance | 0.409 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42183 |\n", + "| policy_loss | 0.402 |\n", + "| std | 0.82 |\n", + "| value_loss | 0.015 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 254/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.59 |\n", + "| ep_rew_mean | -0.383 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 114 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 211755 |\n", + "| train/ | |\n", + "| entropy_loss | -8.44 |\n", + "| explained_variance | 0.837 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42350 |\n", + "| policy_loss | -0.213 |\n", + "| std | 0.815 |\n", + "| value_loss | 0.00144 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 255/600: Total Reward = -0.26\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.97 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 180 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 212590 |\n", + "| train/ | |\n", + "| entropy_loss | -8.45 |\n", + "| explained_variance | -0.752 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42517 |\n", + "| policy_loss | -0.47 |\n", + "| std | 0.816 |\n", + "| value_loss | 0.0137 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 256/600: Total Reward = -0.54\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4 |\n", + "| ep_rew_mean | -0.313 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 182 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 213425 |\n", + "| train/ | |\n", + "| entropy_loss | -8.42 |\n", + "| explained_variance | 0.854 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42684 |\n", + "| policy_loss | 0.233 |\n", + "| std | 0.812 |\n", + "| value_loss | 0.00342 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 257/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.62 |\n", + "| ep_rew_mean | -0.286 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 109 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 214260 |\n", + "| train/ | |\n", + "| entropy_loss | -8.37 |\n", + "| explained_variance | 0.905 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42851 |\n", + "| policy_loss | 0.138 |\n", + "| std | 0.806 |\n", + "| value_loss | 0.00118 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 258/600: Total Reward = -0.47\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.18 |\n", + "| ep_rew_mean | -0.328 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 183 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 215095 |\n", + "| train/ | |\n", + "| entropy_loss | -8.38 |\n", + "| explained_variance | 0.949 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43018 |\n", + "| policy_loss | -0.629 |\n", + "| std | 0.808 |\n", + "| value_loss | 0.00566 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 259/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.12 |\n", + "| ep_rew_mean | -0.341 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 178 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 215930 |\n", + "| train/ | |\n", + "| entropy_loss | -8.34 |\n", + "| explained_variance | 0.904 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43185 |\n", + "| policy_loss | -0.398 |\n", + "| std | 0.804 |\n", + "| value_loss | 0.00793 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 260/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.56 |\n", + "| ep_rew_mean | -0.292 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 139 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 216765 |\n", + "| train/ | |\n", + "| entropy_loss | -8.27 |\n", + "| explained_variance | 0.964 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43352 |\n", + "| policy_loss | 0.192 |\n", + "| std | 0.797 |\n", + "| value_loss | 0.0019 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 261/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.99 |\n", + "| ep_rew_mean | -0.327 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 181 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 217600 |\n", + "| train/ | |\n", + "| entropy_loss | -8.29 |\n", + "| explained_variance | -4.46 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43519 |\n", + "| policy_loss | -7.2 |\n", + "| std | 0.798 |\n", + "| value_loss | 0.734 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 262/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.49 |\n", + "| ep_rew_mean | -0.278 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 146 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 218435 |\n", + "| train/ | |\n", + "| entropy_loss | -8.26 |\n", + "| explained_variance | 0.463 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43686 |\n", + "| policy_loss | -0.784 |\n", + "| std | 0.795 |\n", + "| value_loss | 0.0102 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 263/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.289 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 174 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 219270 |\n", + "| train/ | |\n", + "| entropy_loss | -8.31 |\n", + "| explained_variance | 0.962 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43853 |\n", + "| policy_loss | -0.765 |\n", + "| std | 0.801 |\n", + "| value_loss | 0.011 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 264/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37 |\n", + "| ep_rew_mean | -0.358 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 177 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 220105 |\n", + "| train/ | |\n", + "| entropy_loss | -8.28 |\n", + "| explained_variance | 0.897 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44020 |\n", + "| policy_loss | 1.15 |\n", + "| std | 0.799 |\n", + "| value_loss | 0.0225 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 265/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.11 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 117 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 220940 |\n", + "| train/ | |\n", + "| entropy_loss | -8.28 |\n", + "| explained_variance | 0.227 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44187 |\n", + "| policy_loss | -0.049 |\n", + "| std | 0.798 |\n", + "| value_loss | 0.0119 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 266/600: Total Reward = -0.48\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.81 |\n", + "| ep_rew_mean | -0.395 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 165 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 221775 |\n", + "| train/ | |\n", + "| entropy_loss | -8.3 |\n", + "| explained_variance | 0.578 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44354 |\n", + "| policy_loss | 2.15 |\n", + "| std | 0.801 |\n", + "| value_loss | 0.0702 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 267/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.361 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 175 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 222610 |\n", + "| train/ | |\n", + "| entropy_loss | -8.31 |\n", + "| explained_variance | 0.345 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44521 |\n", + "| policy_loss | -0.437 |\n", + "| std | 0.802 |\n", + "| value_loss | 0.0483 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 268/600: Total Reward = -0.48\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 110 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 223445 |\n", + "| train/ | |\n", + "| entropy_loss | -8.3 |\n", + "| explained_variance | 0.916 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44688 |\n", + "| policy_loss | -0.694 |\n", + "| std | 0.801 |\n", + "| value_loss | 0.00848 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 269/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.26 |\n", + "| ep_rew_mean | -0.341 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 173 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 224280 |\n", + "| train/ | |\n", + "| entropy_loss | -8.27 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44855 |\n", + "| policy_loss | 0.0128 |\n", + "| std | 0.799 |\n", + "| value_loss | 0.000261 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 270/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37 |\n", + "| ep_rew_mean | -0.368 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 168 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 225115 |\n", + "| train/ | |\n", + "| entropy_loss | -8.25 |\n", + "| explained_variance | 0.51 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45022 |\n", + "| policy_loss | 0.065 |\n", + "| std | 0.798 |\n", + "| value_loss | 0.00276 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 271/600: Total Reward = -0.72\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.06 |\n", + "| ep_rew_mean | -0.328 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 152 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 225950 |\n", + "| train/ | |\n", + "| entropy_loss | -8.24 |\n", + "| explained_variance | 0.941 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45189 |\n", + "| policy_loss | 0.162 |\n", + "| std | 0.796 |\n", + "| value_loss | 0.00142 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 272/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.75 |\n", + "| ep_rew_mean | -0.311 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 173 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 226785 |\n", + "| train/ | |\n", + "| entropy_loss | -8.27 |\n", + "| explained_variance | 0.865 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45356 |\n", + "| policy_loss | 0.498 |\n", + "| std | 0.8 |\n", + "| value_loss | 0.00475 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 273/600: Total Reward = -0.26\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.9 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 227620 |\n", + "| train/ | |\n", + "| entropy_loss | -8.29 |\n", + "| explained_variance | 0.944 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45523 |\n", + "| policy_loss | -0.13 |\n", + "| std | 0.802 |\n", + "| value_loss | 0.00123 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 274/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.11 |\n", + "| ep_rew_mean | -0.411 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 169 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 228455 |\n", + "| train/ | |\n", + "| entropy_loss | -8.26 |\n", + "| explained_variance | 0.922 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45690 |\n", + "| policy_loss | -0.35 |\n", + "| std | 0.8 |\n", + "| value_loss | 0.00269 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 275/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.9 |\n", + "| ep_rew_mean | -0.412 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 168 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 229290 |\n", + "| train/ | |\n", + "| entropy_loss | -8.24 |\n", + "| explained_variance | 0.972 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45857 |\n", + "| policy_loss | 0.132 |\n", + "| std | 0.798 |\n", + "| value_loss | 0.00173 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 276/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.14 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 107 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 230125 |\n", + "| train/ | |\n", + "| entropy_loss | -8.22 |\n", + "| explained_variance | 0.849 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46024 |\n", + "| policy_loss | -0.299 |\n", + "| std | 0.795 |\n", + "| value_loss | 0.0095 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 277/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.92 |\n", + "| ep_rew_mean | -0.309 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 230960 |\n", + "| train/ | |\n", + "| entropy_loss | -8.23 |\n", + "| explained_variance | -0.362 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46191 |\n", + "| policy_loss | -0.0236 |\n", + "| std | 0.796 |\n", + "| value_loss | 0.00693 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 278/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.31 |\n", + "| ep_rew_mean | -0.347 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 231795 |\n", + "| train/ | |\n", + "| entropy_loss | -8.23 |\n", + "| explained_variance | 0.857 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46358 |\n", + "| policy_loss | 0.034 |\n", + "| std | 0.797 |\n", + "| value_loss | 0.00192 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 279/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.32 |\n", + "| ep_rew_mean | -0.358 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 232630 |\n", + "| train/ | |\n", + "| entropy_loss | -8.23 |\n", + "| explained_variance | -0.422 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46525 |\n", + "| policy_loss | 1.29 |\n", + "| std | 0.798 |\n", + "| value_loss | 0.0457 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 280/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 165 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 233465 |\n", + "| train/ | |\n", + "| entropy_loss | -8.19 |\n", + "| explained_variance | -1.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46692 |\n", + "| policy_loss | 1.94 |\n", + "| std | 0.794 |\n", + "| value_loss | 0.0631 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 281/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.1 |\n", + "| ep_rew_mean | -0.334 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 234300 |\n", + "| train/ | |\n", + "| entropy_loss | -8.2 |\n", + "| explained_variance | 0.831 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46859 |\n", + "| policy_loss | 0.317 |\n", + "| std | 0.796 |\n", + "| value_loss | 0.00502 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 282/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.1 |\n", + "| ep_rew_mean | -0.403 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 163 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 235135 |\n", + "| train/ | |\n", + "| entropy_loss | -8.18 |\n", + "| explained_variance | 0.979 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47026 |\n", + "| policy_loss | 0.261 |\n", + "| std | 0.793 |\n", + "| value_loss | 0.00226 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 283/600: Total Reward = -0.59\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.15 |\n", + "| ep_rew_mean | -0.348 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 166 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 235970 |\n", + "| train/ | |\n", + "| entropy_loss | -8.15 |\n", + "| explained_variance | 0.453 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47193 |\n", + "| policy_loss | 0.801 |\n", + "| std | 0.79 |\n", + "| value_loss | 0.0121 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 284/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.78 |\n", + "| ep_rew_mean | -0.309 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 131 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 236805 |\n", + "| train/ | |\n", + "| entropy_loss | -8.13 |\n", + "| explained_variance | 0.815 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47360 |\n", + "| policy_loss | 0.136 |\n", + "| std | 0.788 |\n", + "| value_loss | 0.00228 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 285/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 174 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 237640 |\n", + "| train/ | |\n", + "| entropy_loss | -8.14 |\n", + "| explained_variance | 0.934 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47527 |\n", + "| policy_loss | -0.34 |\n", + "| std | 0.789 |\n", + "| value_loss | 0.00223 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 286/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.62 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 238475 |\n", + "| train/ | |\n", + "| entropy_loss | -8.11 |\n", + "| explained_variance | 0.999 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47694 |\n", + "| policy_loss | -0.0206 |\n", + "| std | 0.786 |\n", + "| value_loss | 1.32e-05 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 287/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.76 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 103 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 239310 |\n", + "| train/ | |\n", + "| entropy_loss | -8.08 |\n", + "| explained_variance | 0.906 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47861 |\n", + "| policy_loss | -0.327 |\n", + "| std | 0.782 |\n", + "| value_loss | 0.00773 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 288/600: Total Reward = -0.83\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 169 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 240145 |\n", + "| train/ | |\n", + "| entropy_loss | -8.09 |\n", + "| explained_variance | 0.735 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48028 |\n", + "| policy_loss | -0.576 |\n", + "| std | 0.783 |\n", + "| value_loss | 0.00896 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 289/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.67 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 240980 |\n", + "| train/ | |\n", + "| entropy_loss | -8.03 |\n", + "| explained_variance | 0.781 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48195 |\n", + "| policy_loss | -0.748 |\n", + "| std | 0.777 |\n", + "| value_loss | 0.0128 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 290/600: Total Reward = -0.56\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.97 |\n", + "| ep_rew_mean | -0.324 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 147 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 241815 |\n", + "| train/ | |\n", + "| entropy_loss | -8.03 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48362 |\n", + "| policy_loss | 0.142 |\n", + "| std | 0.778 |\n", + "| value_loss | 0.000477 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 291/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4 |\n", + "| ep_rew_mean | -0.33 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 242650 |\n", + "| train/ | |\n", + "| entropy_loss | -8.02 |\n", + "| explained_variance | 0.885 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48529 |\n", + "| policy_loss | 0.947 |\n", + "| std | 0.778 |\n", + "| value_loss | 0.0106 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 292/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.81 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 243485 |\n", + "| train/ | |\n", + "| entropy_loss | -8 |\n", + "| explained_variance | 0.962 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48696 |\n", + "| policy_loss | -0.233 |\n", + "| std | 0.777 |\n", + "| value_loss | 0.0016 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 293/600: Total Reward = -0.39\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.94 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 173 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 244320 |\n", + "| train/ | |\n", + "| entropy_loss | -8.01 |\n", + "| explained_variance | 0.662 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48863 |\n", + "| policy_loss | -0.376 |\n", + "| std | 0.779 |\n", + "| value_loss | 0.00556 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 294/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.3 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 245155 |\n", + "| train/ | |\n", + "| entropy_loss | -8.01 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49030 |\n", + "| policy_loss | -0.175 |\n", + "| std | 0.779 |\n", + "| value_loss | 0.00198 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 295/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.98 |\n", + "| ep_rew_mean | -0.327 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 245990 |\n", + "| train/ | |\n", + "| entropy_loss | -8.03 |\n", + "| explained_variance | 0.577 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49197 |\n", + "| policy_loss | -0.161 |\n", + "| std | 0.781 |\n", + "| value_loss | 0.00259 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 296/600: Total Reward = -1.01\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.83 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 246825 |\n", + "| train/ | |\n", + "| entropy_loss | -8.04 |\n", + "| explained_variance | 0.834 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49364 |\n", + "| policy_loss | -0.0213 |\n", + "| std | 0.783 |\n", + "| value_loss | 0.00218 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 297/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 142 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 247660 |\n", + "| train/ | |\n", + "| entropy_loss | -8.03 |\n", + "| explained_variance | 0.85 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49531 |\n", + "| policy_loss | -0.581 |\n", + "| std | 0.781 |\n", + "| value_loss | 0.00537 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 298/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.294 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 166 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 248495 |\n", + "| train/ | |\n", + "| entropy_loss | -8.02 |\n", + "| explained_variance | 0.848 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49698 |\n", + "| policy_loss | -0.289 |\n", + "| std | 0.782 |\n", + "| value_loss | 0.00214 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 299/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.42 |\n", + "| ep_rew_mean | -0.261 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 171 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 249330 |\n", + "| train/ | |\n", + "| entropy_loss | -8 |\n", + "| explained_variance | 0.592 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49865 |\n", + "| policy_loss | 0.287 |\n", + "| std | 0.779 |\n", + "| value_loss | 0.00604 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 300/600: Total Reward = -0.24\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.73 |\n", + "| ep_rew_mean | -0.303 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 101 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 250165 |\n", + "| train/ | |\n", + "| entropy_loss | -8.05 |\n", + "| explained_variance | 0.451 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 50032 |\n", + "| policy_loss | -1.02 |\n", + "| std | 0.784 |\n", + "| value_loss | 0.0244 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 301/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.68 |\n", + "| ep_rew_mean | -0.3 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 163 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 251000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.07 |\n", + "| explained_variance | 0.758 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 50199 |\n", + "| policy_loss | -0.582 |\n", + "| std | 0.787 |\n", + "| value_loss | 0.00691 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 302/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.49 |\n", + "| ep_rew_mean | -0.282 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 159 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 251835 |\n", + "| train/ | |\n", + "| entropy_loss | -8.04 |\n", + "| explained_variance | 0.972 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 50366 |\n", + "| policy_loss | 0.548 |\n", + "| std | 0.783 |\n", + "| value_loss | 0.00463 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 303/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.49 |\n", + "| ep_rew_mean | -0.362 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 252670 |\n", + "| train/ | |\n", + "| entropy_loss | -7.98 |\n", + "| explained_variance | 0.61 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 50533 |\n", + "| policy_loss | 0.126 |\n", + "| std | 0.778 |\n", + "| value_loss | 0.00163 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 304/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.78 |\n", + "| ep_rew_mean | -0.313 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 159 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 253505 |\n", + "| train/ | |\n", + "| entropy_loss | -7.96 |\n", + "| explained_variance | 0.977 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 50700 |\n", + "| policy_loss | -0.367 |\n", + "| std | 0.776 |\n", + "| value_loss | 0.0043 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 305/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.19 |\n", + "| ep_rew_mean | -0.338 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 113 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 254340 |\n", + "| train/ | |\n", + "| entropy_loss | -7.93 |\n", + "| explained_variance | 0.52 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 50867 |\n", + "| policy_loss | 0.867 |\n", + "| std | 0.772 |\n", + "| value_loss | 0.0199 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 306/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.14 |\n", + "| ep_rew_mean | -0.347 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 161 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 255175 |\n", + "| train/ | |\n", + "| entropy_loss | -7.92 |\n", + "| explained_variance | 0.873 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 51034 |\n", + "| policy_loss | -0.0862 |\n", + "| std | 0.771 |\n", + "| value_loss | 0.0012 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 307/600: Total Reward = -0.55\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 166 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 256010 |\n", + "| train/ | |\n", + "| entropy_loss | -7.91 |\n", + "| explained_variance | 0.83 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 51201 |\n", + "| policy_loss | -0.842 |\n", + "| std | 0.772 |\n", + "| value_loss | 0.0136 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 308/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.1 |\n", + "| ep_rew_mean | -0.337 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 115 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 256845 |\n", + "| train/ | |\n", + "| entropy_loss | -7.92 |\n", + "| explained_variance | -0.238 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 51368 |\n", + "| policy_loss | -1.06 |\n", + "| std | 0.774 |\n", + "| value_loss | 0.0275 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 309/600: Total Reward = -0.55\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.53 |\n", + "| ep_rew_mean | -0.276 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 159 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 257680 |\n", + "| train/ | |\n", + "| entropy_loss | -7.88 |\n", + "| explained_variance | 0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 51535 |\n", + "| policy_loss | 0.0767 |\n", + "| std | 0.768 |\n", + "| value_loss | 0.000135 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 310/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.75 |\n", + "| ep_rew_mean | -0.302 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 258515 |\n", + "| train/ | |\n", + "| entropy_loss | -7.91 |\n", + "| explained_variance | 0.96 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 51702 |\n", + "| policy_loss | -0.332 |\n", + "| std | 0.772 |\n", + "| value_loss | 0.00177 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 311/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.68 |\n", + "| ep_rew_mean | -0.297 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 153 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 259350 |\n", + "| train/ | |\n", + "| entropy_loss | -7.91 |\n", + "| explained_variance | 0.977 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 51869 |\n", + "| policy_loss | 0.335 |\n", + "| std | 0.771 |\n", + "| value_loss | 0.00231 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 312/600: Total Reward = -0.31\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.59 |\n", + "| ep_rew_mean | -0.278 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 156 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 260185 |\n", + "| train/ | |\n", + "| entropy_loss | -7.9 |\n", + "| explained_variance | -0.321 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 52036 |\n", + "| policy_loss | -0.727 |\n", + "| std | 0.771 |\n", + "| value_loss | 0.0209 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 313/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.55 |\n", + "| ep_rew_mean | -0.366 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 104 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 261020 |\n", + "| train/ | |\n", + "| entropy_loss | -7.89 |\n", + "| explained_variance | 0.935 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 52203 |\n", + "| policy_loss | 0.273 |\n", + "| std | 0.769 |\n", + "| value_loss | 0.00618 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 314/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.355 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 158 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 261855 |\n", + "| train/ | |\n", + "| entropy_loss | -7.84 |\n", + "| explained_variance | 0.829 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 52370 |\n", + "| policy_loss | 0.387 |\n", + "| std | 0.764 |\n", + "| value_loss | 0.0132 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 315/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.85 |\n", + "| ep_rew_mean | -0.313 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 262690 |\n", + "| train/ | |\n", + "| entropy_loss | -7.8 |\n", + "| explained_variance | 0.889 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 52537 |\n", + "| policy_loss | 0.414 |\n", + "| std | 0.76 |\n", + "| value_loss | 0.00744 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 316/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.47 |\n", + "| ep_rew_mean | -0.369 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 167 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 263525 |\n", + "| train/ | |\n", + "| entropy_loss | -7.79 |\n", + "| explained_variance | 0.999 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 52704 |\n", + "| policy_loss | -0.175 |\n", + "| std | 0.759 |\n", + "| value_loss | 0.000755 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 317/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.99 |\n", + "| ep_rew_mean | -0.397 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 163 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 264360 |\n", + "| train/ | |\n", + "| entropy_loss | -7.77 |\n", + "| explained_variance | 0.302 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 52871 |\n", + "| policy_loss | -1.09 |\n", + "| std | 0.755 |\n", + "| value_loss | 0.0198 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 318/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.32 |\n", + "| ep_rew_mean | -0.345 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 99 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 265195 |\n", + "| train/ | |\n", + "| entropy_loss | -7.76 |\n", + "| explained_variance | 0.984 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 53038 |\n", + "| policy_loss | 0.475 |\n", + "| std | 0.755 |\n", + "| value_loss | 0.00258 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 319/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.94 |\n", + "| ep_rew_mean | -0.316 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 156 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 266030 |\n", + "| train/ | |\n", + "| entropy_loss | -7.76 |\n", + "| explained_variance | 0.273 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 53205 |\n", + "| policy_loss | 0.548 |\n", + "| std | 0.755 |\n", + "| value_loss | 0.0082 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 320/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.36 |\n", + "| ep_rew_mean | -0.362 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 138 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 266865 |\n", + "| train/ | |\n", + "| entropy_loss | -7.75 |\n", + "| explained_variance | -0.104 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 53372 |\n", + "| policy_loss | 0.37 |\n", + "| std | 0.755 |\n", + "| value_loss | 0.0116 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 321/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.326 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 160 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 267700 |\n", + "| train/ | |\n", + "| entropy_loss | -7.74 |\n", + "| explained_variance | 0.78 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 53539 |\n", + "| policy_loss | -1.33 |\n", + "| std | 0.754 |\n", + "| value_loss | 0.0152 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 322/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 154 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 268535 |\n", + "| train/ | |\n", + "| entropy_loss | -7.71 |\n", + "| explained_variance | 0.929 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 53706 |\n", + "| policy_loss | -0.119 |\n", + "| std | 0.75 |\n", + "| value_loss | 0.000786 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 323/600: Total Reward = -0.52\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.09 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 94 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 269370 |\n", + "| train/ | |\n", + "| entropy_loss | -7.72 |\n", + "| explained_variance | 0.995 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 53873 |\n", + "| policy_loss | -0.0493 |\n", + "| std | 0.752 |\n", + "| value_loss | 0.000374 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 324/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.26 |\n", + "| ep_rew_mean | -0.353 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 156 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 270205 |\n", + "| train/ | |\n", + "| entropy_loss | -7.65 |\n", + "| explained_variance | 0.818 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 54040 |\n", + "| policy_loss | -0.435 |\n", + "| std | 0.745 |\n", + "| value_loss | 0.00407 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 325/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 126 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 271040 |\n", + "| train/ | |\n", + "| entropy_loss | -7.62 |\n", + "| explained_variance | 0.133 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 54207 |\n", + "| policy_loss | -1.29 |\n", + "| std | 0.742 |\n", + "| value_loss | 0.035 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 326/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 161 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 271875 |\n", + "| train/ | |\n", + "| entropy_loss | -7.63 |\n", + "| explained_variance | 0.995 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 54374 |\n", + "| policy_loss | 0.311 |\n", + "| std | 0.743 |\n", + "| value_loss | 0.00186 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 327/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.6 |\n", + "| ep_rew_mean | -0.286 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 272710 |\n", + "| train/ | |\n", + "| entropy_loss | -7.61 |\n", + "| explained_variance | 0.842 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 54541 |\n", + "| policy_loss | -0.347 |\n", + "| std | 0.742 |\n", + "| value_loss | 0.00215 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 328/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 96 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 273545 |\n", + "| train/ | |\n", + "| entropy_loss | -7.6 |\n", + "| explained_variance | 0.999 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 54708 |\n", + "| policy_loss | 0.275 |\n", + "| std | 0.739 |\n", + "| value_loss | 0.00136 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 329/600: Total Reward = -0.64\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.64 |\n", + "| ep_rew_mean | -0.291 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 154 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 274380 |\n", + "| train/ | |\n", + "| entropy_loss | -7.55 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 54875 |\n", + "| policy_loss | 0.00573 |\n", + "| std | 0.735 |\n", + "| value_loss | 9.15e-05 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 330/600: Total Reward = -0.41\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.74 |\n", + "| ep_rew_mean | -0.37 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 275215 |\n", + "| train/ | |\n", + "| entropy_loss | -7.53 |\n", + "| explained_variance | -0.266 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 55042 |\n", + "| policy_loss | -0.517 |\n", + "| std | 0.732 |\n", + "| value_loss | 0.0492 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 331/600: Total Reward = -0.68\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.92 |\n", + "| ep_rew_mean | -0.321 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 158 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 276050 |\n", + "| train/ | |\n", + "| entropy_loss | -7.52 |\n", + "| explained_variance | 0.838 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 55209 |\n", + "| policy_loss | -0.0426 |\n", + "| std | 0.731 |\n", + "| value_loss | 0.0015 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 332/600: Total Reward = -0.56\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.41 |\n", + "| ep_rew_mean | -0.271 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 276885 |\n", + "| train/ | |\n", + "| entropy_loss | -7.49 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 55376 |\n", + "| policy_loss | 0.0259 |\n", + "| std | 0.728 |\n", + "| value_loss | 6.44e-05 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 333/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.302 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 101 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 277720 |\n", + "| train/ | |\n", + "| entropy_loss | -7.49 |\n", + "| explained_variance | 0.876 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 55543 |\n", + "| policy_loss | -0.289 |\n", + "| std | 0.728 |\n", + "| value_loss | 0.00203 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 334/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 278555 |\n", + "| train/ | |\n", + "| entropy_loss | -7.47 |\n", + "| explained_variance | 0.149 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 55710 |\n", + "| policy_loss | -0.174 |\n", + "| std | 0.725 |\n", + "| value_loss | 0.0118 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 335/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 96 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 279390 |\n", + "| train/ | |\n", + "| entropy_loss | -7.47 |\n", + "| explained_variance | 0.778 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 55877 |\n", + "| policy_loss | 0.398 |\n", + "| std | 0.726 |\n", + "| value_loss | 0.00438 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 336/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 154 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 280225 |\n", + "| train/ | |\n", + "| entropy_loss | -7.44 |\n", + "| explained_variance | 0.687 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 56044 |\n", + "| policy_loss | -0.138 |\n", + "| std | 0.723 |\n", + "| value_loss | 0.0141 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 337/600: Total Reward = -0.57\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.2 |\n", + "| ep_rew_mean | -0.337 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 151 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 281060 |\n", + "| train/ | |\n", + "| entropy_loss | -7.42 |\n", + "| explained_variance | 0.743 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 56211 |\n", + "| policy_loss | -0.217 |\n", + "| std | 0.719 |\n", + "| value_loss | 0.00135 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 338/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.66 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 281895 |\n", + "| train/ | |\n", + "| entropy_loss | -7.38 |\n", + "| explained_variance | 0.83 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 56378 |\n", + "| policy_loss | -0.06 |\n", + "| std | 0.715 |\n", + "| value_loss | 0.00335 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 339/600: Total Reward = -0.41\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.09 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 152 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 282730 |\n", + "| train/ | |\n", + "| entropy_loss | -7.34 |\n", + "| explained_variance | 0.754 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 56545 |\n", + "| policy_loss | -0.116 |\n", + "| std | 0.71 |\n", + "| value_loss | 0.00333 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 340/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.76 |\n", + "| ep_rew_mean | -0.295 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 91 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 283565 |\n", + "| train/ | |\n", + "| entropy_loss | -7.31 |\n", + "| explained_variance | 0.94 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 56712 |\n", + "| policy_loss | 1 |\n", + "| std | 0.707 |\n", + "| value_loss | 0.0265 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 341/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.28 |\n", + "| ep_rew_mean | -0.352 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 284400 |\n", + "| train/ | |\n", + "| entropy_loss | -7.27 |\n", + "| explained_variance | 0.913 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 56879 |\n", + "| policy_loss | -0.7 |\n", + "| std | 0.703 |\n", + "| value_loss | 0.00735 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 342/600: Total Reward = -4.01\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.312 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 118 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 285235 |\n", + "| train/ | |\n", + "| entropy_loss | -7.26 |\n", + "| explained_variance | 0.932 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 57046 |\n", + "| policy_loss | 0.247 |\n", + "| std | 0.703 |\n", + "| value_loss | 0.00241 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 343/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.81 |\n", + "| ep_rew_mean | -0.306 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 147 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 286070 |\n", + "| train/ | |\n", + "| entropy_loss | -7.26 |\n", + "| explained_variance | 0.858 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 57213 |\n", + "| policy_loss | -0.306 |\n", + "| std | 0.705 |\n", + "| value_loss | 0.00562 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 344/600: Total Reward = -0.54\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.312 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 151 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 286905 |\n", + "| train/ | |\n", + "| entropy_loss | -7.24 |\n", + "| explained_variance | 0.98 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 57380 |\n", + "| policy_loss | -0.244 |\n", + "| std | 0.702 |\n", + "| value_loss | 0.00104 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 345/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.68 |\n", + "| ep_rew_mean | -0.302 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 105 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 287740 |\n", + "| train/ | |\n", + "| entropy_loss | -7.21 |\n", + "| explained_variance | 0.963 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 57547 |\n", + "| policy_loss | -0.113 |\n", + "| std | 0.7 |\n", + "| value_loss | 0.000849 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 346/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.98 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 288575 |\n", + "| train/ | |\n", + "| entropy_loss | -7.2 |\n", + "| explained_variance | 0.79 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 57714 |\n", + "| policy_loss | 0.969 |\n", + "| std | 0.699 |\n", + "| value_loss | 0.0186 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 347/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.9 |\n", + "| ep_rew_mean | -0.403 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 93 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 289410 |\n", + "| train/ | |\n", + "| entropy_loss | -7.19 |\n", + "| explained_variance | 0.226 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 57881 |\n", + "| policy_loss | 1.94 |\n", + "| std | 0.699 |\n", + "| value_loss | 0.0913 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 348/600: Total Reward = -0.63\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.05 |\n", + "| ep_rew_mean | -0.398 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 141 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 290245 |\n", + "| train/ | |\n", + "| entropy_loss | -7.21 |\n", + "| explained_variance | -0.041 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 58048 |\n", + "| policy_loss | 3.06 |\n", + "| std | 0.701 |\n", + "| value_loss | 0.192 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 349/600: Total Reward = -0.53\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.14 |\n", + "| ep_rew_mean | -0.422 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 136 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 291080 |\n", + "| train/ | |\n", + "| entropy_loss | -7.18 |\n", + "| explained_variance | 0.353 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 58215 |\n", + "| policy_loss | -1.94 |\n", + "| std | 0.699 |\n", + "| value_loss | 0.0869 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 350/600: Total Reward = -0.60\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.42 |\n", + "| ep_rew_mean | -0.355 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 147 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 291915 |\n", + "| train/ | |\n", + "| entropy_loss | -7.14 |\n", + "| explained_variance | 0.989 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 58382 |\n", + "| policy_loss | -0.235 |\n", + "| std | 0.694 |\n", + "| value_loss | 0.000922 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 351/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.76 |\n", + "| ep_rew_mean | -0.38 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 153 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 292750 |\n", + "| train/ | |\n", + "| entropy_loss | -7.11 |\n", + "| explained_variance | 0.465 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 58549 |\n", + "| policy_loss | -0.152 |\n", + "| std | 0.691 |\n", + "| value_loss | 0.00388 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 352/600: Total Reward = -0.63\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.62 |\n", + "| ep_rew_mean | -0.472 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 88 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 293585 |\n", + "| train/ | |\n", + "| entropy_loss | -7.12 |\n", + "| explained_variance | 0.116 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 58716 |\n", + "| policy_loss | 3.93 |\n", + "| std | 0.691 |\n", + "| value_loss | 0.635 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 353/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.13 |\n", + "| ep_rew_mean | -0.415 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 138 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 294420 |\n", + "| train/ | |\n", + "| entropy_loss | -7.12 |\n", + "| explained_variance | 0.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 58883 |\n", + "| policy_loss | -0.141 |\n", + "| std | 0.69 |\n", + "| value_loss | 0.000917 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 354/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.3 |\n", + "| ep_rew_mean | -0.337 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 114 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 295255 |\n", + "| train/ | |\n", + "| entropy_loss | -7.11 |\n", + "| explained_variance | 0.248 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 59050 |\n", + "| policy_loss | -0.335 |\n", + "| std | 0.689 |\n", + "| value_loss | 0.00308 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 355/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.67 |\n", + "| ep_rew_mean | -0.288 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 296090 |\n", + "| train/ | |\n", + "| entropy_loss | -7.09 |\n", + "| explained_variance | 0.768 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 59217 |\n", + "| policy_loss | 0.0985 |\n", + "| std | 0.687 |\n", + "| value_loss | 0.00177 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 356/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 296925 |\n", + "| train/ | |\n", + "| entropy_loss | -7.09 |\n", + "| explained_variance | 0.978 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 59384 |\n", + "| policy_loss | 0.166 |\n", + "| std | 0.687 |\n", + "| value_loss | 0.00177 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 357/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.31 |\n", + "| ep_rew_mean | -0.265 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 99 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 297760 |\n", + "| train/ | |\n", + "| entropy_loss | -7.09 |\n", + "| explained_variance | 0.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 59551 |\n", + "| policy_loss | 0.575 |\n", + "| std | 0.688 |\n", + "| value_loss | 0.00648 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 358/600: Total Reward = -0.75\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.46 |\n", + "| ep_rew_mean | -0.271 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 142 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 298595 |\n", + "| train/ | |\n", + "| entropy_loss | -7.07 |\n", + "| explained_variance | -0.111 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 59718 |\n", + "| policy_loss | 0.372 |\n", + "| std | 0.686 |\n", + "| value_loss | 0.00538 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 359/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.91 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 94 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 299430 |\n", + "| train/ | |\n", + "| entropy_loss | -7.06 |\n", + "| explained_variance | 0.942 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 59885 |\n", + "| policy_loss | -0.0626 |\n", + "| std | 0.685 |\n", + "| value_loss | 0.000195 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 360/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.17 |\n", + "| ep_rew_mean | -0.34 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 148 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 300265 |\n", + "| train/ | |\n", + "| entropy_loss | -7.04 |\n", + "| explained_variance | 0.883 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 60052 |\n", + "| policy_loss | -0.353 |\n", + "| std | 0.683 |\n", + "| value_loss | 0.00595 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 361/600: Total Reward = -0.47\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.49 |\n", + "| ep_rew_mean | -0.373 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 133 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 301100 |\n", + "| train/ | |\n", + "| entropy_loss | -7.01 |\n", + "| explained_variance | 0.894 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 60219 |\n", + "| policy_loss | -0.667 |\n", + "| std | 0.681 |\n", + "| value_loss | 0.0108 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 362/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.77 |\n", + "| ep_rew_mean | -0.309 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 154 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 301935 |\n", + "| train/ | |\n", + "| entropy_loss | -7.02 |\n", + "| explained_variance | 0.91 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 60386 |\n", + "| policy_loss | 0.681 |\n", + "| std | 0.683 |\n", + "| value_loss | 0.00821 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 363/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.77 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 302770 |\n", + "| train/ | |\n", + "| entropy_loss | -7.04 |\n", + "| explained_variance | -0.0864 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 60553 |\n", + "| policy_loss | 0.962 |\n", + "| std | 0.685 |\n", + "| value_loss | 0.0182 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 364/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.46 |\n", + "| ep_rew_mean | -0.287 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 94 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 303605 |\n", + "| train/ | |\n", + "| entropy_loss | -7.02 |\n", + "| explained_variance | 0.978 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 60720 |\n", + "| policy_loss | 0.00779 |\n", + "| std | 0.683 |\n", + "| value_loss | 0.000364 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 365/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.71 |\n", + "| ep_rew_mean | -0.287 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 304440 |\n", + "| train/ | |\n", + "| entropy_loss | -7.03 |\n", + "| explained_variance | 0.499 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 60887 |\n", + "| policy_loss | -0.87 |\n", + "| std | 0.685 |\n", + "| value_loss | 0.0322 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 366/600: Total Reward = -0.66\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 102 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 305275 |\n", + "| train/ | |\n", + "| entropy_loss | -7.04 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 61054 |\n", + "| policy_loss | 0.0711 |\n", + "| std | 0.684 |\n", + "| value_loss | 0.0006 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 367/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.301 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 149 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 306110 |\n", + "| train/ | |\n", + "| entropy_loss | -7.03 |\n", + "| explained_variance | 0.864 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 61221 |\n", + "| policy_loss | 0.968 |\n", + "| std | 0.683 |\n", + "| value_loss | 0.0183 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 368/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.78 |\n", + "| ep_rew_mean | -0.301 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 147 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 306945 |\n", + "| train/ | |\n", + "| entropy_loss | -7 |\n", + "| explained_variance | -0.253 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 61388 |\n", + "| policy_loss | 0.0826 |\n", + "| std | 0.681 |\n", + "| value_loss | 0.00118 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 369/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.63 |\n", + "| ep_rew_mean | -0.301 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 109 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 307780 |\n", + "| train/ | |\n", + "| entropy_loss | -6.97 |\n", + "| explained_variance | 0.943 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 61555 |\n", + "| policy_loss | -0.149 |\n", + "| std | 0.679 |\n", + "| value_loss | 0.00142 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 370/600: Total Reward = -0.75\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.47 |\n", + "| ep_rew_mean | -0.277 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 141 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 308615 |\n", + "| train/ | |\n", + "| entropy_loss | -6.95 |\n", + "| explained_variance | 0.857 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 61722 |\n", + "| policy_loss | 0.0173 |\n", + "| std | 0.676 |\n", + "| value_loss | 0.00115 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 371/600: Total Reward = -0.39\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.83 |\n", + "| ep_rew_mean | -0.304 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 89 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 309450 |\n", + "| train/ | |\n", + "| entropy_loss | -6.96 |\n", + "| explained_variance | 0.877 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 61889 |\n", + "| policy_loss | 0.543 |\n", + "| std | 0.676 |\n", + "| value_loss | 0.00691 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 372/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 149 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 310285 |\n", + "| train/ | |\n", + "| entropy_loss | -6.96 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 62056 |\n", + "| policy_loss | -0.523 |\n", + "| std | 0.677 |\n", + "| value_loss | 0.00923 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 373/600: Total Reward = -0.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.58 |\n", + "| ep_rew_mean | -0.389 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 311120 |\n", + "| train/ | |\n", + "| entropy_loss | -6.94 |\n", + "| explained_variance | 0.432 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 62223 |\n", + "| policy_loss | 1.12 |\n", + "| std | 0.676 |\n", + "| value_loss | 0.0293 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 374/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.317 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 155 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 311955 |\n", + "| train/ | |\n", + "| entropy_loss | -6.93 |\n", + "| explained_variance | 0.747 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 62390 |\n", + "| policy_loss | 0.197 |\n", + "| std | 0.675 |\n", + "| value_loss | 0.00178 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 375/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.36 |\n", + "| ep_rew_mean | -0.269 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 312790 |\n", + "| train/ | |\n", + "| entropy_loss | -6.95 |\n", + "| explained_variance | 0.324 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 62557 |\n", + "| policy_loss | 0.634 |\n", + "| std | 0.678 |\n", + "| value_loss | 0.0161 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 376/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.92 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 90 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 313625 |\n", + "| train/ | |\n", + "| entropy_loss | -6.95 |\n", + "| explained_variance | 0.251 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 62724 |\n", + "| policy_loss | -0.203 |\n", + "| std | 0.679 |\n", + "| value_loss | 0.0033 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 377/600: Total Reward = -0.63\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 146 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 314460 |\n", + "| train/ | |\n", + "| entropy_loss | -6.97 |\n", + "| explained_variance | 0.983 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 62891 |\n", + "| policy_loss | 0.0749 |\n", + "| std | 0.681 |\n", + "| value_loss | 0.000229 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 378/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.3 |\n", + "| ep_rew_mean | -0.332 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 92 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 315295 |\n", + "| train/ | |\n", + "| entropy_loss | -6.94 |\n", + "| explained_variance | 0.945 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 63058 |\n", + "| policy_loss | 0.133 |\n", + "| std | 0.678 |\n", + "| value_loss | 0.00073 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 379/600: Total Reward = -0.44\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.3 |\n", + "| ep_rew_mean | -0.345 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 144 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 316130 |\n", + "| train/ | |\n", + "| entropy_loss | -6.9 |\n", + "| explained_variance | 0.933 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 63225 |\n", + "| policy_loss | -0.209 |\n", + "| std | 0.674 |\n", + "| value_loss | 0.00246 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 380/600: Total Reward = -0.59\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.3 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 136 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 316965 |\n", + "| train/ | |\n", + "| entropy_loss | -6.87 |\n", + "| explained_variance | 0.982 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 63392 |\n", + "| policy_loss | 0.54 |\n", + "| std | 0.671 |\n", + "| value_loss | 0.00579 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 381/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 137 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 317800 |\n", + "| train/ | |\n", + "| entropy_loss | -6.91 |\n", + "| explained_variance | 0.138 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 63559 |\n", + "| policy_loss | 0.605 |\n", + "| std | 0.675 |\n", + "| value_loss | 0.00984 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 382/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.299 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 318635 |\n", + "| train/ | |\n", + "| entropy_loss | -6.91 |\n", + "| explained_variance | 0.955 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 63726 |\n", + "| policy_loss | 0.436 |\n", + "| std | 0.674 |\n", + "| value_loss | 0.00526 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 383/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.91 |\n", + "| ep_rew_mean | -0.316 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 85 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 319470 |\n", + "| train/ | |\n", + "| entropy_loss | -6.9 |\n", + "| explained_variance | 0.962 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 63893 |\n", + "| policy_loss | 0.155 |\n", + "| std | 0.674 |\n", + "| value_loss | 0.00134 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 384/600: Total Reward = -0.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.46 |\n", + "| ep_rew_mean | -0.465 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 148 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 320305 |\n", + "| train/ | |\n", + "| entropy_loss | -6.9 |\n", + "| explained_variance | -0.555 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 64060 |\n", + "| policy_loss | 2.33 |\n", + "| std | 0.674 |\n", + "| value_loss | 0.161 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 385/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.72 |\n", + "| ep_rew_mean | -0.455 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 99 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 321140 |\n", + "| train/ | |\n", + "| entropy_loss | -6.94 |\n", + "| explained_variance | -0.446 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 64227 |\n", + "| policy_loss | -0.962 |\n", + "| std | 0.679 |\n", + "| value_loss | 0.0226 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 386/600: Total Reward = -0.05\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.11 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 141 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 321975 |\n", + "| train/ | |\n", + "| entropy_loss | -6.95 |\n", + "| explained_variance | 0.188 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 64394 |\n", + "| policy_loss | 0.337 |\n", + "| std | 0.68 |\n", + "| value_loss | 0.0108 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 387/600: Total Reward = -0.60\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.66 |\n", + "| ep_rew_mean | -0.393 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 145 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 322810 |\n", + "| train/ | |\n", + "| entropy_loss | -6.94 |\n", + "| explained_variance | -0.774 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 64561 |\n", + "| policy_loss | 0.387 |\n", + "| std | 0.678 |\n", + "| value_loss | 0.00816 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 388/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.12 |\n", + "| ep_rew_mean | -0.42 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 124 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 323645 |\n", + "| train/ | |\n", + "| entropy_loss | -6.92 |\n", + "| explained_variance | 0.947 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 64728 |\n", + "| policy_loss | -0.254 |\n", + "| std | 0.676 |\n", + "| value_loss | 0.00283 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 389/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.43 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 150 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 324480 |\n", + "| train/ | |\n", + "| entropy_loss | -6.95 |\n", + "| explained_variance | 0.773 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 64895 |\n", + "| policy_loss | 0.486 |\n", + "| std | 0.678 |\n", + "| value_loss | 0.0126 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 390/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.335 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 87 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 325315 |\n", + "| train/ | |\n", + "| entropy_loss | -6.96 |\n", + "| explained_variance | 0.858 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 65062 |\n", + "| policy_loss | -0.468 |\n", + "| std | 0.68 |\n", + "| value_loss | 0.00527 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 391/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.21 |\n", + "| ep_rew_mean | -0.334 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 139 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 326150 |\n", + "| train/ | |\n", + "| entropy_loss | -6.94 |\n", + "| explained_variance | 0.596 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 65229 |\n", + "| policy_loss | 0.138 |\n", + "| std | 0.679 |\n", + "| value_loss | 0.0025 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 392/600: Total Reward = -0.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.07 |\n", + "| ep_rew_mean | -0.313 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 94 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 326985 |\n", + "| train/ | |\n", + "| entropy_loss | -6.93 |\n", + "| explained_variance | 0.816 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 65396 |\n", + "| policy_loss | 0.266 |\n", + "| std | 0.678 |\n", + "| value_loss | 0.00524 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 393/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.69 |\n", + "| ep_rew_mean | -0.279 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 138 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 327820 |\n", + "| train/ | |\n", + "| entropy_loss | -6.93 |\n", + "| explained_variance | 0.925 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 65563 |\n", + "| policy_loss | -0.165 |\n", + "| std | 0.678 |\n", + "| value_loss | 0.00237 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 394/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.91 |\n", + "| ep_rew_mean | -0.313 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 132 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 328655 |\n", + "| train/ | |\n", + "| entropy_loss | -6.93 |\n", + "| explained_variance | 0.984 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 65730 |\n", + "| policy_loss | -0.129 |\n", + "| std | 0.677 |\n", + "| value_loss | 0.00095 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 395/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.9 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 67 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 7 |\n", + "| total_timesteps | 329490 |\n", + "| train/ | |\n", + "| entropy_loss | -6.92 |\n", + "| explained_variance | 0.884 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 65897 |\n", + "| policy_loss | 0.473 |\n", + "| std | 0.676 |\n", + "| value_loss | 0.00597 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 396/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.66 |\n", + "| ep_rew_mean | -0.291 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 84 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 330325 |\n", + "| train/ | |\n", + "| entropy_loss | -6.89 |\n", + "| explained_variance | 0.976 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 66064 |\n", + "| policy_loss | 0.166 |\n", + "| std | 0.673 |\n", + "| value_loss | 0.000944 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 397/600: Total Reward = -0.56\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.73 |\n", + "| ep_rew_mean | -0.29 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 131 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 331160 |\n", + "| train/ | |\n", + "| entropy_loss | -6.87 |\n", + "| explained_variance | 0.98 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 66231 |\n", + "| policy_loss | 0.172 |\n", + "| std | 0.67 |\n", + "| value_loss | 0.00132 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 398/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.12 |\n", + "| ep_rew_mean | -0.324 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 108 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 331995 |\n", + "| train/ | |\n", + "| entropy_loss | -6.88 |\n", + "| explained_variance | 0.699 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 66398 |\n", + "| policy_loss | -0.255 |\n", + "| std | 0.672 |\n", + "| value_loss | 0.00258 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 399/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 142 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 332830 |\n", + "| train/ | |\n", + "| entropy_loss | -6.88 |\n", + "| explained_variance | 0.842 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 66565 |\n", + "| policy_loss | 0.0753 |\n", + "| std | 0.67 |\n", + "| value_loss | 0.00117 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 400/600: Total Reward = -0.31\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.311 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 135 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 333665 |\n", + "| train/ | |\n", + "| entropy_loss | -6.82 |\n", + "| explained_variance | 0.751 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 66732 |\n", + "| policy_loss | -0.0103 |\n", + "| std | 0.665 |\n", + "| value_loss | 0.00488 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 401/600: Total Reward = -0.07\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.306 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 114 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 334500 |\n", + "| train/ | |\n", + "| entropy_loss | -6.82 |\n", + "| explained_variance | 0.699 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 66899 |\n", + "| policy_loss | 0.388 |\n", + "| std | 0.666 |\n", + "| value_loss | 0.00593 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 402/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 132 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 335335 |\n", + "| train/ | |\n", + "| entropy_loss | -6.79 |\n", + "| explained_variance | 0.894 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 67066 |\n", + "| policy_loss | 0.448 |\n", + "| std | 0.662 |\n", + "| value_loss | 0.00662 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 403/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.97 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 86 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 336170 |\n", + "| train/ | |\n", + "| entropy_loss | -6.79 |\n", + "| explained_variance | 0.966 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 67233 |\n", + "| policy_loss | -0.2 |\n", + "| std | 0.663 |\n", + "| value_loss | 0.000921 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 404/600: Total Reward = -0.64\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.11 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 137 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 337005 |\n", + "| train/ | |\n", + "| entropy_loss | -6.76 |\n", + "| explained_variance | 0.855 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 67400 |\n", + "| policy_loss | 0.697 |\n", + "| std | 0.66 |\n", + "| value_loss | 0.0101 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 405/600: Total Reward = -0.87\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.04 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 86 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 337840 |\n", + "| train/ | |\n", + "| entropy_loss | -6.75 |\n", + "| explained_variance | 0.833 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 67567 |\n", + "| policy_loss | -0.317 |\n", + "| std | 0.66 |\n", + "| value_loss | 0.00458 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 406/600: Total Reward = -0.61\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 135 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 338675 |\n", + "| train/ | |\n", + "| entropy_loss | -6.72 |\n", + "| explained_variance | 0.979 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 67734 |\n", + "| policy_loss | 0.399 |\n", + "| std | 0.657 |\n", + "| value_loss | 0.0035 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 407/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.76 |\n", + "| ep_rew_mean | -0.303 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 111 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 339510 |\n", + "| train/ | |\n", + "| entropy_loss | -6.74 |\n", + "| explained_variance | 0.994 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 67901 |\n", + "| policy_loss | 0.315 |\n", + "| std | 0.658 |\n", + "| value_loss | 0.00248 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 408/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 143 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 340345 |\n", + "| train/ | |\n", + "| entropy_loss | -6.7 |\n", + "| explained_variance | 0.686 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 68068 |\n", + "| policy_loss | 0.423 |\n", + "| std | 0.655 |\n", + "| value_loss | 0.0103 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 409/600: Total Reward = -0.72\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.07 |\n", + "| ep_rew_mean | -0.332 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 135 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 341180 |\n", + "| train/ | |\n", + "| entropy_loss | -6.67 |\n", + "| explained_variance | 0.948 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 68235 |\n", + "| policy_loss | 0.323 |\n", + "| std | 0.652 |\n", + "| value_loss | 0.00532 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 410/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.17 |\n", + "| ep_rew_mean | -0.328 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 342015 |\n", + "| train/ | |\n", + "| entropy_loss | -6.67 |\n", + "| explained_variance | 0.949 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 68402 |\n", + "| policy_loss | 0.0739 |\n", + "| std | 0.651 |\n", + "| value_loss | 0.000284 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 411/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.55 |\n", + "| ep_rew_mean | -0.283 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 132 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 342850 |\n", + "| train/ | |\n", + "| entropy_loss | -6.66 |\n", + "| explained_variance | 0.958 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 68569 |\n", + "| policy_loss | -0.0214 |\n", + "| std | 0.651 |\n", + "| value_loss | 0.00108 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 412/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.57 |\n", + "| ep_rew_mean | -0.385 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 82 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 343685 |\n", + "| train/ | |\n", + "| entropy_loss | -6.62 |\n", + "| explained_variance | 0.886 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 68736 |\n", + "| policy_loss | -0.209 |\n", + "| std | 0.647 |\n", + "| value_loss | 0.00253 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 413/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.6 |\n", + "| ep_rew_mean | -0.284 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 136 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 344520 |\n", + "| train/ | |\n", + "| entropy_loss | -6.57 |\n", + "| explained_variance | 0.933 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 68903 |\n", + "| policy_loss | 0.0325 |\n", + "| std | 0.644 |\n", + "| value_loss | 0.000586 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 414/600: Total Reward = -0.24\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.326 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 85 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 345355 |\n", + "| train/ | |\n", + "| entropy_loss | -6.57 |\n", + "| explained_variance | 0.654 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 69070 |\n", + "| policy_loss | -0.126 |\n", + "| std | 0.644 |\n", + "| value_loss | 0.0016 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 415/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.55 |\n", + "| ep_rew_mean | -0.286 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 136 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 346190 |\n", + "| train/ | |\n", + "| entropy_loss | -6.55 |\n", + "| explained_variance | 0.968 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 69237 |\n", + "| policy_loss | 0.283 |\n", + "| std | 0.642 |\n", + "| value_loss | 0.00172 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 416/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.69 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 103 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 347025 |\n", + "| train/ | |\n", + "| entropy_loss | -6.51 |\n", + "| explained_variance | 0.962 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 69404 |\n", + "| policy_loss | 0.049 |\n", + "| std | 0.639 |\n", + "| value_loss | 0.00227 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 417/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 143 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 347860 |\n", + "| train/ | |\n", + "| entropy_loss | -6.52 |\n", + "| explained_variance | 0.892 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 69571 |\n", + "| policy_loss | 0.246 |\n", + "| std | 0.64 |\n", + "| value_loss | 0.00958 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 418/600: Total Reward = -0.47\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.54 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 136 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 348695 |\n", + "| train/ | |\n", + "| entropy_loss | -6.51 |\n", + "| explained_variance | -0.18 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 69738 |\n", + "| policy_loss | -1.38 |\n", + "| std | 0.638 |\n", + "| value_loss | 0.0554 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 419/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 112 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 349530 |\n", + "| train/ | |\n", + "| entropy_loss | -6.51 |\n", + "| explained_variance | 0.999 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 69905 |\n", + "| policy_loss | 0.101 |\n", + "| std | 0.637 |\n", + "| value_loss | 0.000245 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 420/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.71 |\n", + "| ep_rew_mean | -0.286 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 139 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 350365 |\n", + "| train/ | |\n", + "| entropy_loss | -6.5 |\n", + "| explained_variance | -0.475 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 70072 |\n", + "| policy_loss | -0.611 |\n", + "| std | 0.637 |\n", + "| value_loss | 0.0413 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 421/600: Total Reward = -0.41\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.42 |\n", + "| ep_rew_mean | -0.276 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 80 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 351200 |\n", + "| train/ | |\n", + "| entropy_loss | -6.48 |\n", + "| explained_variance | 0.836 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 70239 |\n", + "| policy_loss | -0.23 |\n", + "| std | 0.636 |\n", + "| value_loss | 0.00232 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 422/600: Total Reward = -0.64\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.04 |\n", + "| ep_rew_mean | -0.324 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 137 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 352035 |\n", + "| train/ | |\n", + "| entropy_loss | -6.47 |\n", + "| explained_variance | 0.979 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 70406 |\n", + "| policy_loss | -0.117 |\n", + "| std | 0.636 |\n", + "| value_loss | 0.00089 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 423/600: Total Reward = -0.70\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.295 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 81 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 352870 |\n", + "| train/ | |\n", + "| entropy_loss | -6.46 |\n", + "| explained_variance | 0.881 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 70573 |\n", + "| policy_loss | -0.0739 |\n", + "| std | 0.633 |\n", + "| value_loss | 0.00098 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 424/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.323 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 132 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 353705 |\n", + "| train/ | |\n", + "| entropy_loss | -6.43 |\n", + "| explained_variance | 0.983 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 70740 |\n", + "| policy_loss | -0.095 |\n", + "| std | 0.631 |\n", + "| value_loss | 0.000518 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 425/600: Total Reward = -1.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 86 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 354540 |\n", + "| train/ | |\n", + "| entropy_loss | -6.44 |\n", + "| explained_variance | 0.528 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 70907 |\n", + "| policy_loss | -0.747 |\n", + "| std | 0.632 |\n", + "| value_loss | 0.0215 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 426/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 133 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 355375 |\n", + "| train/ | |\n", + "| entropy_loss | -6.42 |\n", + "| explained_variance | 0.987 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 71074 |\n", + "| policy_loss | -0.231 |\n", + "| std | 0.631 |\n", + "| value_loss | 0.00136 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 427/600: Total Reward = -0.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.295 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 112 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 356210 |\n", + "| train/ | |\n", + "| entropy_loss | -6.38 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 71241 |\n", + "| policy_loss | -0.11 |\n", + "| std | 0.627 |\n", + "| value_loss | 0.000295 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 428/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.304 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 140 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 357045 |\n", + "| train/ | |\n", + "| entropy_loss | -6.35 |\n", + "| explained_variance | -0.314 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 71408 |\n", + "| policy_loss | -0.275 |\n", + "| std | 0.625 |\n", + "| value_loss | 0.0126 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 429/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.317 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 135 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 357880 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 71575 |\n", + "| policy_loss | 0.27 |\n", + "| std | 0.619 |\n", + "| value_loss | 0.00176 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 430/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.46 |\n", + "| ep_rew_mean | -0.374 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 105 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 358715 |\n", + "| train/ | |\n", + "| entropy_loss | -6.31 |\n", + "| explained_variance | 0.94 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 71742 |\n", + "| policy_loss | 0.128 |\n", + "| std | 0.621 |\n", + "| value_loss | 0.00311 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 431/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.81 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 134 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 359550 |\n", + "| train/ | |\n", + "| entropy_loss | -6.31 |\n", + "| explained_variance | 0.932 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 71909 |\n", + "| policy_loss | 0.485 |\n", + "| std | 0.621 |\n", + "| value_loss | 0.00711 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 432/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.82 |\n", + "| ep_rew_mean | -0.309 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 79 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 360385 |\n", + "| train/ | |\n", + "| entropy_loss | -6.27 |\n", + "| explained_variance | 0.923 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 72076 |\n", + "| policy_loss | 0.224 |\n", + "| std | 0.617 |\n", + "| value_loss | 0.00297 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 433/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.73 |\n", + "| ep_rew_mean | -0.299 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 361220 |\n", + "| train/ | |\n", + "| entropy_loss | -6.28 |\n", + "| explained_variance | 0.763 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 72243 |\n", + "| policy_loss | -0.206 |\n", + "| std | 0.619 |\n", + "| value_loss | 0.00127 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 434/600: Total Reward = -0.07\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.77 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 79 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 362055 |\n", + "| train/ | |\n", + "| entropy_loss | -6.27 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 72410 |\n", + "| policy_loss | -0.36 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.00439 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 435/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.1 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 124 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 362890 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.526 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 72577 |\n", + "| policy_loss | -0.112 |\n", + "| std | 0.62 |\n", + "| value_loss | 0.00698 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 436/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.64 |\n", + "| ep_rew_mean | -0.3 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 81 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 363725 |\n", + "| train/ | |\n", + "| entropy_loss | -6.3 |\n", + "| explained_variance | 0.967 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 72744 |\n", + "| policy_loss | 0.462 |\n", + "| std | 0.62 |\n", + "| value_loss | 0.0121 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 437/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.25 |\n", + "| ep_rew_mean | -0.438 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 132 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 364560 |\n", + "| train/ | |\n", + "| entropy_loss | -6.25 |\n", + "| explained_variance | 0.965 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 72911 |\n", + "| policy_loss | 0.0682 |\n", + "| std | 0.617 |\n", + "| value_loss | 0.00111 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 438/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.306 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 91 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 365395 |\n", + "| train/ | |\n", + "| entropy_loss | -6.25 |\n", + "| explained_variance | 0.601 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73078 |\n", + "| policy_loss | -0.559 |\n", + "| std | 0.617 |\n", + "| value_loss | 0.00845 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 439/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 135 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 366230 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.986 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73245 |\n", + "| policy_loss | 0.476 |\n", + "| std | 0.62 |\n", + "| value_loss | 0.00799 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 440/600: Total Reward = -0.24\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.6 |\n", + "| ep_rew_mean | -0.389 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 116 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 367065 |\n", + "| train/ | |\n", + "| entropy_loss | -6.27 |\n", + "| explained_variance | 0.552 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73412 |\n", + "| policy_loss | 0.0433 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.00165 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 441/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.63 |\n", + "| ep_rew_mean | -0.368 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 130 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 367900 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.222 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73579 |\n", + "| policy_loss | -0.941 |\n", + "| std | 0.619 |\n", + "| value_loss | 0.0356 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 442/600: Total Reward = -0.60\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.33 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 368735 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.789 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73746 |\n", + "| policy_loss | 0.099 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.00123 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 443/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.95 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 115 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 369570 |\n", + "| train/ | |\n", + "| entropy_loss | -6.28 |\n", + "| explained_variance | 0.702 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 73913 |\n", + "| policy_loss | -0.399 |\n", + "| std | 0.617 |\n", + "| value_loss | 0.00932 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 444/600: Total Reward = -0.02\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.348 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 124 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 370405 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.965 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74080 |\n", + "| policy_loss | -0.402 |\n", + "| std | 0.619 |\n", + "| value_loss | 0.00599 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 445/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.51 |\n", + "| ep_rew_mean | -0.287 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 106 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 371240 |\n", + "| train/ | |\n", + "| entropy_loss | -6.3 |\n", + "| explained_variance | 0.903 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74247 |\n", + "| policy_loss | -0.473 |\n", + "| std | 0.619 |\n", + "| value_loss | 0.00389 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 446/600: Total Reward = -0.46\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.93 |\n", + "| ep_rew_mean | -0.312 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 372075 |\n", + "| train/ | |\n", + "| entropy_loss | -6.28 |\n", + "| explained_variance | 0.849 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74414 |\n", + "| policy_loss | 0.642 |\n", + "| std | 0.617 |\n", + "| value_loss | 0.00972 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 447/600: Total Reward = -0.59\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.37 |\n", + "| ep_rew_mean | -0.276 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 93 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 372910 |\n", + "| train/ | |\n", + "| entropy_loss | -6.28 |\n", + "| explained_variance | 0.975 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74581 |\n", + "| policy_loss | 0.0328 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.00117 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 448/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.72 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 373745 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.996 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74748 |\n", + "| policy_loss | -0.238 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.00211 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 449/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 86 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 374580 |\n", + "| train/ | |\n", + "| entropy_loss | -6.28 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 74915 |\n", + "| policy_loss | -0.0917 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.000394 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 450/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.399 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 375415 |\n", + "| train/ | |\n", + "| entropy_loss | -6.27 |\n", + "| explained_variance | 0.977 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75082 |\n", + "| policy_loss | 0.136 |\n", + "| std | 0.617 |\n", + "| value_loss | 0.000604 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 451/600: Total Reward = -0.08\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.99 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 78 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 376250 |\n", + "| train/ | |\n", + "| entropy_loss | -6.25 |\n", + "| explained_variance | 0.987 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75249 |\n", + "| policy_loss | 0.162 |\n", + "| std | 0.616 |\n", + "| value_loss | 0.000942 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 452/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.98 |\n", + "| ep_rew_mean | -0.343 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 377085 |\n", + "| train/ | |\n", + "| entropy_loss | -6.26 |\n", + "| explained_variance | 0.962 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75416 |\n", + "| policy_loss | 0.216 |\n", + "| std | 0.616 |\n", + "| value_loss | 0.00184 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 453/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.99 |\n", + "| ep_rew_mean | -0.334 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 78 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 377920 |\n", + "| train/ | |\n", + "| entropy_loss | -6.29 |\n", + "| explained_variance | 0.943 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75583 |\n", + "| policy_loss | 0.56 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.0135 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 454/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.345 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 378755 |\n", + "| train/ | |\n", + "| entropy_loss | -6.26 |\n", + "| explained_variance | 0.989 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75750 |\n", + "| policy_loss | 0.415 |\n", + "| std | 0.615 |\n", + "| value_loss | 0.00759 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 455/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 77 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 379590 |\n", + "| train/ | |\n", + "| entropy_loss | -6.22 |\n", + "| explained_variance | 0.987 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 75917 |\n", + "| policy_loss | -1.06 |\n", + "| std | 0.611 |\n", + "| value_loss | 0.0222 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 456/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.317 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 130 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 380425 |\n", + "| train/ | |\n", + "| entropy_loss | -6.23 |\n", + "| explained_variance | -0.0219 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76084 |\n", + "| policy_loss | -0.933 |\n", + "| std | 0.614 |\n", + "| value_loss | 0.0565 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 457/600: Total Reward = -0.81\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4 |\n", + "| ep_rew_mean | -0.33 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 77 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 381260 |\n", + "| train/ | |\n", + "| entropy_loss | -6.22 |\n", + "| explained_variance | 0.985 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76251 |\n", + "| policy_loss | 0.102 |\n", + "| std | 0.612 |\n", + "| value_loss | 0.00187 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 458/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.6 |\n", + "| ep_rew_mean | -0.279 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 126 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 382095 |\n", + "| train/ | |\n", + "| entropy_loss | -6.21 |\n", + "| explained_variance | 0.903 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76418 |\n", + "| policy_loss | -0.396 |\n", + "| std | 0.613 |\n", + "| value_loss | 0.0055 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 459/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.95 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 75 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 382930 |\n", + "| train/ | |\n", + "| entropy_loss | -6.17 |\n", + "| explained_variance | 0.253 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76585 |\n", + "| policy_loss | -0.343 |\n", + "| std | 0.61 |\n", + "| value_loss | 0.0118 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 460/600: Total Reward = -0.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.14 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 115 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 383765 |\n", + "| train/ | |\n", + "| entropy_loss | -6.18 |\n", + "| explained_variance | 0.908 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76752 |\n", + "| policy_loss | -0.0353 |\n", + "| std | 0.611 |\n", + "| value_loss | 0.000821 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 461/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.22 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 61 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 8 |\n", + "| total_timesteps | 384600 |\n", + "| train/ | |\n", + "| entropy_loss | -6.2 |\n", + "| explained_variance | 0.201 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 76919 |\n", + "| policy_loss | 0.334 |\n", + "| std | 0.613 |\n", + "| value_loss | 0.00566 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 462/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 95 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 385435 |\n", + "| train/ | |\n", + "| entropy_loss | -6.17 |\n", + "| explained_variance | 0.944 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77086 |\n", + "| policy_loss | 0.105 |\n", + "| std | 0.61 |\n", + "| value_loss | 0.000706 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 463/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.38 |\n", + "| ep_rew_mean | -0.347 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 126 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 386270 |\n", + "| train/ | |\n", + "| entropy_loss | -6.16 |\n", + "| explained_variance | 0.776 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77253 |\n", + "| policy_loss | 0.508 |\n", + "| std | 0.61 |\n", + "| value_loss | 0.00696 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 464/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.93 |\n", + "| ep_rew_mean | -0.316 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 90 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 387105 |\n", + "| train/ | |\n", + "| entropy_loss | -6.15 |\n", + "| explained_variance | 0.953 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77420 |\n", + "| policy_loss | -0.544 |\n", + "| std | 0.608 |\n", + "| value_loss | 0.00598 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 465/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.46 |\n", + "| ep_rew_mean | -0.367 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 118 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 387940 |\n", + "| train/ | |\n", + "| entropy_loss | -6.13 |\n", + "| explained_variance | 0.934 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77587 |\n", + "| policy_loss | 0.511 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.0118 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 466/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.39 |\n", + "| ep_rew_mean | -0.477 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 82 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 388775 |\n", + "| train/ | |\n", + "| entropy_loss | -6.11 |\n", + "| explained_variance | 0.839 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77754 |\n", + "| policy_loss | -0.432 |\n", + "| std | 0.604 |\n", + "| value_loss | 0.00884 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 467/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 114 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 389610 |\n", + "| train/ | |\n", + "| entropy_loss | -6.09 |\n", + "| explained_variance | -0.0693 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 77921 |\n", + "| policy_loss | 2.16 |\n", + "| std | 0.603 |\n", + "| value_loss | 0.165 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 468/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.78 |\n", + "| ep_rew_mean | -0.4 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 73 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 390445 |\n", + "| train/ | |\n", + "| entropy_loss | -6.11 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78088 |\n", + "| policy_loss | 0.0607 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.000318 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, { "output_type": "stream", "name": "stdout", "text": [ - "Requirement already satisfied: wandb in /usr/local/lib/python3.11/dist-packages (0.19.6)\n", - "Requirement already satisfied: tensorboard in /usr/local/lib/python3.11/dist-packages (2.18.0)\n", - "Requirement already satisfied: click!=8.0.0,>=7.1 in /usr/local/lib/python3.11/dist-packages (from wandb) (8.1.8)\n", - "Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (0.4.0)\n", - "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (3.1.44)\n", - "Requirement already satisfied: platformdirs in 
/usr/local/lib/python3.11/dist-packages (from wandb) (4.3.6)\n", - "Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.25.6)\n", - "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (5.9.5)\n", - "Requirement already satisfied: pydantic<3,>=2.6 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.10.6)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from wandb) (6.0.2)\n", - "Requirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.32.3)\n", - "Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.22.0)\n", - "Requirement already satisfied: setproctitle in /usr/local/lib/python3.11/dist-packages (from wandb) (1.3.4)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from wandb) (75.1.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.4 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.12.2)\n", - "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.4.0)\n", - "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.70.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (3.7)\n", - "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.26.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorboard) (24.2)\n", - "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.17.0)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from 
tensorboard) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (3.1.3)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=2.6->wandb) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=2.6->wandb) (2.27.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2025.1.31)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from werkzeug>=1.0.1->tensorboard) (3.0.2)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)\n", - "Collecting stable-baselines3\n", - " Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)\n", - "Requirement already satisfied: gymnasium<1.1.0,>=0.29.1 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (1.0.0)\n", - "Requirement already satisfied: numpy<3.0,>=1.20 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (1.26.4)\n", - "Requirement already satisfied: torch<3.0,>=2.3 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (2.5.1+cu124)\n", - "Requirement already 
satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (3.1.1)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (2.2.2)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (3.10.0)\n", - "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from gymnasium<1.1.0,>=0.29.1->stable-baselines3) (4.12.2)\n", - "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium<1.1.0,>=0.29.1->stable-baselines3) (0.0.4)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.17.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.1.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (2024.10.0)\n", - "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", - "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", - "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", - "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading 
nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", - "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", - "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", - "Collecting nvidia-curand-cu12==10.3.5.147 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", - "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", - "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", - "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)\n", - " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", - "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.1.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (1.13.1)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0,>=2.3->stable-baselines3) 
(1.3.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (1.3.1)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (4.56.0)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (1.4.8)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (24.2)\n", - "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (11.1.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (3.2.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3) (2025.1)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3) (2025.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.7->matplotlib->stable-baselines3) (1.17.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch<3.0,>=2.3->stable-baselines3) (3.0.2)\n", - "Downloading stable_baselines3-2.5.0-py3-none-any.whl (183 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - 
"\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m90.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m72.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m42.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading 
nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m48.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, stable-baselines3\n", - " Attempting uninstall: nvidia-nvjitlink-cu12\n", - " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", - " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", - " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", - " Attempting uninstall: nvidia-curand-cu12\n", - " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", - " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", - " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", - " Attempting uninstall: nvidia-cufft-cu12\n", - " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", - " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", - " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", - " Attempting uninstall: nvidia-cuda-runtime-cu12\n", - " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", - " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", - " Successfully uninstalled 
nvidia-cuda-runtime-cu12-12.5.82\n", - " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", - " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", - " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", - " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", - " Attempting uninstall: nvidia-cuda-cupti-cu12\n", - " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", - " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", - " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", - " Attempting uninstall: nvidia-cublas-cu12\n", - " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", - " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", - " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", - " Attempting uninstall: nvidia-cusparse-cu12\n", - " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", - " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", - " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", - " Attempting uninstall: nvidia-cudnn-cu12\n", - " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", - " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", - " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", - " Attempting uninstall: nvidia-cusolver-cu12\n", - " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", - " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", - " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", - "Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 stable-baselines3-2.5.0\n", - "Requirement already satisfied: wandb in /usr/local/lib/python3.11/dist-packages (0.19.6)\n", - "Requirement already satisfied: tensorboard in /usr/local/lib/python3.11/dist-packages (2.18.0)\n", - "Requirement 
already satisfied: stable-baselines3 in /usr/local/lib/python3.11/dist-packages (2.5.0)\n", - "Requirement already satisfied: gymnasium in /usr/local/lib/python3.11/dist-packages (1.0.0)\n", - "Collecting shimmy\n", - " Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)\n", - "Requirement already satisfied: click!=8.0.0,>=7.1 in /usr/local/lib/python3.11/dist-packages (from wandb) (8.1.8)\n", - "Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (0.4.0)\n", - "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (3.1.44)\n", - "Requirement already satisfied: platformdirs in /usr/local/lib/python3.11/dist-packages (from wandb) (4.3.6)\n", - "Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.25.6)\n", - "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (5.9.5)\n", - "Requirement already satisfied: pydantic<3,>=2.6 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.10.6)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from wandb) (6.0.2)\n", - "Requirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.32.3)\n", - "Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.22.0)\n", - "Requirement already satisfied: setproctitle in /usr/local/lib/python3.11/dist-packages (from wandb) (1.3.4)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from wandb) (75.1.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.4 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.12.2)\n", - "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.4.0)\n", - "Requirement 
already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.70.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (3.7)\n", - "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.26.4)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorboard) (24.2)\n", - "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (1.17.0)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard) (3.1.3)\n", - "Requirement already satisfied: torch<3.0,>=2.3 in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (2.5.1+cu124)\n", - "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (3.1.1)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (2.2.2)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from stable-baselines3) (3.10.0)\n", - "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.11/dist-packages (from gymnasium) (0.0.4)\n", - "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=2.6->wandb) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=2.6->wandb) (2.27.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2025.1.31)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.17.0)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.1.5)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (2024.10.0)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (9.1.0.70)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.5.8)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages 
(from torch<3.0,>=2.3->stable-baselines3) (11.2.1.3)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (10.3.5.147)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (11.6.1.9)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.3.1.170)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (2.21.5)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (12.4.127)\n", - "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (3.1.0)\n", - "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0,>=2.3->stable-baselines3) (1.13.1)\n", - "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0,>=2.3->stable-baselines3) (1.3.0)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.11/dist-packages (from werkzeug>=1.0.1->tensorboard) (3.0.2)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (1.3.1)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in 
/usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (4.56.0)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (1.4.8)\n", - "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (11.1.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (3.2.1)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.11/dist-packages (from matplotlib->stable-baselines3) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3) (2025.1)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->stable-baselines3) (2025.1)\n", - "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)\n", - "Downloading Shimmy-2.0.0-py3-none-any.whl (30 kB)\n", - "Installing collected packages: shimmy\n", - "Successfully installed shimmy-2.0.0\n", - "Collecting huggingface_sb3\n", - " Downloading huggingface_sb3-3.0-py3-none-any.whl.metadata (6.3 kB)\n", - "Requirement already satisfied: huggingface-hub~=0.8 in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (0.28.1)\n", - "Requirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (6.0.2)\n", - "Requirement already satisfied: wasabi in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (1.1.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (1.26.4)\n", - "Requirement already satisfied: cloudpickle>=1.6 in /usr/local/lib/python3.11/dist-packages (from huggingface_sb3) (3.1.1)\n", - "Requirement already 
satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (3.17.0)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (24.2)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2.32.3)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.67.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.12.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (3.4.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (2.3.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (2025.1.31)\n", - "Downloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)\n", - "Installing collected packages: huggingface_sb3\n", - "Successfully installed huggingface_sb3-3.0\n" + "Episode 469/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.72 |\n", + "| ep_rew_mean | -0.389 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 119 |\n", + "| iterations 
| 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 391280 |\n", + "| train/ | |\n", + "| entropy_loss | -6.1 |\n", + "| explained_variance | 0.989 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78255 |\n", + "| policy_loss | 0.169 |\n", + "| std | 0.604 |\n", + "| value_loss | 0.000894 |\n", + "------------------------------------\n" ] - } - ], - "source": [ - "! pip install wandb tensorboard\n", - "! pip install stable-baselines3\n", - "! pip install wandb tensorboard stable-baselines3 gymnasium shimmy\n", - "! pip install huggingface_sb3" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-rkwVtpujKtz" - }, - "source": [ - "### Get familiar with Stable-Baselines3" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17, - "referenced_widgets": [ - "a52deb23470f4f9d9acfe28e69f47fb5", - "c7294d001a9c44aca712bba2e7141b35", - "659607dd3c294904913ccb88a2ecfea5", - "ec4b7fdf9f344b5eb5aabfe00395ddea", - "4064541754324b308f9e02565edc7fc2", - "cc92a18e333e454eaf4d2b27ccad782b", - "136b0ee69b6a4012a1c4a92ea45a352e", - "02dcba9d3e194f79890227e180c37474", - "6376dea1e82f4c159ea5062c1b3b14ef", - "1a6532f14ca74a479f01155019a2a30f", - "3f9f02c927bb4e0c9e0002703189fbfa", - "c8a5fea9ebed4821821592ae85b0af71", - "439ac621f2cb4c0d91749ee09729453b", - "f116aadb2ef94eb8aa8a6b43c7b4fb5d", - "4f814644155549caa91d2d81d9333740", - "3675a21548ee4857b98b3bc0a9c206f3", - "463f22dcd9da484f98795b42c7406fb4", - "632684e2f5e648f3ad8eb6f65acbdd6b", - "b5bc5fcb6fea4586839b5dc6e43ce0f9", - "decc3d18711a459badab9c4def213303" - ] - }, - "id": "dWr7eVP7x5r5", - "outputId": "fb6dd84c-a12f-4d1e-b1fc-a3f694704037" - }, - "outputs": [ + }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…" - ], - "application/vnd.jupyter.widget-view+json": { - 
"version_major": 2, - "version_minor": 0, - "model_id": "a52deb23470f4f9d9acfe28e69f47fb5" - } - }, - "metadata": {} - } - ], - "source": [ - "from huggingface_hub import notebook_login\n", - "#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\n", - "notebook_login()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 373 + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 470/600: Total Reward = -0.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.71 |\n", + "| ep_rew_mean | -0.285 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 74 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 392115 |\n", + "| train/ | |\n", + "| entropy_loss | -6.11 |\n", + "| explained_variance | 0.81 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78422 |\n", + "| policy_loss | 0.815 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.0179 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 471/600: Total Reward = -0.71\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 127 |\n", + "| 
iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 392950 |\n", + "| train/ | |\n", + "| entropy_loss | -6.07 |\n", + "| explained_variance | 0.888 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 78589 |\n", + "| policy_loss | 0.253 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.0038 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "id": "OEMHjcAijHgB", - "outputId": "ada91d96-a577-4ebb-a68d-1ee792c71599" - }, - "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Using cpu device\n", + "Episode 472/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 24.3 |\n", - "| ep_rew_mean | 24.3 |\n", + "| ep_len_mean | 3.58 |\n", + "| ep_rew_mean | -0.282 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 420 |\n", + "| fps | 75 |\n", "| iterations | 100 |\n", - "| time_elapsed | 1 |\n", - "| total_timesteps | 500 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 393785 |\n", "| train/ | |\n", - "| entropy_loss | -0.653 |\n", - "| explained_variance | -0.722 |\n", + "| entropy_loss | -6.06 |\n", + "| explained_variance | 0.737 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 99 |\n", - "| policy_loss | 2.18 |\n", - "| value_loss | 17.5 |\n", - "------------------------------------\n", + "| n_updates | 78756 |\n", + "| policy_loss | 0.548 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.0127 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, 
please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 473/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 24 |\n", - "| ep_rew_mean | 24 |\n", + "| ep_len_mean | 3.47 |\n", + "| ep_rew_mean | -0.262 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 454 |\n", - "| iterations | 200 |\n", - "| time_elapsed | 2 |\n", - "| total_timesteps | 1000 |\n", + "| fps | 127 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 394620 |\n", "| train/ | |\n", - "| entropy_loss | -0.619 |\n", - "| explained_variance | -0.0863 |\n", + "| entropy_loss | -6.04 |\n", + "| explained_variance | 0.955 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 199 |\n", - "| policy_loss | 1.87 |\n", - "| value_loss | 8.7 |\n", + "| n_updates | 78923 |\n", + "| policy_loss | -0.109 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.00212 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 474/600: Total Reward = -1.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.97 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 74 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 395455 |\n", + "| train/ | |\n", + "| entropy_loss | -6.01 |\n", + "| explained_variance | 0.984 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79090 |\n", + "| policy_loss | 0.104 
|\n", + "| std | 0.598 |\n", + "| value_loss | 0.000562 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 475/600: Total Reward = -0.45\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 22.2 |\n", - "| ep_rew_mean | 22.2 |\n", + "| ep_len_mean | 4.1 |\n", + "| ep_rew_mean | -0.34 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 457 |\n", - "| iterations | 300 |\n", + "| fps | 131 |\n", + "| iterations | 100 |\n", "| time_elapsed | 3 |\n", - "| total_timesteps | 1500 |\n", + "| total_timesteps | 396290 |\n", "| train/ | |\n", - "| entropy_loss | -0.63 |\n", - "| explained_variance | -0.139 |\n", + "| entropy_loss | -5.99 |\n", + "| explained_variance | 0.504 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 299 |\n", - "| policy_loss | 1.65 |\n", - "| value_loss | 7.89 |\n", + "| n_updates | 79257 |\n", + "| policy_loss | -0.614 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.0092 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 476/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.12 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 74 |\n", + "| 
iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 397125 |\n", + "| train/ | |\n", + "| entropy_loss | -6.01 |\n", + "| explained_variance | 0.696 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79424 |\n", + "| policy_loss | -0.382 |\n", + "| std | 0.598 |\n", + "| value_loss | 0.00792 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 477/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 22.7 |\n", - "| ep_rew_mean | 22.7 |\n", + "| ep_len_mean | 4.02 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 474 |\n", - "| iterations | 400 |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", "| time_elapsed | 4 |\n", - "| total_timesteps | 2000 |\n", + "| total_timesteps | 397960 |\n", "| train/ | |\n", - "| entropy_loss | -0.672 |\n", - "| explained_variance | 0.0291 |\n", + "| entropy_loss | -5.99 |\n", + "| explained_variance | 0.921 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 399 |\n", - "| policy_loss | 1.41 |\n", - "| value_loss | 6.71 |\n", - "------------------------------------\n", + "| n_updates | 79591 |\n", + "| policy_loss | -0.296 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00324 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Episode 478/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 23.7 |\n", - "| ep_rew_mean | 23.7 |\n", + "| ep_len_mean | 3.6 |\n", + "| ep_rew_mean | -0.276 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 489 |\n", - "| iterations | 500 |\n", - "| time_elapsed | 5 |\n", - "| total_timesteps | 2500 |\n", + "| fps | 72 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 398795 |\n", "| train/ | |\n", - "| entropy_loss | -0.508 |\n", - "| explained_variance | 0.0956 |\n", + "| entropy_loss | -5.96 |\n", + "| explained_variance | 0.99 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 499 |\n", - "| policy_loss | 2.08 |\n", - "| value_loss | 6.14 |\n", + "| n_updates | 79758 |\n", + "| policy_loss | -0.257 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00125 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 479/600: Total Reward = -0.44\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.81 |\n", + "| ep_rew_mean | -0.306 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 399630 |\n", + "| train/ | |\n", + "| entropy_loss | -5.96 |\n", + "| explained_variance | 0.786 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 79925 |\n", + "| policy_loss | -0.259 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00329 |\n", + "------------------------------------\n" + ] + }, + { + 
"output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 480/600: Total Reward = -0.25\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 26.6 |\n", - "| ep_rew_mean | 26.6 |\n", + "| ep_len_mean | 3.33 |\n", + "| ep_rew_mean | -0.263 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 501 |\n", - "| iterations | 600 |\n", - "| time_elapsed | 5 |\n", - "| total_timesteps | 3000 |\n", + "| fps | 76 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 400465 |\n", "| train/ | |\n", - "| entropy_loss | -0.662 |\n", - "| explained_variance | -0.00099 |\n", + "| entropy_loss | -5.95 |\n", + "| explained_variance | 0.989 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 599 |\n", - "| policy_loss | 1.13 |\n", - "| value_loss | 5.36 |\n", + "| n_updates | 80092 |\n", + "| policy_loss | -0.0605 |\n", + "| std | 0.591 |\n", + "| value_loss | 0.000203 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 481/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.26 |\n", + "| ep_rew_mean | -0.338 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 401300 |\n", + 
"| train/ | |\n", + "| entropy_loss | -5.9 |\n", + "| explained_variance | 0.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80259 |\n", + "| policy_loss | -0.295 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.00301 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 482/600: Total Reward = -0.05\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 28.1 |\n", - "| ep_rew_mean | 28.1 |\n", + "| ep_len_mean | 4.26 |\n", + "| ep_rew_mean | -0.351 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 509 |\n", - "| iterations | 700 |\n", + "| fps | 78 |\n", + "| iterations | 100 |\n", "| time_elapsed | 6 |\n", - "| total_timesteps | 3500 |\n", + "| total_timesteps | 402135 |\n", "| train/ | |\n", - "| entropy_loss | -0.627 |\n", - "| explained_variance | 0.024 |\n", + "| entropy_loss | -5.9 |\n", + "| explained_variance | 0.844 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 699 |\n", - "| policy_loss | -10.7 |\n", - "| value_loss | 642 |\n", + "| n_updates | 80426 |\n", + "| policy_loss | -0.185 |\n", + "| std | 0.589 |\n", + "| value_loss | 0.000988 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 483/600: Total Reward = -0.34\n", + "Logging to runs/aqrdlwti/A2C_0\n", 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 126 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 402970 |\n", + "| train/ | |\n", + "| entropy_loss | -5.93 |\n", + "| explained_variance | 0.54 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80593 |\n", + "| policy_loss | 0.166 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.00156 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 484/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 30 |\n", - "| ep_rew_mean | 30 |\n", + "| ep_len_mean | 3.87 |\n", + "| ep_rew_mean | -0.321 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 516 |\n", - "| iterations | 800 |\n", - "| time_elapsed | 7 |\n", - "| total_timesteps | 4000 |\n", + "| fps | 87 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 403805 |\n", "| train/ | |\n", - "| entropy_loss | -0.663 |\n", - "| explained_variance | 0.00948 |\n", + "| entropy_loss | -5.93 |\n", + "| explained_variance | 0.983 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 799 |\n", - "| policy_loss | 1.05 |\n", - "| value_loss | 4.53 |\n", + "| n_updates | 80760 |\n", + "| policy_loss | -0.169 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00128 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 
\u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 485/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.83 |\n", + "| ep_rew_mean | -0.318 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 404640 |\n", + "| train/ | |\n", + "| entropy_loss | -5.95 |\n", + "| explained_variance | 0.94 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 80927 |\n", + "| policy_loss | -0.00603 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.000731 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 486/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 33.5 |\n", - "| ep_rew_mean | 33.5 |\n", + "| ep_len_mean | 3.9 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 519 |\n", - "| iterations | 900 |\n", - "| time_elapsed | 8 |\n", - "| total_timesteps | 4500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.648 |\n", - "| explained_variance | -0.00115 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 899 |\n", - "| policy_loss | 0.79 |\n", - "| value_loss | 4.03 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 36.1 |\n", - "| 
ep_rew_mean | 36.1 |\n", - "| time/ | |\n", - "| fps | 521 |\n", - "| iterations | 1000 |\n", - "| time_elapsed | 9 |\n", - "| total_timesteps | 5000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.623 |\n", - "| explained_variance | -0.000646 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 999 |\n", - "| policy_loss | 0.915 |\n", - "| value_loss | 3.6 |\n", - "-------------------------------------\n", + "| fps | 87 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 405475 |\n", + "| train/ | |\n", + "| entropy_loss | -6 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 81094 |\n", + "| policy_loss | -0.0625 |\n", + "| std | 0.598 |\n", + "| value_loss | 0.000517 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 487/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 39.5 |\n", - "| ep_rew_mean | 39.5 |\n", + "| ep_len_mean | 4.97 |\n", + "| ep_rew_mean | -0.406 |\n", + "| success_rate | 0.98 |\n", "| time/ | |\n", - "| fps | 526 |\n", - "| iterations | 1100 |\n", - "| time_elapsed | 10 |\n", - "| total_timesteps | 5500 |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 406310 |\n", "| train/ | |\n", - "| entropy_loss | -0.555 |\n", - "| explained_variance | 0.00192 |\n", + "| entropy_loss | -6.05 |\n", + "| explained_variance | 0.785 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1099 |\n", - "| policy_loss | 0.82 |\n", - "| value_loss | 3.08 |\n", - 
"------------------------------------\n", + "| n_updates | 81261 |\n", + "| policy_loss | 0.111 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.0034 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 488/600: Total Reward = -0.39\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 43.3 |\n", - "| ep_rew_mean | 43.3 |\n", + "| ep_len_mean | 4.12 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 530 |\n", - "| iterations | 1200 |\n", - "| time_elapsed | 11 |\n", - "| total_timesteps | 6000 |\n", + "| fps | 96 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 407145 |\n", "| train/ | |\n", - "| entropy_loss | -0.479 |\n", - "| explained_variance | 0.000166 |\n", + "| entropy_loss | -6.06 |\n", + "| explained_variance | 0.86 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1199 |\n", - "| policy_loss | 1.29 |\n", - "| value_loss | 2.62 |\n", - "------------------------------------\n", + "| n_updates | 81428 |\n", + "| policy_loss | -0.779 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.0267 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 489/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", 
"------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 49.3 |\n", - "| ep_rew_mean | 49.3 |\n", + "| ep_len_mean | 4.12 |\n", + "| ep_rew_mean | -0.325 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 530 |\n", - "| iterations | 1300 |\n", - "| time_elapsed | 12 |\n", - "| total_timesteps | 6500 |\n", + "| fps | 116 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 407980 |\n", "| train/ | |\n", - "| entropy_loss | -0.533 |\n", - "| explained_variance | 0.00133 |\n", + "| entropy_loss | -6 |\n", + "| explained_variance | 0.968 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1299 |\n", - "| policy_loss | 0.644 |\n", - "| value_loss | 2.19 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 52.9 |\n", - "| ep_rew_mean | 52.9 |\n", - "| time/ | |\n", - "| fps | 533 |\n", - "| iterations | 1400 |\n", - "| time_elapsed | 13 |\n", - "| total_timesteps | 7000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.479 |\n", - "| explained_variance | -0.000509 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1399 |\n", - "| policy_loss | 0.566 |\n", - "| value_loss | 1.82 |\n", - "-------------------------------------\n", + "| n_updates | 81595 |\n", + "| policy_loss | -0.255 |\n", + "| std | 0.598 |\n", + "| value_loss | 0.00208 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 490/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 57 |\n", - "| ep_rew_mean | 57 |\n", + "| 
ep_len_mean | 3.31 |\n", + "| ep_rew_mean | -0.262 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 536 |\n", - "| iterations | 1500 |\n", - "| time_elapsed | 13 |\n", - "| total_timesteps | 7500 |\n", + "| fps | 107 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 408815 |\n", "| train/ | |\n", - "| entropy_loss | -0.561 |\n", - "| explained_variance | 4.95e-06 |\n", + "| entropy_loss | -6 |\n", + "| explained_variance | 0.975 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1499 |\n", - "| policy_loss | 0.387 |\n", - "| value_loss | 1.46 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 60.7 |\n", - "| ep_rew_mean | 60.7 |\n", - "| time/ | |\n", - "| fps | 538 |\n", - "| iterations | 1600 |\n", - "| time_elapsed | 14 |\n", - "| total_timesteps | 8000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.376 |\n", - "| explained_variance | -6.74e-05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1599 |\n", - "| policy_loss | 0.585 |\n", - "| value_loss | 1.13 |\n", - "-------------------------------------\n", + "| n_updates | 81762 |\n", + "| policy_loss | 0.301 |\n", + "| std | 0.598 |\n", + "| value_loss | 0.00473 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 491/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 65.3 |\n", - "| ep_rew_mean | 65.3 |\n", + "| ep_len_mean | 3.54 |\n", + "| ep_rew_mean | -0.285 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 541 |\n", - 
"| iterations | 1700 |\n", - "| time_elapsed | 15 |\n", - "| total_timesteps | 8500 |\n", + "| fps | 124 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 409650 |\n", "| train/ | |\n", - "| entropy_loss | -0.268 |\n", - "| explained_variance | 1.09e-05 |\n", + "| entropy_loss | -5.97 |\n", + "| explained_variance | 0.969 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1699 |\n", - "| policy_loss | 0.103 |\n", - "| value_loss | 0.85 |\n", - "------------------------------------\n", + "| n_updates | 81929 |\n", + "| policy_loss | 0.355 |\n", + "| std | 0.597 |\n", + "| value_loss | 0.00462 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 492/600: Total Reward = -0.08\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 68.6 |\n", - "| ep_rew_mean | 68.6 |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 543 |\n", - "| iterations | 1800 |\n", - "| time_elapsed | 16 |\n", - "| total_timesteps | 9000 |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 410485 |\n", "| train/ | |\n", - "| entropy_loss | -0.474 |\n", - "| explained_variance | 0.000158 |\n", + "| entropy_loss | -5.98 |\n", + "| explained_variance | 0.98 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1799 |\n", - "| policy_loss | 0.164 |\n", - "| value_loss | 0.603 |\n", - "------------------------------------\n", + "| n_updates | 82096 |\n", + "| policy_loss | 0.145 |\n", + "| std | 0.597 |\n", + "| value_loss | 0.00152 |\n", 
+ "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 493/600: Total Reward = -0.51\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 72.6 |\n", - "| ep_rew_mean | 72.6 |\n", + "| ep_len_mean | 4.52 |\n", + "| ep_rew_mean | -0.384 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 545 |\n", - "| iterations | 1900 |\n", - "| time_elapsed | 17 |\n", - "| total_timesteps | 9500 |\n", + "| fps | 109 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 411320 |\n", "| train/ | |\n", - "| entropy_loss | -0.486 |\n", - "| explained_variance | 4.12e-05 |\n", + "| entropy_loss | -5.95 |\n", + "| explained_variance | 0.974 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 1899 |\n", - "| policy_loss | 0.291 |\n", - "| value_loss | 0.398 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 76.6 |\n", - "| ep_rew_mean | 76.6 |\n", - "| time/ | |\n", - "| fps | 547 |\n", - "| iterations | 2000 |\n", - "| time_elapsed | 18 |\n", - "| total_timesteps | 10000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.351 |\n", - "| explained_variance | -0.000275 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1999 |\n", - "| policy_loss | 0.4 |\n", - "| value_loss | 0.236 |\n", - "-------------------------------------\n" + "| n_updates | 82263 |\n", + "| policy_loss | 0.314 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.0022 |\n", + "------------------------------------\n" ] }, { - "data": { - "image/png": "", - "text/plain": [ - "<Figure size 640x480 
with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import gymnasium as gym\n", - "from stable_baselines3 import A2C\n", - "from stable_baselines3.common.env_util import make_vec_env\n", - "import numpy as np\n", - "\n", - "# Créer un environnement vectorisé\n", - "vec_env = make_vec_env(\"CartPole-v1\", n_envs=1)\n", - "\n", - "# Initialiser le modèle A2C avec la politique MLP\n", - "model = A2C(\"MlpPolicy\", vec_env, verbose=1)\n", - "\n", - "# Entraîner le modèle\n", - "model.learn(total_timesteps=10000)\n", - "\n", - "# Sauvegarder le modèle\n", - "model.save(\"a2c_cartpole\")\n", - "\n", - "# Charger le modèle après l'entraînement\n", - "del model # Supprimer le modèle pour simuler l'enregistrement et le rechargement\n", - "model = A2C.load(\"a2c_cartpole\")\n", - "\n", - "# Réinitialiser l'environnement\n", - "obs = vec_env.reset()\n", - "\n", - "# Variables pour suivre les récompenses et les épisodes\n", - "episode_rewards = [] # Récompenses totales par épisode\n", - "current_rewards = [0] * vec_env.num_envs # Suivre les récompenses de chaque environnement\n", - "num_episodes = 0 # Compter le nombre d'épisodes terminés\n", - "\n", - "# Liste pour stocker les images pour la vidéo\n", - "frames = []\n", - "\n", - "# Exécuter le modèle et capturer des images pour la vidéo\n", - "while num_episodes < 500:\n", - " action, _states = model.predict(obs)\n", - " obs, rewards, dones, info = vec_env.step(action)\n", - " # Mettre à jour les récompenses et vérifier la fin de l'épisode\n", - " for i in range(vec_env.num_envs):\n", - " current_rewards[i] += rewards[i]\n", - "\n", - " if dones[i]:\n", - " episode_rewards.append(current_rewards[i])\n", - " current_rewards[i] = 0 # Réinitialiser pour le prochain épisode\n", - " num_episodes += 1\n", - "\n", - "\n", - "# Fermer l'environnement après l'évaluation\n", - "vec_env.close()\n", - "\n", - "\n", - "# Afficher\n", - "import matplotlib.pyplot as plt\n", - 
"plt.plot(episode_rewards)\n", - "plt.xlabel(\"Episode\")\n", - "plt.ylabel(\"Total Reward\")\n", - "plt.title(\"Total Reward per Episode - A2C\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CZOZmiAwyqNE" - }, - "source": [ - "upload the model\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 489, - "referenced_widgets": [ - "b30ea1ecb76544e4a4b4b6171e71b728", - "f4987c77de034d6a9a5fbe63f5db9b9e", - "bdbb3e03cdda455ca313b37212800dbb", - "e3e212292e2649f1969c4c31647cb680", - "dced652d1bd14be7ba2b65eae579fba5", - "adc0827744e1479b8833205a44c83e1b", - "9564480530bc447da27e15749428bdfd", - "9286d4d05f354073a56ccab0f3fd0bba", - "852bd4183a5c4e2b9308a8f2d0f7191d", - "297db692b2d749979115c2468eb4fb00", - "fad8b1c404724e4dbd05c747b1bcd5de", - "b515add3f25544cbb2b14b79b2e2770a", - "82fedcb1de0c4311bdab0a773473de38", - "0ed8dfb35a3842bf8be3ae505eeba5d1", - "aef7b10c8f44483c825b7fd9a847f089", - "87403f7d79a64dce87b5b099fc68d0b5", - "16e142d0ae6f4e9b83fcf24b79b9a858", - "8d88faf5346647fcb45737d57d628f41", - "a46532aef6c042b884586a337658727c", - "e43e0ac1d310408b914dddfaee691f9a", - "ecf4c9e4c7cd4ba698a5e8c4aa8b3071", - "253114895704438e802214336c28eaf2", - "b9f0d24741ba48308545746354105fc3", - "31324087708d4cfca89d7a16e3362a6d", - "10bbdba263394d43937799fb02a5308e", - "8f7df43c9c924ec59695450fa933699f", - "db5981fe348443ee9ea1e17a3c86aa34", - "ac6c299386344084ae368388e69f247a", - "87f804cec1814961a3f20d731cf1cb4b", - "dd70225db29144e4908328ba805f4771", - "cfd430b93720484583e1bfa3c6ea944a", - "b93f82c0226747c5a4cf124d92c4a359", - "f824d4ff15ec435f94fd0ffd1950064a", - "71ccd1edcb2149f4a5d79b047db05407", - "c753780ac4be491cb5868d0c65495055", - "8ab12f359394454ea92d8810ff7a3f21", - "8ab306238dc74406928f03c2b665c889", - "d2bc8e91840d498eaaec6040b4fb6356", - "1749d9f40d0348d7ae4709fc273c9121", - "6d42ee9f0e6e4195a8bae8981d14bfea", - 
"b35b335258a641fb8c52e2d68afdea95", - "f4c0efc7782e41e5966d2ae9eba94a6a", - "64b05aba20224e9caebc639dc6dd7c8f", - "37169f3a5e7048efb2b8dce9dbba1a3c", - "de7f77f12aa0470ab36a5afecd9f14fc", - "1ee3d947c1844b138e29a138801a5e00", - "ad10bcfa05b64376905c996ecc1d455d", - "4e4f9c3ec0644222b01f71e9a768da09", - "c0b5a4535a3b4ca2b0e7c86e1b3ab5ec", - "53e05aff6c98440e852b7d426197edb1", - "666bb151dce54bec81944df29f2a8a3b", - "3d9b748f2b5d460cbba3332dd1ee43b7", - "360923166a664f24971d958a204eb04c", - "331d5ea6d2a743c2a9bea658cda2e80a", - "e8db5be66c3342dc8817b3990fd4fd12", - "290118356e7444249cf0ea7a6bfe37f6", - "bf2f7a273e7a479b8e97cf5dedcaddb8", - "e77629a2b480411692154ed4200c12b4", - "b2c8bbf1f1024fe59e602e89b3908d1d" - ] - }, - "id": "QoXoWAXjyvbM", - "outputId": "b360fad3-510a-4624-923b-e2b8f18e92ca" - }, - "outputs": [ - { - "name": "stdout", "output_type": "stream", + "name": "stderr", "text": [ - "\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n", - "create a model card and push everything to the hub. 
It might take up to 1min.\n", - "This is a work in progress: if you encounter a bug, please open an issue.\u001b[0m\n", - "Saving video to C:\\Users\\BYCInfo\\AppData\\Local\\Temp\\tmpee8exbb8\\-step-0-to-step-1000.mp4\n", - "MoviePy - Building video C:\\Users\\BYCInfo\\AppData\\Local\\Temp\\tmpee8exbb8\\-step-0-to-step-1000.mp4.\n", - "MoviePy - Writing video C:\\Users\\BYCInfo\\AppData\\Local\\Temp\\tmpee8exbb8\\-step-0-to-step-1000.mp4\n", - "\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "name": "stderr", "output_type": "stream", + "name": "stdout", "text": [ - " \r" + "Episode 494/600: Total Reward = -0.02\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.76 |\n", + "| ep_rew_mean | -0.299 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 412155 |\n", + "| train/ | |\n", + "| entropy_loss | -5.94 |\n", + "| explained_variance | 0.994 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82430 |\n", + "| policy_loss | 0.285 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00307 |\n", + "------------------------------------\n" ] }, { - "name": "stdout", "output_type": "stream", + "name": "stderr", "text": [ - "MoviePy - Done !\n", - "MoviePy - video ready C:\\Users\\BYCInfo\\AppData\\Local\\Temp\\tmpee8exbb8\\-step-0-to-step-1000.mp4\n", - "\u001b[38;5;1m✘ 'DummyVecEnv' object has no attribute 'video_recorder'\u001b[0m\n", - "\u001b[38;5;1m✘ We are unable to generate a replay of your agent, the package_to_hub\n", - "process continues\u001b[0m\n", - "\u001b[38;5;1m✘ Please open an issue at\n", - "https://github.com/huggingface/huggingface_sb3/issues\u001b[0m\n", - "\u001b[38;5;4mℹ Pushing repo oussamab2n/a2c-cartpole to the 
Hugging Face Hub\u001b[0m\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "290118356e7444249cf0ea7a6bfe37f6", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "a2c-cartpole.zip: 0%| | 0.00/101k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bf2f7a273e7a479b8e97cf5dedcaddb8", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "policy.optimizer.pth: 0%| | 0.00/43.4k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e77629a2b480411692154ed4200c12b4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Upload 3 LFS files: 0%| | 0/3 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 495/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.76 |\n", + "| ep_rew_mean | -0.304 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 90 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 412990 |\n", + "| train/ | |\n", + "| entropy_loss | -5.92 |\n", + "| explained_variance | -0.572 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82597 |\n", + "| policy_loss | -0.635 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.035 |\n", + "------------------------------------\n" + ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b2c8bbf1f1024fe59e602e89b3908d1d", - "version_major": 2, - "version_minor": 0 - }, - 
"text/plain": [ - "policy.pth: 0%| | 0.00/41.1k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "\u001b[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:\n", - "https://huggingface.co/oussamab2n/a2c-cartpole/tree/main/\u001b[0m\n", - "Model successfully uploaded to Hugging Face Hub!\n" - ] - } - ], - "source": [ - "import gymnasium as gym\n", - "from stable_baselines3 import A2C\n", - "from stable_baselines3.common.monitor import Monitor\n", - "from huggingface_sb3 import package_to_hub\n", - "\n", - "# Load your trained model\n", - "model = A2C.load(\"a2c_cartpole\")\n", - "\n", - "# Create an evaluation environment with render_mode=\"rgb_array\" and wrap it with Monitor\n", - "eval_env = gym.make(\"CartPole-v1\", render_mode=\"rgb_array\")\n", - "eval_env = Monitor(eval_env)\n", - "\n", - "# Define your Hugging Face repository name\n", - "repo_id = \"oussamab2n/a2c-cartpole\"\n", - "\n", - "# Upload model to Hugging Face\n", - "package_to_hub(\n", - " model=model,\n", - " model_name=\"a2c-cartpole\",\n", - " model_architecture=\"A2C\",\n", - " env_id=\"CartPole-v1\",\n", - " eval_env=eval_env,\n", - " repo_id=repo_id,\n", - " commit_message=\"Upload trained A2C model on CartPole-v1\"\n", - ")\n", - "\n", - "print(\"Model successfully uploaded to Hugging Face Hub!\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xko_YulD3c-o" - }, - "source": [ - "evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "referenced_widgets": [ - "b5d5c5d5cb464200b7c955c40cab6b01" + "Episode 
496/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.35 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 413825 |\n", + "| train/ | |\n", + "| entropy_loss | -5.96 |\n", + "| explained_variance | 0.982 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82764 |\n", + "| policy_loss | 0.125 |\n", + "| std | 0.599 |\n", + "| value_loss | 0.000995 |\n", + "------------------------------------\n" ] }, - "id": "21hr5rXB3bFD", - "outputId": "e8758121-92ff-42d5-8e52-5f62ba053c4f" - }, - "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b5d5c5d5cb464200b7c955c40cab6b01", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "a2c-cartpole.zip: 0%| | 0.00/101k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Episode 1: Total Reward = 500.0\n", - "Episode 2: Total Reward = 500.0\n", - "Episode 3: Total Reward = 500.0\n", - "Episode 4: Total Reward = 500.0\n", - "Episode 5: Total Reward = 500.0\n", - "Episode 6: Total Reward = 500.0\n", - "Episode 7: Total Reward = 500.0\n", - "Episode 8: Total Reward = 500.0\n", - "Episode 9: Total Reward = 500.0\n", - "Episode 10: Total Reward = 500.0\n", - "Episode 11: Total Reward = 500.0\n", - "Episode 12: Total Reward = 500.0\n", - "Episode 13: Total Reward = 500.0\n", - "Episode 14: Total Reward = 500.0\n", - "Episode 15: Total Reward = 500.0\n", - "Episode 16: Total Reward = 500.0\n", - "Episode 17: Total Reward = 500.0\n", - "Episode 18: Total Reward = 500.0\n", - "Episode 19: Total Reward = 500.0\n", - "Episode 20: Total Reward = 500.0\n", - "Episode 21: Total Reward = 500.0\n", - "Episode 22: Total Reward = 500.0\n", - "Episode 23: Total 
Reward = 500.0\n", - "Episode 24: Total Reward = 500.0\n", - "Episode 25: Total Reward = 500.0\n", - "Episode 26: Total Reward = 500.0\n", - "Episode 27: Total Reward = 500.0\n", - "Episode 28: Total Reward = 500.0\n", - "Episode 29: Total Reward = 500.0\n", - "Episode 30: Total Reward = 500.0\n", - "Episode 31: Total Reward = 500.0\n", - "Episode 32: Total Reward = 500.0\n", - "Episode 33: Total Reward = 500.0\n", - "Episode 34: Total Reward = 500.0\n", - "Episode 35: Total Reward = 500.0\n", - "Episode 36: Total Reward = 500.0\n", - "Episode 37: Total Reward = 500.0\n", - "Episode 38: Total Reward = 500.0\n", - "Episode 39: Total Reward = 500.0\n", - "Episode 40: Total Reward = 500.0\n", - "Episode 41: Total Reward = 500.0\n", - "Episode 42: Total Reward = 500.0\n", - "Episode 43: Total Reward = 500.0\n", - "Episode 44: Total Reward = 500.0\n", - "Episode 45: Total Reward = 500.0\n", - "Episode 46: Total Reward = 500.0\n", - "Episode 47: Total Reward = 500.0\n", - "Episode 48: Total Reward = 500.0\n", - "Episode 49: Total Reward = 500.0\n", - "Episode 50: Total Reward = 500.0\n", - "Episode 51: Total Reward = 500.0\n", - "Episode 52: Total Reward = 500.0\n", - "Episode 53: Total Reward = 500.0\n", - "Episode 54: Total Reward = 500.0\n", - "Episode 55: Total Reward = 500.0\n", - "Episode 56: Total Reward = 500.0\n", - "Episode 57: Total Reward = 500.0\n", - "Episode 58: Total Reward = 500.0\n", - "Episode 59: Total Reward = 500.0\n", - "Episode 60: Total Reward = 500.0\n", - "Episode 61: Total Reward = 500.0\n", - "Episode 62: Total Reward = 500.0\n", - "Episode 63: Total Reward = 500.0\n", - "Episode 64: Total Reward = 500.0\n", - "Episode 65: Total Reward = 500.0\n", - "Episode 66: Total Reward = 500.0\n", - "Episode 67: Total Reward = 500.0\n", - "Episode 68: Total Reward = 500.0\n", - "Episode 69: Total Reward = 500.0\n", - "Episode 70: Total Reward = 500.0\n", - "Episode 71: Total Reward = 500.0\n", - "Episode 72: Total Reward = 500.0\n", - "Episode 73: Total 
Reward = 500.0\n", - "Episode 74: Total Reward = 500.0\n", - "Episode 75: Total Reward = 500.0\n", - "Episode 76: Total Reward = 500.0\n", - "Episode 77: Total Reward = 500.0\n", - "Episode 78: Total Reward = 500.0\n", - "Episode 79: Total Reward = 500.0\n", - "Episode 80: Total Reward = 500.0\n", - "Episode 81: Total Reward = 500.0\n", - "Episode 82: Total Reward = 500.0\n", - "Episode 83: Total Reward = 500.0\n", - "Episode 84: Total Reward = 500.0\n", - "Episode 85: Total Reward = 500.0\n", - "Episode 86: Total Reward = 500.0\n", - "Episode 87: Total Reward = 500.0\n", - "Episode 88: Total Reward = 500.0\n", - "Episode 89: Total Reward = 500.0\n", - "Episode 90: Total Reward = 500.0\n", - "Episode 91: Total Reward = 500.0\n", - "Episode 92: Total Reward = 500.0\n", - "Episode 93: Total Reward = 500.0\n", - "Episode 94: Total Reward = 500.0\n", - "Episode 95: Total Reward = 500.0\n", - "Episode 96: Total Reward = 500.0\n", - "Episode 97: Total Reward = 500.0\n", - "Episode 98: Total Reward = 500.0\n", - "Episode 99: Total Reward = 500.0\n", - "Episode 100: Total Reward = 500.0\n" + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "data": { - "image/png": "", - "text/plain": [ - "<Figure size 1000x500 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 497/600: Total Reward = -0.02\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.91 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 81 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 414660 |\n", + "| train/ | |\n", + "| entropy_loss | -5.97 |\n", + "| 
explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 82931 |\n", + "| policy_loss | -0.0435 |\n", + "| std | 0.599 |\n", + "| value_loss | 0.000421 |\n", + "------------------------------------\n" + ] }, { - "name": "stdout", "output_type": "stream", + "name": "stderr", "text": [ - "\n", - "Evaluation Completed!\n", - "Number of Perfect Episodes (Reward == 500): 100 / 100\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] - } - ], - "source": [ - "import gymnasium as gym\n", - "from stable_baselines3 import A2C\n", - "from stable_baselines3.common.monitor import Monitor\n", - "from huggingface_sb3 import load_from_hub\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# Define your Hugging Face repository and model file\n", - "repo_id = \"oussamab2n/a2c-cartpole\"\n", - "filename = \"a2c-cartpole.zip\"\n", - "\n", - "# Load model from Hugging Face Hub\n", - "model_path = load_from_hub(repo_id=repo_id, filename=filename)\n", - "model = A2C.load(model_path)\n", - "\n", - "# Create evaluation environment\n", - "eval_env = gym.make(\"CartPole-v1\", render_mode=None)\n", - "eval_env = Monitor(eval_env)\n", - "\n", - "# Initialize tracking variables\n", - "num_episodes = 100\n", - "perfect_episodes = 0\n", - "episode_rewards = [] # List to store the reward of each episode\n", - "\n", - "# Run evaluation for 100 episodes\n", - "for episode in range(num_episodes):\n", - " obs, _ = eval_env.reset() # Reset at the start of each episode\n", - " done = False\n", - " total_reward = 0\n", - "\n", - " while not done:\n", - " action, _ = model.predict(obs, deterministic=True)\n", - " obs, reward, terminated, truncated, _ = eval_env.step(action) # Gymnasium returns terminated & truncated\n", - " done = terminated or truncated # Handle both termination and truncation cases\n", - " total_reward += reward\n", - 
"\n", - " # Store the total reward for each episode\n", - " episode_rewards.append(total_reward)\n", - "\n", - " # Check if the episode reached a total reward of 500\n", - " if total_reward == 500:\n", - " perfect_episodes += 1\n", - "\n", - " print(f\"Episode {episode+1}: Total Reward = {total_reward}\")\n", - "\n", - "# Plot the total reward for each episode\n", - "plt.figure(figsize=(10, 5))\n", - "plt.plot(range(1, num_episodes + 1), episode_rewards, marker=\"o\", linestyle=\"-\", color=\"b\", label=\"Episode Reward\")\n", - "plt.xlabel(\"Episode\")\n", - "plt.ylabel(\"Total Reward\")\n", - "plt.title(\"Total Reward Per Episode\")\n", - "plt.legend()\n", - "plt.grid()\n", - "plt.show()\n", - "\n", - "# Final results\n", - "print(\"\\nEvaluation Completed!\")\n", - "print(f\"Number of Perfect Episodes (Reward == 500): {perfect_episodes} / {num_episodes}\")\n", - "\n", - "# Close the environment\n", - "eval_env.close()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SxieP2wTkr67" - }, - "source": [ - "### Get familiar with Weights & Biases\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "RKDOi7DWvBQE", - "outputId": "ca02cbe6-bd40-4f7c-cdb5-67d2543ae872" - }, - "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.8/20.8 MB\u001b[0m \u001b[31m78.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. 
(Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: \n", - "\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: W&B API key is configured. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + "Episode 498/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.48 |\n", + "| ep_rew_mean | -0.377 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 415495 |\n", + "| train/ | |\n", + "| entropy_loss | -6.03 |\n", + "| explained_variance | 0.991 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83098 |\n", + "| policy_loss | -0.134 |\n", + "| std | 0.603 |\n", + "| value_loss | 0.00114 |\n", + "------------------------------------\n" ] - } - ], - "source": [ - "! pip install wandb -qU\n", - "#0b197edd6d50d8cc0ed00564436ada87f46084fa\n", - "! wandb login --relogin\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "id": "uRSvh1iYzQUH", - "outputId": "33790305-1d03-432b-9b07-b1332c659f2a" - }, - "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend. 
Please refer to https://wandb.me/wandb-core for more information.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mbenyahiamohammedoussama\u001b[0m (\u001b[33mbenyahiamohammedoussama-ecole-central-lyon\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "output_type": "execute_result", - "data": { - "text/plain": [ - "True" - ] - }, - "metadata": {}, - "execution_count": 4 - } - ], - "source": [ - "import wandb\n", - "\n", - "wandb.login()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 125 - }, - "id": "DrrkjdbszeGM", - "outputId": "75e6a500-66ae-4db0-fb46-bc5aee836327" - }, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "Tracking run with wandb version 0.19.7" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 499/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.32 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 76 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 416330 |\n", + "| train/ | |\n", + "| entropy_loss | -6.04 |\n", + "| explained_variance | 0.903 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83265 |\n", + "| policy_loss | 0.243 |\n", + "| std | 0.605 |\n", + "| value_loss | 0.00298 |\n", + "------------------------------------\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML 
object>" - ], - "text/html": [ - "Run data is saved locally in <code>/content/wandb/run-20250222_124637-5aqhfh3z</code>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "Syncing run <strong><a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3/runs/5aqhfh3z' target=\"_blank\">distinctive-wave-6</a></strong> to <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 500/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.294 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 106 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 417165 |\n", + "| train/ | |\n", + "| entropy_loss | -6.06 |\n", + "| explained_variance | 0.918 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83432 |\n", + "| policy_loss | 0.301 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.0026 |\n", + "------------------------------------\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - " View project at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3</a>" - ] - }, - "metadata": {} + "output_type": "stream", + 
"name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - " View run at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3/runs/5aqhfh3z' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3/runs/5aqhfh3z</a>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 501/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 77 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 418000 |\n", + "| train/ | |\n", + "| entropy_loss | -6.05 |\n", + "| explained_variance | 0.452 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 83599 |\n", + "| policy_loss | -0.525 |\n", + "| std | 0.604 |\n", + "| value_loss | 0.0271 |\n", + "------------------------------------\n" + ] }, { - "output_type": "execute_result", - "data": { - "text/html": [ - "<button onClick=\"this.nextSibling.style.display='block';this.style.display='none';\">Display W&B run</button><iframe src='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3/runs/5aqhfh3z?jupyter=true' style='border:none;width:100%;height:420px;display:none;'></iframe>" - ], - "text/plain": [ - "<wandb.sdk.wandb_run.Run at 0x7c057798c4d0>" - ] - }, - "metadata": {}, - "execution_count": 5 - } - ], - "source": [ - "# Initialize a new run\n", - "wandb.init(project=\"wb_sb3\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": 
"https://localhost:8080/", - "height": 1000 + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "id": "s6edpedP0dor", - "outputId": "47300af2-3b08-4a47-8a18-ea5175a78c56" - }, - "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Using cpu device\n", + "Episode 502/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 58.2 |\n", - "| ep_rew_mean | 58.2 |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.345 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 554 |\n", + "| fps | 124 |\n", "| iterations | 100 |\n", - "| time_elapsed | 0 |\n", - "| total_timesteps | 500 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 418835 |\n", "| train/ | |\n", - "| entropy_loss | -0.544 |\n", - "| explained_variance | -0.208 |\n", + "| entropy_loss | -6.02 |\n", + "| explained_variance | 0.964 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 99 |\n", - "| policy_loss | 1.11 |\n", - "| value_loss | 10.2 |\n", - "------------------------------------\n", + "| n_updates | 83766 |\n", + "| policy_loss | -0.0578 |\n", + "| std | 0.603 |\n", + "| value_loss | 0.000854 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 503/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 58.7 |\n", - "| 
ep_rew_mean | 58.7 |\n", + "| ep_len_mean | 3.76 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 553 |\n", - "| iterations | 200 |\n", - "| time_elapsed | 1 |\n", - "| total_timesteps | 1000 |\n", + "| fps | 77 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 419670 |\n", "| train/ | |\n", - "| entropy_loss | -0.493 |\n", - "| explained_variance | -0.311 |\n", + "| entropy_loss | -5.98 |\n", + "| explained_variance | 0.125 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 199 |\n", - "| policy_loss | 1.79 |\n", - "| value_loss | 8.13 |\n", - "------------------------------------\n", + "| n_updates | 83933 |\n", + "| policy_loss | -0.247 |\n", + "| std | 0.6 |\n", + "| value_loss | 0.00757 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 504/600: Total Reward = -0.62\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 53.3 |\n", - "| ep_rew_mean | 53.3 |\n", + "| ep_len_mean | 3.78 |\n", + "| ep_rew_mean | -0.304 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 553 |\n", - "| iterations | 300 |\n", - "| time_elapsed | 2 |\n", - "| total_timesteps | 1500 |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 420505 |\n", "| train/ | |\n", - "| entropy_loss | -0.572 |\n", - "| explained_variance | 0.00166 |\n", + "| entropy_loss | -6.01 |\n", + "| explained_variance | 0.993 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 299 |\n", - "| policy_loss | 1.24 |\n", - "| value_loss | 5.54 |\n", + "| n_updates | 
84100 |\n", + "| policy_loss | 0.44 |\n", + "| std | 0.602 |\n", + "| value_loss | 0.00688 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 505/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.48 |\n", + "| ep_rew_mean | -0.285 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 87 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 421340 |\n", + "| train/ | |\n", + "| entropy_loss | -6.04 |\n", + "| explained_variance | 0.936 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84267 |\n", + "| policy_loss | 0.117 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.00194 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 506/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 56.3 |\n", - "| ep_rew_mean | 56.3 |\n", + "| ep_len_mean | 3.99 |\n", + "| ep_rew_mean | -0.314 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 555 |\n", - "| iterations | 400 |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", "| time_elapsed | 3 |\n", - "| total_timesteps | 2000 |\n", + "| total_timesteps | 422175 |\n", "| train/ | |\n", - "| entropy_loss | 
-0.629 |\n", - "| explained_variance | -0.00653 |\n", + "| entropy_loss | -6.05 |\n", + "| explained_variance | 0.956 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 399 |\n", - "| policy_loss | 0.855 |\n", - "| value_loss | 5.41 |\n", - "------------------------------------\n", + "| n_updates | 84434 |\n", + "| policy_loss | 0.474 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.00556 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 507/600: Total Reward = -0.41\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 58 |\n", - "| ep_rew_mean | 58 |\n", + "| ep_len_mean | 3.74 |\n", + "| ep_rew_mean | -0.307 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 557 |\n", - "| iterations | 500 |\n", + "| fps | 103 |\n", + "| iterations | 100 |\n", "| time_elapsed | 4 |\n", - "| total_timesteps | 2500 |\n", + "| total_timesteps | 423010 |\n", "| train/ | |\n", - "| entropy_loss | -0.343 |\n", - "| explained_variance | 0.00255 |\n", + "| entropy_loss | -6.04 |\n", + "| explained_variance | 0.915 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 499 |\n", - "| policy_loss | 0.0657 |\n", - "| value_loss | 382 |\n", - "------------------------------------\n", + "| n_updates | 84601 |\n", + "| policy_loss | -0.515 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.00687 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call 
`wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 508/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 63.6 |\n", - "| ep_rew_mean | 63.6 |\n", + "| ep_len_mean | 3.62 |\n", + "| ep_rew_mean | -0.291 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 543 |\n", - "| iterations | 600 |\n", - "| time_elapsed | 5 |\n", - "| total_timesteps | 3000 |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 423845 |\n", "| train/ | |\n", - "| entropy_loss | -0.382 |\n", - "| explained_variance | 0.00138 |\n", + "| entropy_loss | -6.02 |\n", + "| explained_variance | 0.931 |\n", "| learning_rate | 0.0007 |\n", - "| n_updates | 599 |\n", - "| policy_loss | 0.461 |\n", - "| value_loss | 256 |\n", - "------------------------------------\n", + "| n_updates | 84768 |\n", + "| policy_loss | 0.248 |\n", + "| std | 0.604 |\n", + "| value_loss | 0.00308 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 509/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", "------------------------------------\n", "| rollout/ | |\n", - "| ep_len_mean | 64.6 |\n", - "| ep_rew_mean | 64.6 |\n", + "| ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", "| time/ | |\n", - "| fps | 530 |\n", - "| iterations | 700 |\n", - "| time_elapsed | 6 |\n", - "| total_timesteps | 3500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.51 |\n", - "| explained_variance | 
0.00348 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 699 |\n", - "| policy_loss | 0.722 |\n", - "| value_loss | 3.78 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 67.8 |\n", - "| ep_rew_mean | 67.8 |\n", - "| time/ | |\n", - "| fps | 512 |\n", - "| iterations | 800 |\n", - "| time_elapsed | 7 |\n", - "| total_timesteps | 4000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.409 |\n", - "| explained_variance | -0.000384 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 799 |\n", - "| policy_loss | 1.49 |\n", - "| value_loss | 3.25 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 72.2 |\n", - "| ep_rew_mean | 72.2 |\n", - "| time/ | |\n", - "| fps | 495 |\n", - "| iterations | 900 |\n", - "| time_elapsed | 9 |\n", - "| total_timesteps | 4500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.463 |\n", - "| explained_variance | -0.00283 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 899 |\n", - "| policy_loss | 0.622 |\n", - "| value_loss | 2.78 |\n", - "------------------------------------\n", - "------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 75.9 |\n", - "| ep_rew_mean | 75.9 |\n", - "| time/ | |\n", - "| fps | 501 |\n", - "| iterations | 1000 |\n", - "| time_elapsed | 9 |\n", - "| total_timesteps | 5000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.306 |\n", - "| explained_variance | 0.00123 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 999 |\n", - "| policy_loss | 0.944 |\n", - "| value_loss | 2.34 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 82.3 |\n", - "| ep_rew_mean | 82.3 |\n", - "| time/ | |\n", - "| fps | 505 |\n", - "| iterations | 1100 |\n", - "| time_elapsed | 10 |\n", - "| total_timesteps | 5500 |\n", - "| train/ | 
|\n", - "| entropy_loss | -0.447 |\n", - "| explained_variance | -0.000973 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1099 |\n", - "| policy_loss | 0.402 |\n", - "| value_loss | 1.91 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 88.6 |\n", - "| ep_rew_mean | 88.6 |\n", - "| time/ | |\n", - "| fps | 510 |\n", - "| iterations | 1200 |\n", - "| time_elapsed | 11 |\n", - "| total_timesteps | 6000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.557 |\n", - "| explained_variance | -5.52e-05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1199 |\n", - "| policy_loss | 0.328 |\n", - "| value_loss | 1.52 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 94.6 |\n", - "| ep_rew_mean | 94.6 |\n", - "| time/ | |\n", - "| fps | 514 |\n", - "| iterations | 1300 |\n", - "| time_elapsed | 12 |\n", - "| total_timesteps | 6500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.471 |\n", - "| explained_variance | -0.000189 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1299 |\n", - "| policy_loss | 0.475 |\n", - "| value_loss | 1.17 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 99.8 |\n", - "| ep_rew_mean | 99.8 |\n", - "| time/ | |\n", - "| fps | 517 |\n", - "| iterations | 1400 |\n", - "| time_elapsed | 13 |\n", - "| total_timesteps | 7000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.512 |\n", - "| explained_variance | 0.000282 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1399 |\n", - "| policy_loss | 0.422 |\n", - "| value_loss | 0.883 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 101 |\n", - "| ep_rew_mean | 101 |\n", - "| time/ | |\n", - "| fps | 520 |\n", - "| iterations | 1500 
|\n", - "| time_elapsed | 14 |\n", - "| total_timesteps | 7500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.574 |\n", - "| explained_variance | -1.93e-05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1499 |\n", - "| policy_loss | 0.33 |\n", - "| value_loss | 0.632 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 104 |\n", - "| ep_rew_mean | 104 |\n", - "| time/ | |\n", - "| fps | 522 |\n", - "| iterations | 1600 |\n", - "| time_elapsed | 15 |\n", - "| total_timesteps | 8000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.38 |\n", - "| explained_variance | -0.000158 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1599 |\n", - "| policy_loss | 0.321 |\n", - "| value_loss | 0.42 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 113 |\n", - "| ep_rew_mean | 113 |\n", - "| time/ | |\n", - "| fps | 523 |\n", - "| iterations | 1700 |\n", - "| time_elapsed | 16 |\n", - "| total_timesteps | 8500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.472 |\n", - "| explained_variance | -2.01e-05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1699 |\n", - "| policy_loss | 0.135 |\n", - "| value_loss | 0.253 |\n", - "-------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 117 |\n", - "| ep_rew_mean | 117 |\n", - "| time/ | |\n", - "| fps | 526 |\n", - "| iterations | 1800 |\n", - "| time_elapsed | 17 |\n", - "| total_timesteps | 9000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.465 |\n", - "| explained_variance | -0.000141 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1799 |\n", - "| policy_loss | 0.159 |\n", - "| value_loss | 0.133 |\n", - "-------------------------------------\n", - "------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 118 |\n", - "| 
ep_rew_mean | 118 |\n", - "| time/ | |\n", - "| fps | 529 |\n", - "| iterations | 1900 |\n", - "| time_elapsed | 17 |\n", - "| total_timesteps | 9500 |\n", - "| train/ | |\n", - "| entropy_loss | -0.54 |\n", - "| explained_variance | 4.63e-05 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1899 |\n", - "| policy_loss | 0.108 |\n", - "| value_loss | 0.0489 |\n", - "------------------------------------\n", - "-------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 122 |\n", - "| ep_rew_mean | 122 |\n", - "| time/ | |\n", - "| fps | 531 |\n", - "| iterations | 2000 |\n", - "| time_elapsed | 18 |\n", - "| total_timesteps | 10000 |\n", - "| train/ | |\n", - "| entropy_loss | -0.439 |\n", - "| explained_variance | -2.03e-06 |\n", - "| learning_rate | 0.0007 |\n", - "| n_updates | 1999 |\n", - "| policy_loss | 0.0152 |\n", - "| value_loss | 0.00595 |\n", - "-------------------------------------\n", - "Model saved successfully!\n" + "| fps | 133 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 424680 |\n", + "| train/ | |\n", + "| entropy_loss | -6.03 |\n", + "| explained_variance | 0.904 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 84935 |\n", + "| policy_loss | -0.0218 |\n", + "| std | 0.606 |\n", + "| value_loss | 0.00205 |\n", + "------------------------------------\n" ] }, { - "data": { - "image/png": "", - "text/plain": [ - "<Figure size 1000x500 with 1 Axes>" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "data": { - "text/html": [], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 
510/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.321 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 425515 |\n", + "| train/ | |\n", + "| entropy_loss | -5.94 |\n", + "| explained_variance | 0.975 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85102 |\n", + "| policy_loss | -0.26 |\n", + "| std | 0.599 |\n", + "| value_loss | 0.0025 |\n", + "------------------------------------\n" + ] }, { - "data": { - "text/html": [ - "<br> <style><br> .wandb-row {<br> display: flex;<br> flex-direction: row;<br> flex-wrap: wrap;<br> justify-content: flex-start;<br> width: 100%;<br> }<br> .wandb-col {<br> display: flex;<br> flex-direction: column;<br> flex-basis: 100%;<br> flex: 1;<br> padding: 10px;<br> }<br> </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>episode</td><td>▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██</td></tr><tr><td>total_reward</td><td>█▅█▇█▆▆██▃█▄▇▅▂▃▆▅█▆▅▃▄▂▆▂▃██▁▆█▆▆██▄█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>episode</td><td>500</td></tr><tr><td>total_reward</td><td>414</td></tr></table><br/></div></div>" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "data": { - "text/html": [ - " View run <strong style=\"color:#cdcd00\">major-oath-3</strong> at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3/runs/h0conaa8' 
target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3/runs/h0conaa8</a><br> View project at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/wb_sb3</a><br>Synced 5 W&B file(s), 1 media file(s), 0 artifact file(s) and 0 other file(s)" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 511/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.69 |\n", + "| ep_rew_mean | -0.297 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 128 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 426350 |\n", + "| train/ | |\n", + "| entropy_loss | -5.9 |\n", + "| explained_variance | 0.0287 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85269 |\n", + "| policy_loss | 0.41 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.00898 |\n", + "------------------------------------\n" + ] }, { - "data": { - "text/html": [ - "Find logs at: <code>./wandb/run-20250210_220810-h0conaa8/logs</code>" - ], - "text/plain": [ - "<IPython.core.display.HTML object>" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import gymnasium as gym\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from stable_baselines3 import A2C\n", - "from stable_baselines3.common.monitor import Monitor\n", - "from stable_baselines3.common.vec_env import DummyVecEnv\n", - "import wandb\n", - "\n", - "# Initialize W&B for experiment tracking\n", - "wandb.init(project=\"wb_sb3\", config={\"learning_rate\": 0.001, \"total_timesteps\": 100000}, sync_tensorboard=True)\n", - "\n", - "# Create and wrap the CartPole environment\n", - "env = 
gym.make(\"CartPole-v1\")\n", - "env = Monitor(env)\n", - "env = DummyVecEnv([lambda: env])\n", - "\n", - "# Initialize the A2C model\n", - "model = A2C(\"MlpPolicy\", env, verbose=1)\n", - "\n", - "# Train the model\n", - "model.learn(total_timesteps=100000)\n", - "\n", - "# Save the trained model\n", - "model.save(\"a2c_cartpole_WB\")\n", - "print(\"Model saved successfully!\")\n", - "\n", - "num_episodes = 500 # Number of learn episodes\n", - "episode_rewards = []\n", - "\n", - "for episode in range(num_episodes):\n", - " obs = env.reset()\n", - " done = False\n", - " total_reward = 0\n", - "\n", - " while not done:\n", - " action, _states = model.predict(obs)\n", - " obs, reward, done, info = env.step(action)\n", - "\n", - " total_reward += reward[0]\n", - " done = done[0]\n", - "\n", - " episode_rewards.append(total_reward)\n", - "\n", - "# Log the total rewards of each episode to WB\n", - "for i, reward in enumerate(episode_rewards):\n", - " wandb.log({\"episode\": i + 1, \"total_reward\": reward})\n", - "\n", - "# Plot the episode rewards\n", - "plt.figure(figsize=(10, 5))\n", - "plt.plot(range(1, num_episodes + 1), episode_rewards, marker=\"o\", linestyle=\"-\", color=\"b\", label=\"Total Reward per Episode\")\n", - "plt.xlabel(\"Episode\")\n", - "plt.ylabel(\"Total Reward\")\n", - "plt.title(\"Total Reward per Episode during Evaluation\")\n", - "plt.legend()\n", - "plt.grid(True)\n", - "plt.savefig(\"episode_rewards_plot.png\") # Save the plot as an image\n", - "plt.show()\n", - "\n", - "# Log the plot to WB\n", - "wandb.log({\"Episode Rewards Plot\": wandb.Image(\"episode_rewards_plot.png\")})\n", - "\n", - "# Close the environment\n", - "env.close()\n", - "\n", - "# Finish WB run\n", - "wandb.finish()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Dd80KFQO6ncb" - }, - "source": [ - "upload" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 
489, - "referenced_widgets": [ - "105a958acd644ed2bcab9791de217aed", - "b5ddd303cc494ecd865fab744c7c7017", - "477c8ee0c2d94a5f9113b04e7e564aae", - "22963ccded02458d8047644dea108c0f", - "9cd838bc92c144108ad7fb2717dfa058", - "8bf9caee159f4f03b4b9be6500f7118a", - "5dfdcec102ef454893d2337010e465b7", - "d2b3dfcf04a44fb692b800cf0bb7f802", - "a0fffd5ee818454da360030f6e1466ae", - "5a73da851a4443208a79bfadb025bec4", - "10f2c206b3b6403f871d78f6f665c937", - "f48bd46317b54ebc9d55eee77b4eb165", - "dac9eb72052f448fbb17389d3acfbbfb", - "a7098ce07385468ba0b61a39601155ca", - "03743e7926594213aaa8a4ea7f149e47", - "e097d0070f30469abbd542994e74bf81", - "451f1b9792f64fa68eb0f68c0aefbc0d", - "ba48466f16a44983b96b20123c966be9", - "a6d98acbf717475dbfd80407d9256f5d", - "57b8419adedf4abe8d950d18057aa7e5", - "4916f07bcd1c47339946d15b1b44285b", - "1c9b00b0286f45a1a38ad992723d0efa", - "3a3f1bb3be714c71bc71a7db8fc31189", - "28e092db03774e4db332da6f1f0d3dd6", - "b899e2f587a34972ae41853ae62236fe", - "23f1b8afdbbf4dc3a3ddaaae094a287a", - "aa6c1072bcd94715a93b9c0f598981a5", - "e64bd642eab44cffa9d8c8e3a64103e7", - "053eca19faa24c35833c132c1af7b94c", - "536343b057e64933a62fe9aac074da24", - "333eaf4f77744246b98456a844fcca2d", - "7595680d757448eab257e813cbf506b1", - "b99ab3e32f10455398319b8c3e99ed18", - "d996b268e9c24f85a73ca1017f8cc5fa", - "cdca8fecc2cc412282c4f41ba141017d", - "508ba1fe83a24b83a6b26626ff71f22f", - "c135ded2efad4a0d8bddea2e51bc6459", - "9b4427af69fd454eb54eaf678465ceee", - "406df9e39d29497dab95d0890ebc006f", - "da0b9ac258dd482fa342021ceb684aa7", - "cd24c5adf102474493f9b7642a65dd57", - "8711ae2fa5174670afb295d8c788d2ac", - "f183c2895da7440a80edc87f5a130fee", - "8eecebc2ee5a42978670c9f2b0e5f592", - "d7cfb3fea4444e6ca69e7bdb4f86d40d", - "c8df02b3721d4a97b6c6336e896ff179", - "c48c8dc4a77f4dfaa2adede6518d373c", - "ea68977e2bc642358854ed2313d4c9bc", - "13f76670252d4fcbaf43c831b8fe1ee7", - "f22b7b1e400a4ed98dc754b073a9a40a", - "19a6e413bee74cc083c01d1b5d8dfee6", - 
"0c6aaba7ffb04fe2a5babdcb5c3e955d", - "b6d13ee968fa4473baecb877b8f302f8", - "7b3c7a0ac0424cb7af4f7e1dfbdc513b", - "79a31c28dd1e4bb8b2e2252b5fd1a940" - ] - }, - "id": "8OIhM8GAm8gT", - "outputId": "5e91e213-44a8-41dc-c203-44f3c2358283" - }, - "outputs": [ + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 512/600: Total Reward = -0.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.94 |\n", + "| ep_rew_mean | -0.308 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 126 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 427185 |\n", + "| train/ | |\n", + "| entropy_loss | -5.89 |\n", + "| explained_variance | 0.802 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85436 |\n", + "| policy_loss | -0.217 |\n", + "| std | 0.593 |\n", + "| value_loss | 0.00354 |\n", + "------------------------------------\n" + ] + }, { - "name": "stdout", "output_type": "stream", + "name": "stderr", "text": [ - "\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n", - "create a model card and push everything to the hub. 
It might take up to 1min.\n", - "This is a work in progress: if you encounter a bug, please open an issue.\u001b[0m\n", - "Saving video to /tmp/tmprujl5nt1/-step-0-to-step-1000.mp4\n", - "Moviepy - Building video /tmp/tmprujl5nt1/-step-0-to-step-1000.mp4.\n", - "Moviepy - Writing video /tmp/tmprujl5nt1/-step-0-to-step-1000.mp4\n", - "\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "name": "stderr", "output_type": "stream", - "text": [] + "name": "stdout", + "text": [ + "Episode 513/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.58 |\n", + "| ep_rew_mean | -0.375 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 428020 |\n", + "| train/ | |\n", + "| entropy_loss | -5.91 |\n", + "| explained_variance | 0.832 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85603 |\n", + "| policy_loss | -0.0776 |\n", + "| std | 0.594 |\n", + "| value_loss | 0.00141 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { + "output_type": "stream", "name": "stdout", + "text": [ + "Episode 514/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 108 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 428855 |\n", + "| train/ | |\n", 
+ "| entropy_loss | -5.92 |\n", + "| explained_variance | 0.964 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85770 |\n", + "| policy_loss | 0.158 |\n", + "| std | 0.595 |\n", + "| value_loss | 0.00132 |\n", + "------------------------------------\n" + ] + }, + { "output_type": "stream", + "name": "stderr", "text": [ - "Moviepy - Done !\n", - "Moviepy - video ready /tmp/tmprujl5nt1/-step-0-to-step-1000.mp4\n", - "\u001b[38;5;1m✘ 'DummyVecEnv' object has no attribute 'video_recorder'\u001b[0m\n", - "\u001b[38;5;1m✘ We are unable to generate a replay of your agent, the package_to_hub\n", - "process continues\u001b[0m\n", - "\u001b[38;5;1m✘ Please open an issue at\n", - "https://github.com/huggingface/huggingface_sb3/issues\u001b[0m\n", - "\u001b[38;5;4mℹ Pushing repo oussamab2n/a2c-cartpole-wb to the Hugging Face Hub\u001b[0m\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "105a958acd644ed2bcab9791de217aed", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "policy.pth: 0%| | 0.00/41.1k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 515/600: Total Reward = -0.17\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.79 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 127 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 429690 |\n", + "| train/ | |\n", + "| entropy_loss | -5.93 |\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 85937 |\n", + "| policy_loss | 0.19 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00169 |\n", + 
"------------------------------------\n" + ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f48bd46317b54ebc9d55eee77b4eb165", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "policy.optimizer.pth: 0%| | 0.00/43.4k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3a3f1bb3be714c71bc71a7db8fc31189", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "pytorch_variables.pth: 0%| | 0.00/864 [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 516/600: Total Reward = -0.50\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.96 |\n", + "| ep_rew_mean | -0.323 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 80 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 430525 |\n", + "| train/ | |\n", + "| entropy_loss | -5.92 |\n", + "| explained_variance | 0.216 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86104 |\n", + "| policy_loss | -0.124 |\n", + "| std | 0.596 |\n", + "| value_loss | 0.00559 |\n", + "------------------------------------\n" + ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d996b268e9c24f85a73ca1017f8cc5fa", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Upload 4 LFS files: 0%| | 0/4 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 
\u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7cfb3fea4444e6ca69e7bdb4f86d40d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "a2c-cartpole-wb.zip: 0%| | 0.00/101k [00:00<?, ?B/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 517/600: Total Reward = -0.29\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.94 |\n", + "| ep_rew_mean | -0.321 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 431360 |\n", + "| train/ | |\n", + "| entropy_loss | -5.87 |\n", + "| explained_variance | 0.624 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86271 |\n", + "| policy_loss | -0.381 |\n", + "| std | 0.592 |\n", + "| value_loss | 0.0108 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { + "output_type": "stream", "name": "stdout", + "text": [ + "Episode 518/600: Total Reward = -0.11\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.88 |\n", + "| ep_rew_mean | -0.319 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 73 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 432195 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 |\n", + "| explained_variance | 0.871 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates 
| 86438 |\n", + "| policy_loss | 0.59 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.0148 |\n", + "------------------------------------\n" + ] + }, + { "output_type": "stream", + "name": "stderr", "text": [ - "\u001b[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:\n", - "https://huggingface.co/oussamab2n/a2c-cartpole-wb/tree/main/\u001b[0m\n", - "✅ Model successfully uploaded to Hugging Face Hub!\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] - } - ], - "source": [ - "from huggingface_sb3 import package_to_hub\n", - "\n", - "\n", - "repo_id = \"oussamab2n/a2c-cartpole-wb\"\n", - "\n", - "# Create environment\n", - "eval_env = gym.make(\"CartPole-v1\",render_mode=\"rgb_array\")\n", - "eval_env = Monitor(eval_env)\n", - "eval_env = DummyVecEnv([lambda: eval_env]) # Wrap environment\n", - "\n", - "# Upload model to Hugging Face\n", - "package_to_hub(\n", - " model=model,\n", - " model_name=\"a2c-cartpole-wb\",\n", - " model_architecture=\"A2C\",\n", - " env_id=\"CartPole-v1\",\n", - " eval_env=eval_env,\n", - " repo_id=repo_id,\n", - " commit_message=\"Upload A2C model trained on CartPole-v1 with W&B logging\"\n", - ")\n", - "\n", - "print(\"Model successfully uploaded to Hugging Face Hub!\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IY2wgfc6p8U" - }, - "source": [ - "evaluate" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 }, - "id": "34h0joTT7ooj", - "outputId": "4284581c-00c1-49a9-f298-807495fb2fcc" - }, - "outputs": [ { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "Tracking run with wandb version 0.19.7" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + 
"Episode 519/600: Total Reward = -0.44\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.348 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 130 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 433030 |\n", + "| train/ | |\n", + "| entropy_loss | -5.86 |\n", + "| explained_variance | 0.995 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86605 |\n", + "| policy_loss | -0.143 |\n", + "| std | 0.589 |\n", + "| value_loss | 0.000786 |\n", + "------------------------------------\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "Run data is saved locally in <code>/content/wandb/run-20250222_130200-si01h5dj</code>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "Syncing run <strong><a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation/runs/si01h5dj' target=\"_blank\">A2C-CartPole</a></strong> to <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 520/600: Total Reward = -0.12\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.76 |\n", + "| ep_rew_mean | -0.388 |\n", + "| success_rate | 1 |\n", + "| time/ | 
|\n", + "| fps | 77 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 433865 |\n", + "| train/ | |\n", + "| entropy_loss | -5.82 |\n", + "| explained_variance | 0.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86772 |\n", + "| policy_loss | -0.044 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.00096 |\n", + "------------------------------------\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - " View project at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation</a>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - " View run at <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation/runs/si01h5dj' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation/runs/si01h5dj</a>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 521/600: Total Reward = -0.67\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.338 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 127 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 434700 |\n", + "| train/ | |\n", + "| entropy_loss | -5.82 |\n", + "| explained_variance | 0.869 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 86939 |\n", + 
"| policy_loss | 0.328 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00463 |\n", + "------------------------------------\n" + ] }, { "output_type": "stream", "name": "stderr", "text": [ - "/usr/local/lib/python3.11/dist-packages/gymnasium/wrappers/rendering.py:283: UserWarning: \u001b[33mWARN: Overwriting existing videos at /content/videos1 folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)\u001b[0m\n", - " logger.warn(\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Episode 1: Total Reward = 500.0\n", - "Episode 2: Total Reward = 500.0\n", - "Episode 3: Total Reward = 500.0\n", - "Episode 4: Total Reward = 500.0\n", - "Episode 5: Total Reward = 500.0\n", - "Episode 6: Total Reward = 500.0\n", - "Episode 7: Total Reward = 500.0\n", - "Episode 8: Total Reward = 500.0\n", - "Episode 9: Total Reward = 500.0\n", - "Episode 10: Total Reward = 500.0\n", - "Episode 11: Total Reward = 500.0\n", - "Episode 12: Total Reward = 500.0\n", - "Episode 13: Total Reward = 500.0\n", - "Episode 14: Total Reward = 500.0\n", - "Episode 15: Total Reward = 500.0\n", - "Episode 16: Total Reward = 500.0\n", - "Episode 17: Total Reward = 500.0\n", - "Episode 18: Total Reward = 500.0\n", - "Episode 19: Total Reward = 500.0\n", - "Episode 20: Total Reward = 500.0\n", - "Episode 21: Total Reward = 500.0\n", - "Episode 22: Total Reward = 500.0\n", - "Episode 23: Total Reward = 500.0\n", - "Episode 24: Total Reward = 500.0\n", - "Episode 25: Total Reward = 500.0\n", - "Episode 26: Total Reward = 500.0\n", - "Episode 27: Total Reward = 500.0\n", - "Episode 28: Total Reward = 500.0\n", - "Episode 29: Total Reward = 500.0\n", - "Episode 30: Total Reward = 500.0\n", - "Episode 31: Total Reward = 500.0\n", - "Episode 32: Total Reward = 500.0\n", - "Episode 33: Total Reward = 500.0\n", - "Episode 34: Total Reward = 500.0\n", - "Episode 35: Total Reward = 500.0\n", - "Episode 36: Total Reward = 500.0\n", - "Episode 
37: Total Reward = 500.0\n", - "Episode 38: Total Reward = 500.0\n", - "Episode 39: Total Reward = 500.0\n", - "Episode 40: Total Reward = 500.0\n", - "Episode 41: Total Reward = 500.0\n", - "Episode 42: Total Reward = 500.0\n", - "Episode 43: Total Reward = 500.0\n", - "Episode 44: Total Reward = 500.0\n", - "Episode 45: Total Reward = 500.0\n", - "Episode 46: Total Reward = 500.0\n", - "Episode 47: Total Reward = 500.0\n", - "Episode 48: Total Reward = 500.0\n", - "Episode 49: Total Reward = 500.0\n", - "Episode 50: Total Reward = 500.0\n", - "Episode 51: Total Reward = 500.0\n", - "Episode 52: Total Reward = 500.0\n", - "Episode 53: Total Reward = 500.0\n", - "Episode 54: Total Reward = 500.0\n", - "Episode 55: Total Reward = 500.0\n", - "Episode 56: Total Reward = 500.0\n", - "Episode 57: Total Reward = 500.0\n", - "Episode 58: Total Reward = 500.0\n", - "Episode 59: Total Reward = 500.0\n", - "Episode 60: Total Reward = 500.0\n", - "Episode 61: Total Reward = 500.0\n", - "Episode 62: Total Reward = 500.0\n", - "Episode 63: Total Reward = 500.0\n", - "Episode 64: Total Reward = 500.0\n", - "Episode 65: Total Reward = 500.0\n", - "Episode 66: Total Reward = 500.0\n", - "Episode 67: Total Reward = 500.0\n", - "Episode 68: Total Reward = 500.0\n", - "Episode 69: Total Reward = 500.0\n", - "Episode 70: Total Reward = 500.0\n", - "Episode 71: Total Reward = 500.0\n", - "Episode 72: Total Reward = 500.0\n", - "Episode 73: Total Reward = 500.0\n", - "Episode 74: Total Reward = 500.0\n", - "Episode 75: Total Reward = 500.0\n", - "Episode 76: Total Reward = 500.0\n", - "Episode 77: Total Reward = 500.0\n", - "Episode 78: Total Reward = 500.0\n", - "Episode 79: Total Reward = 500.0\n", - "Episode 80: Total Reward = 500.0\n", - "Episode 81: Total Reward = 500.0\n", - "Episode 82: Total Reward = 500.0\n", - "Episode 83: Total Reward = 500.0\n", - "Episode 84: Total Reward = 500.0\n", - "Episode 85: Total Reward = 500.0\n", - "Episode 86: Total Reward = 500.0\n", - "Episode 
87: Total Reward = 500.0\n", - "Episode 88: Total Reward = 500.0\n", - "Episode 89: Total Reward = 500.0\n", - "Episode 90: Total Reward = 500.0\n", - "Episode 91: Total Reward = 500.0\n", - "Episode 92: Total Reward = 500.0\n", - "Episode 93: Total Reward = 500.0\n", - "Episode 94: Total Reward = 500.0\n", - "Episode 95: Total Reward = 500.0\n", - "Episode 96: Total Reward = 500.0\n", - "Episode 97: Total Reward = 500.0\n", - "Episode 98: Total Reward = 500.0\n", - "Episode 99: Total Reward = 500.0\n", - "Episode 100: Total Reward = 500.0\n", - "\n", - " Evaluation Completed!\n", - "Number of Perfect Episodes (Reward == 500): 100 / 100\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<Figure size 1000x500 with 1 Axes>" - ], - "image/png": "\n" - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 522/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.25 |\n", + "| ep_rew_mean | -0.428 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 84 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 435535 |\n", + "| train/ | |\n", + "| entropy_loss | -5.82 |\n", + "| explained_variance | 0.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87106 |\n", + "| policy_loss | 0.198 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.00736 |\n", + "------------------------------------\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [] - }, - "metadata": {} + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using 
several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "<br> <style><br> .wandb-row {<br> display: flex;<br> flex-direction: row;<br> flex-wrap: wrap;<br> justify-content: flex-start;<br> width: 100%;<br> }<br> .wandb-col {<br> display: flex;<br> flex-direction: column;<br> flex-basis: 100%;<br> flex: 1;<br> padding: 10px;<br> }<br> </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>Episode Reward</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>Episode Reward</td><td>500</td></tr></table><br/></div></div>" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 523/600: Total Reward = -0.35\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.85 |\n", + "| ep_rew_mean | -0.316 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 140 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 436370 |\n", + "| train/ | |\n", + "| entropy_loss | -5.82 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87273 |\n", + "| policy_loss | -0.0137 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.000183 |\n", + "------------------------------------\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - " View run <strong style=\"color:#cdcd00\">A2C-CartPole</strong> at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation/runs/si01h5dj' 
target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation/runs/si01h5dj</a><br> View project at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/cartpole-evaluation</a><br>Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)" - ] - }, - "metadata": {} + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, { - "output_type": "display_data", - "data": { - "text/plain": [ - "<IPython.core.display.HTML object>" - ], - "text/html": [ - "Find logs at: <code>./wandb/run-20250222_130200-si01h5dj/logs</code>" - ] - }, - "metadata": {} - } - ], - "source": [ - "import gymnasium as gym\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import wandb\n", - "from stable_baselines3 import A2C\n", - "from stable_baselines3.common.monitor import Monitor\n", - "from stable_baselines3.common.vec_env import DummyVecEnv\n", - "from huggingface_sb3 import load_from_hub\n", - "import os\n", - "from gymnasium.wrappers import RecordVideo\n", - "from IPython.display import Video, display\n", - "\n", - "\n", - "# Initialize Weights & Biases for evaluation\n", - "wandb.init(project=\"cartpole-evaluation\", name=\"A2C-CartPole\", config={\"num_episodes\": 100})\n", - "\n", - "# Define Hugging Face repository and model filename\n", - "repo_id = \"oussamab2n/a2c-cartpole-wb\"\n", - "filename = \"a2c-cartpole-wb.zip\"\n", - "\n", - "# Load model from Hugging Face Hub\n", - "model_path = load_from_hub(repo_id=repo_id, filename=filename)\n", - "model = A2C.load(model_path)\n", - "\n", - "# Create video folder\n", - "video_dir = \"videos\"\n", - "os.makedirs(video_dir, exist_ok=True)\n", - "\n", - "# 
Create evaluation environment\n", - "env = gym.make(\"CartPole-v1\", render_mode=\"rgb_array\")\n", - "env = RecordVideo(env, video_folder=video_dir, episode_trigger=lambda e: e % 10 == 0) # Record every 10 episodes\n", - "env = Monitor(env)\n", - "env = DummyVecEnv([lambda: env])\n", - "\n", - "# Initialize tracking variables\n", - "num_episodes = 100\n", - "perfect_episodes = 0 # Count episodes with reward == 500\n", - "episode_rewards = []\n", - "\n", - "for episode in range(num_episodes):\n", - " obs = env.reset()\n", - " done = False\n", - " total_reward = 0\n", - "\n", - " while not done:\n", - " action, _ = model.predict(obs, deterministic=True)\n", - " obs, reward, done, info = env.step(action)\n", - "\n", - " # Correct step call for Gymnasium\n", - " total_reward += reward[0]\n", - " done = done[0]\n", - "\n", - " episode_rewards.append(total_reward)\n", - " wandb.log({\"Episode Reward\": total_reward}) # Log reward in Weights & Biases\n", - "\n", - " # Count perfect episodes\n", - " if total_reward == 500:\n", - " perfect_episodes += 1\n", - "\n", - " print(f\"Episode {episode+1}: Total Reward = {total_reward}\")\n", - "\n", - "# Print final results\n", - "print(\"\\n Evaluation Completed!\")\n", - "print(f\"Number of Perfect Episodes (Reward == 500): {perfect_episodes} / {num_episodes}\")\n", - "\n", - "# Close environment\n", - "env.close()\n", - "\n", - "# Plot rewards\n", - "plt.figure(figsize=(10, 5))\n", - "plt.plot(range(1, num_episodes + 1), episode_rewards, marker=\"o\", linestyle=\"-\", color=\"b\", label=\"Total Reward per Episode\")\n", - "plt.xlabel(\"Episode\")\n", - "plt.ylabel(\"Total Reward\")\n", - "plt.title(\"Total Reward per Episode during Evaluation\")\n", - "plt.legend()\n", - "plt.grid(True)\n", - "plt.show()\n", - "\n", - "# Finish Weights & Biases logging\n", - "wandb.finish()\n" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - 
"language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "03743e7926594213aaa8a4ea7f149e47": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_4916f07bcd1c47339946d15b1b44285b", - "placeholder": "", - "style": "IPY_MODEL_1c9b00b0286f45a1a38ad992723d0efa", - "value": " 43.4k/43.4k [00:00<00:00, 97.7kB/s]" - } - }, - "053eca19faa24c35833c132c1af7b94c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "0c6aaba7ffb04fe2a5babdcb5c3e955d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - 
"flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "0ed8dfb35a3842bf8be3ae505eeba5d1": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a46532aef6c042b884586a337658727c", - "max": 864, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_e43e0ac1d310408b914dddfaee691f9a", - "value": 864 - } + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 524/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.323 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 89 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 437205 |\n", + "| train/ | |\n", + "| entropy_loss | -5.81 |\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| 
n_updates | 87440 |\n", + "| policy_loss | -0.149 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.000791 |\n", + "------------------------------------\n" + ] }, - "105a958acd644ed2bcab9791de217aed": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_b5ddd303cc494ecd865fab744c7c7017", - "IPY_MODEL_477c8ee0c2d94a5f9113b04e7e564aae", - "IPY_MODEL_22963ccded02458d8047644dea108c0f" - ], - "layout": "IPY_MODEL_9cd838bc92c144108ad7fb2717dfa058" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "10bbdba263394d43937799fb02a5308e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_dd70225db29144e4908328ba805f4771", - "max": 41074, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_cfd430b93720484583e1bfa3c6ea944a", - "value": 41074 - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 525/600: Total Reward = -0.39\n", + "Logging to runs/aqrdlwti/A2C_0\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.81 |\n", + "| ep_rew_mean | -0.315 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 438040 |\n", + "| train/ | |\n", + "| entropy_loss | -5.8 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87607 |\n", + "| policy_loss | -0.162 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00144 |\n", + "------------------------------------\n" + ] }, - "10f2c206b3b6403f871d78f6f665c937": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "13f76670252d4fcbaf43c831b8fe1ee7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - 
"grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 526/600: Total Reward = -0.02\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.291 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 438875 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 |\n", + "| explained_variance | 0.824 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87774 |\n", + "| policy_loss | -0.113 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.000857 |\n", + "------------------------------------\n" + ] }, - "16e142d0ae6f4e9b83fcf24b79b9a858": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": 
null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "1749d9f40d0348d7ae4709fc273c9121": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - 
"width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 527/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.85 |\n", + "| ep_rew_mean | -0.31 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 95 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 439710 |\n", + "| train/ | |\n", + "| entropy_loss | -5.85 |\n", + "| explained_variance | 0.972 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 87941 |\n", + "| policy_loss | 0.163 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0013 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 528/600: Total Reward = -0.40\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.26 |\n", + "| ep_rew_mean | -0.251 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 440545 |\n", + "| train/ | |\n", + "| entropy_loss | -5.85 |\n", + "| explained_variance | 0.64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88108 |\n", + "| policy_loss | -0.442 |\n", + "| std | 0.589 |\n", + "| value_loss | 0.012 |\n", + "------------------------------------\n" + ] }, - "19a6e413bee74cc083c01d1b5d8dfee6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": 
"DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "1c9b00b0286f45a1a38ad992723d0efa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 529/600: Total Reward = -0.51\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.55 |\n", + "| ep_rew_mean | -0.273 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 71 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 441380 |\n", + "| train/ | |\n", + "| entropy_loss | -5.81 |\n", + "| explained_variance | -8.13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88275 |\n", + "| policy_loss | 1.96 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.165 |\n", + "------------------------------------\n" + ] }, - "1ee3d947c1844b138e29a138801a5e00": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - 
"_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_53e05aff6c98440e852b7d426197edb1", - "placeholder": "", - "style": "IPY_MODEL_666bb151dce54bec81944df29f2a8a3b", - "value": "Upload 4 LFS files: 100%" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "22963ccded02458d8047644dea108c0f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_5a73da851a4443208a79bfadb025bec4", - "placeholder": "", - "style": "IPY_MODEL_10f2c206b3b6403f871d78f6f665c937", - "value": " 41.1k/41.1k [00:00<00:00, 98.6kB/s]" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 530/600: Total Reward = -0.53\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.22 |\n", + "| ep_rew_mean | -0.419 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 124 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 442215 |\n", + "| train/ | |\n", + "| entropy_loss | -5.84 |\n", + "| explained_variance | 0.932 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88442 |\n", + "| policy_loss | 0.842 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0153 |\n", + "------------------------------------\n" + ] }, - "23f1b8afdbbf4dc3a3ddaaae094a287a": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_7595680d757448eab257e813cbf506b1", - "placeholder": "", - "style": "IPY_MODEL_b99ab3e32f10455398319b8c3e99ed18", - "value": " 864/864 [00:00<00:00, 5.30kB/s]" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "253114895704438e802214336c28eaf2": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 531/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.89 |\n", + "| ep_rew_mean | -0.301 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 70 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 7 |\n", + "| total_timesteps | 443050 |\n", + "| train/ | |\n", + "| entropy_loss | -5.84 |\n", + "| explained_variance | 0.987 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88609 |\n", + "| policy_loss | -0.299 |\n", + "| std | 0.588 |\n", + "| 
value_loss | 0.00245 |\n", + "------------------------------------\n" + ] }, - "28e092db03774e4db332da6f1f0d3dd6": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_e64bd642eab44cffa9d8c8e3a64103e7", - "placeholder": "", - "style": "IPY_MODEL_053eca19faa24c35833c132c1af7b94c", - "value": "pytorch_variables.pth: 100%" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "297db692b2d749979115c2468eb4fb00": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - 
"min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 532/600: Total Reward = -0.61\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.55 |\n", + "| ep_rew_mean | -0.274 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 443885 |\n", + "| train/ | |\n", + "| entropy_loss | -5.85 |\n", + "| explained_variance | 0.121 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88776 |\n", + "| policy_loss | 0.0418 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.000901 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 533/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.71 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 81 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 444720 |\n", + "| train/ | |\n", + "| entropy_loss | -5.84 |\n", + "| explained_variance | 0.986 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 88943 |\n", + "| policy_loss | 0.625 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0103 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": 
"stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "31324087708d4cfca89d7a16e3362a6d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ac6c299386344084ae368388e69f247a", - "placeholder": "", - "style": "IPY_MODEL_87f804cec1814961a3f20d731cf1cb4b", - "value": "policy.pth: 100%" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 534/600: Total Reward = -0.27\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.311 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 131 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 445555 |\n", + "| train/ | |\n", + "| entropy_loss | -5.84 |\n", + "| explained_variance | 0.918 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89110 |\n", + "| policy_loss | -0.162 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.00103 |\n", + "------------------------------------\n" + ] }, - "331d5ea6d2a743c2a9bea658cda2e80a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": 
"LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "333eaf4f77744246b98456a844fcca2d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 535/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.33 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 133 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| 
total_timesteps | 446390 |\n", + "| train/ | |\n", + "| entropy_loss | -5.84 |\n", + "| explained_variance | 0.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89277 |\n", + "| policy_loss | 0.0358 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.000241 |\n", + "------------------------------------\n" + ] }, - "360923166a664f24971d958a204eb04c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "37169f3a5e7048efb2b8dce9dbba1a3c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 536/600: Total Reward = -0.59\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.343 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 447225 |\n", + "| train/ | |\n", + "| entropy_loss | -5.83 
|\n", + "| explained_variance | 0.997 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89444 |\n", + "| policy_loss | -0.127 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.000544 |\n", + "------------------------------------\n" + ] }, - "3a3f1bb3be714c71bc71a7db8fc31189": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_28e092db03774e4db332da6f1f0d3dd6", - "IPY_MODEL_b899e2f587a34972ae41853ae62236fe", - "IPY_MODEL_23f1b8afdbbf4dc3a3ddaaae094a287a" - ], - "layout": "IPY_MODEL_aa6c1072bcd94715a93b9c0f598981a5" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "3d9b748f2b5d460cbba3332dd1ee43b7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - 
"grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 537/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.95 |\n", + "| ep_rew_mean | -0.4 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 448060 |\n", + "| train/ | |\n", + "| entropy_loss | -5.78 |\n", + "| explained_variance | 0.79 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89611 |\n", + "| policy_loss | 0.0434 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.00117 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 538/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.03 |\n", + "| ep_rew_mean | -0.326 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 76 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 448895 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.961 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 
89778 |\n", + "| policy_loss | -0.0038 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.000711 |\n", + "------------------------------------\n" + ] }, - "406df9e39d29497dab95d0890ebc006f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "451f1b9792f64fa68eb0f68c0aefbc0d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - 
"_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 539/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.75 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 449730 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.998 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 89945 |\n", + "| policy_loss | 0.133 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.00105 |\n", + "------------------------------------\n" + ] }, - "477c8ee0c2d94a5f9113b04e7e564aae": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_d2b3dfcf04a44fb692b800cf0bb7f802", - "max": 41074, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_a0fffd5ee818454da360030f6e1466ae", - "value": 41074 - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "4916f07bcd1c47339946d15b1b44285b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 540/600: Total Reward = -0.38\n", + "Logging to 
runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.332 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 72 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 450565 |\n", + "| train/ | |\n", + "| entropy_loss | -5.76 |\n", + "| explained_variance | 0.461 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90112 |\n", + "| policy_loss | 0.124 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.00198 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 541/600: Total Reward = -0.02\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.34 |\n", + "| ep_rew_mean | -0.261 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 451400 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90279 |\n", + "| policy_loss | 0.269 |\n", + "| std | 0.582 |\n", + "| value_loss | 0.00193 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 542/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.71 |\n", + "| ep_rew_mean | -0.301 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 76 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 452235 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.846 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90446 |\n", + "| policy_loss | -0.0856 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.00144 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 543/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.64 |\n", + "| ep_rew_mean | -0.293 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 453070 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.978 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90613 |\n", + "| policy_loss | 0.228 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0024 |\n", + "------------------------------------\n" + ] }, - "4e4f9c3ec0644222b01f71e9a768da09": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_331d5ea6d2a743c2a9bea658cda2e80a", - "placeholder": "", - "style": "IPY_MODEL_e8db5be66c3342dc8817b3990fd4fd12", - "value": " 4/4 [00:00<00:00, 2.71it/s]" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "508ba1fe83a24b83a6b26626ff71f22f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_cd24c5adf102474493f9b7642a65dd57", - "max": 4, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_8711ae2fa5174670afb295d8c788d2ac", - "value": 4 - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 544/600: Total Reward = -0.46\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.99 |\n", + "| ep_rew_mean | -0.324 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 90 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 453905 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.593 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90780 |\n", + "| policy_loss | -0.0748 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.00169 |\n", + "------------------------------------\n" + ] }, - "536343b057e64933a62fe9aac074da24": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "53e05aff6c98440e852b7d426197edb1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - 
"flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 545/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.01 |\n", + "| ep_rew_mean | -0.328 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 454740 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.851 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 90947 |\n", + "| policy_loss | 0.176 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.00232 |\n", + "------------------------------------\n" + ] }, - "57b8419adedf4abe8d950d18057aa7e5": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 
\u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "5a73da851a4443208a79bfadb025bec4": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 546/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.17 |\n", + "| ep_rew_mean | -0.346 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 125 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 455575 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | 0.924 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91114 |\n", + "| 
policy_loss | -0.0204 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.000674 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "5dfdcec102ef454893d2337010e465b7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 547/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.01 |\n", + "| ep_rew_mean | -0.329 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 109 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 456410 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.899 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91281 |\n", + "| policy_loss | -0.641 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.00996 |\n", + "------------------------------------\n" + ] }, - "64b05aba20224e9caebc639dc6dd7c8f": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - 
"align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "666bb151dce54bec81944df29f2a8a3b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 548/600: Total Reward = -0.65\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.84 |\n", + "| ep_rew_mean | -0.31 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 457245 |\n", + "| train/ | 
|\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.453 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91448 |\n", + "| policy_loss | -0.374 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.00723 |\n", + "------------------------------------\n" + ] }, - "6d42ee9f0e6e4195a8bae8981d14bfea": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "71ccd1edcb2149f4a5d79b047db05407": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c753780ac4be491cb5868d0c65495055", - "IPY_MODEL_8ab12f359394454ea92d8810ff7a3f21", - "IPY_MODEL_8ab306238dc74406928f03c2b665c889" - ], - "layout": "IPY_MODEL_d2bc8e91840d498eaaec6040b4fb6356" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 549/600: Total Reward = -0.08\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.93 |\n", + "| ep_rew_mean | -0.407 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| 
fps | 75 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 458080 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.737 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91615 |\n", + "| policy_loss | -0.619 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.0125 |\n", + "------------------------------------\n" + ] }, - "7595680d757448eab257e813cbf506b1": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 550/600: Total Reward = 
-0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.02 |\n", + "| ep_rew_mean | -0.328 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 458915 |\n", + "| train/ | |\n", + "| entropy_loss | -5.78 |\n", + "| explained_variance | 0.867 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91782 |\n", + "| policy_loss | -0.213 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.00244 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 551/600: Total Reward = -0.42\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.65 |\n", + "| ep_rew_mean | -0.296 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 75 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 459750 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.533 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 91949 |\n", + "| policy_loss | -0.237 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00249 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 552/600: Total Reward = -0.21\n", + "Logging to 
runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.86 |\n", + "| ep_rew_mean | -0.305 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 460585 |\n", + "| train/ | |\n", + "| entropy_loss | -5.77 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92116 |\n", + "| policy_loss | 0.0207 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.000271 |\n", + "------------------------------------\n" + ] }, - "79a31c28dd1e4bb8b2e2252b5fd1a940": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "7b3c7a0ac0424cb7af4f7e1dfbdc513b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - 
"grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 553/600: Total Reward = -0.03\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.78 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 99 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 461420 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.994 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92283 |\n", + "| policy_loss | 0.139 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.000646 |\n", + "------------------------------------\n" + ] }, - "82fedcb1de0c4311bdab0a773473de38": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_16e142d0ae6f4e9b83fcf24b79b9a858", - "placeholder": "", - "style": "IPY_MODEL_8d88faf5346647fcb45737d57d628f41", - "value": "pytorch_variables.pth: 100%" - } + { + "output_type": "stream", + "name": 
"stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "852bd4183a5c4e2b9308a8f2d0f7191d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 554/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.2 |\n", + "| ep_rew_mean | -0.331 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 462255 |\n", + "| train/ | |\n", + "| entropy_loss | -5.75 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92450 |\n", + "| policy_loss | 0.52 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.00656 |\n", + "------------------------------------\n" + ] }, - "8711ae2fa5174670afb295d8c788d2ac": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 
\u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "87403f7d79a64dce87b5b099fc68d0b5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 555/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.66 |\n", + "| ep_rew_mean | -0.371 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 118 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 463090 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92617 |\n", + "| 
policy_loss | 0.999 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.0436 |\n", + "------------------------------------\n" + ] }, - "87f804cec1814961a3f20d731cf1cb4b": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 556/600: Total Reward = -0.32\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.71 |\n", + "| ep_rew_mean | -0.684 |\n", + "| success_rate | 0.94 |\n", + "| time/ | |\n", + "| fps | 81 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 463925 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.822 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92784 |\n", + "| policy_loss | -0.569 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.0273 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 557/600: Total Reward = -0.16\n", + "Logging to runs/aqrdlwti/A2C_0\n", + 
"------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.3 |\n", + "| ep_rew_mean | -0.558 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 464760 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | -1.41 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 92951 |\n", + "| policy_loss | -1.69 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.175 |\n", + "------------------------------------\n" + ] }, - "8ab12f359394454ea92d8810ff7a3f21": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b35b335258a641fb8c52e2d68afdea95", - "max": 97733, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_f4c0efc7782e41e5966d2ae9eba94a6a", - "value": 97733 - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "8ab306238dc74406928f03c2b665c889": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - 
"description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_64b05aba20224e9caebc639dc6dd7c8f", - "placeholder": "", - "style": "IPY_MODEL_37169f3a5e7048efb2b8dce9dbba1a3c", - "value": " 97.7k/97.7k [00:00<00:00, 336kB/s]" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 558/600: Total Reward = -0.82\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.56 |\n", + "| ep_rew_mean | -0.588 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 72 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 465595 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | 0.935 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93118 |\n", + "| policy_loss | 0.545 |\n", + "| std | 0.578 |\n", + "| value_loss | 0.0188 |\n", + "------------------------------------\n" + ] }, - "8bf9caee159f4f03b4b9be6500f7118a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": 
null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "8d88faf5346647fcb45737d57d628f41": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 559/600: Total Reward = -0.20\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.21 |\n", + "| ep_rew_mean | -0.449 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 466430 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93285 |\n", + "| policy_loss | -0.0205 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.000135 |\n", + "------------------------------------\n" + ] }, - "8eecebc2ee5a42978670c9f2b0e5f592": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": 
"@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "8f7df43c9c924ec59695450fa933699f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b93f82c0226747c5a4cf124d92c4a359", - "placeholder": "", - "style": "IPY_MODEL_f824d4ff15ec435f94fd0ffd1950064a", - "value": " 41.1k/41.1k [00:00<00:00, 125kB/s]" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 560/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.59 |\n", + "| ep_rew_mean | -0.377 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 109 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 467265 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.956 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93452 |\n", + "| policy_loss | 0.362 |\n", + "| std | 0.584 |\n", + "| value_loss | 0.00706 |\n", + "------------------------------------\n" + ] }, - "9286d4d05f354073a56ccab0f3fd0bba": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - 
"_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "9564480530bc447da27e15749428bdfd": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 561/600: Total Reward = -0.18\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.81 |\n", + "| 
ep_rew_mean | -0.481 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 130 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 468100 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.161 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93619 |\n", + "| policy_loss | 1.48 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0532 |\n", + "------------------------------------\n" + ] }, - "9b4427af69fd454eb54eaf678465ceee": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - 
"9cd838bc92c144108ad7fb2717dfa058": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 562/600: Total Reward = -0.09\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.5 |\n", + "| ep_rew_mean | -0.36 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 107 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 468935 |\n", + "| train/ | |\n", + "| entropy_loss | -5.7 |\n", + "| explained_variance | 0.382 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93786 |\n", + "| policy_loss | 1.23 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0692 |\n", + "------------------------------------\n" + ] }, - "a0fffd5ee818454da360030f6e1466ae": { - 
"model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "a46532aef6c042b884586a337658727c": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"Episode 563/600: Total Reward = -0.24\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.12 |\n", + "| ep_rew_mean | -0.412 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 77 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 469770 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | -11.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 93953 |\n", + "| policy_loss | -2.64 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.245 |\n", + "------------------------------------\n" + ] }, - "a6d98acbf717475dbfd80407d9256f5d": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 
\u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "a7098ce07385468ba0b61a39601155ca": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_a6d98acbf717475dbfd80407d9256f5d", - "max": 43406, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_57b8419adedf4abe8d950d18057aa7e5", - "value": 43406 - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 564/600: Total Reward = -0.24\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.97 |\n", + "| ep_rew_mean | -0.44 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 470605 |\n", + "| train/ | |\n", + "| entropy_loss | -5.73 |\n", + "| explained_variance | 0.942 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94120 |\n", + "| policy_loss | -0.539 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.0129 |\n", + "------------------------------------\n" + ] }, - "aa6c1072bcd94715a93b9c0f598981a5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - 
"_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "ac6c299386344084ae368388e69f247a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - 
"justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 565/600: Total Reward = -0.33\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.48 |\n", + "| ep_rew_mean | -0.373 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 75 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 471440 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.365 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94287 |\n", + "| policy_loss | -0.0916 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.00317 |\n", + "------------------------------------\n" + ] }, - "ad10bcfa05b64376905c996ecc1d455d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3d9b748f2b5d460cbba3332dd1ee43b7", - "max": 4, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_360923166a664f24971d958a204eb04c", - "value": 4 - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call 
`wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "adc0827744e1479b8833205a44c83e1b": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 566/600: Total Reward = -0.30\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.29 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 113 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 472275 |\n", + "| train/ | |\n", + "| entropy_loss | -5.77 |\n", + "| explained_variance | 0.38 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94454 |\n", + "| policy_loss | 0.437 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.00852 |\n", + 
"------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 567/600: Total Reward = -0.46\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.88 |\n", + "| ep_rew_mean | -0.389 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 105 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 473110 |\n", + "| train/ | |\n", + "| entropy_loss | -5.74 |\n", + "| explained_variance | 0.766 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94621 |\n", + "| policy_loss | 0.401 |\n", + "| std | 0.589 |\n", + "| value_loss | 0.0421 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "aef7b10c8f44483c825b7fd9a847f089": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_ecf4c9e4c7cd4ba698a5e8c4aa8b3071", - "placeholder": "", - "style": "IPY_MODEL_253114895704438e802214336c28eaf2", - "value": " 864/864 [00:00<00:00, 6.77kB/s]" - } + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Episode 568/600: Total Reward = -0.95\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.39 |\n", + "| ep_rew_mean | -0.425 |\n", + "| success_rate | 0.98 |\n", + "| time/ | |\n", + "| fps | 112 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 473945 |\n", + "| train/ | |\n", + "| entropy_loss | -5.71 |\n", + "| explained_variance | -2.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94788 |\n", + "| policy_loss | -1.07 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0609 |\n", + "------------------------------------\n" + ] }, - "b30ea1ecb76544e4a4b4b6171e71b728": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_f4987c77de034d6a9a5fbe63f5db9b9e", - "IPY_MODEL_bdbb3e03cdda455ca313b37212800dbb", - "IPY_MODEL_e3e212292e2649f1969c4c31647cb680" - ], - "layout": "IPY_MODEL_dced652d1bd14be7ba2b65eae579fba5" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "b35b335258a641fb8c52e2d68afdea95": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": 
"1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 569/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.27 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 131 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 474780 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.602 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 94955 |\n", + "| policy_loss | -0.274 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.00404 |\n", + "------------------------------------\n" + ] }, - "b515add3f25544cbb2b14b79b2e2770a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": 
"1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_82fedcb1de0c4311bdab0a773473de38", - "IPY_MODEL_0ed8dfb35a3842bf8be3ae505eeba5d1", - "IPY_MODEL_aef7b10c8f44483c825b7fd9a847f089" - ], - "layout": "IPY_MODEL_87403f7d79a64dce87b5b099fc68d0b5" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "b5ddd303cc494ecd865fab744c7c7017": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_8bf9caee159f4f03b4b9be6500f7118a", - "placeholder": "", - "style": "IPY_MODEL_5dfdcec102ef454893d2337010e465b7", - "value": "policy.pth: 100%" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 570/600: Total Reward = -0.14\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.343 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 72 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 475615 |\n", + "| train/ | |\n", + "| entropy_loss | -5.69 |\n", + "| explained_variance | 0.932 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95122 |\n", + "| policy_loss | -0.00937 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00128 |\n", + "------------------------------------\n" + ] }, - "b6d13ee968fa4473baecb877b8f302f8": { - "model_module": 
"@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "b899e2f587a34972ae41853ae62236fe": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_536343b057e64933a62fe9aac074da24", - "max": 864, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_333eaf4f77744246b98456a844fcca2d", - "value": 864 - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 571/600: Total Reward = -0.07\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.91 |\n", + "| ep_rew_mean | -0.302 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 119 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 476450 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.931 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95289 |\n", + "| 
policy_loss | 0.244 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.00593 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 572/600: Total Reward = -0.18\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.81 |\n", + "| ep_rew_mean | -0.389 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 72 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 477285 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | -0.873 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95456 |\n", + "| policy_loss | -0.887 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0343 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 573/600: Total Reward = -4.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.76 |\n", + "| ep_rew_mean | -0.473 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 121 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 478120 |\n", + "| train/ | |\n", + "| entropy_loss | -5.64 |\n", + "| explained_variance | 0.435 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95623 |\n", + "| policy_loss | 1.83 |\n", 
+ "| std | 0.582 |\n", + "| value_loss | 0.199 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "b93f82c0226747c5a4cf124d92c4a359": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 574/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.23 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 115 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 
|\n", + "| total_timesteps | 478955 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95790 |\n", + "| policy_loss | 0.248 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.00272 |\n", + "------------------------------------\n" + ] }, - "b99ab3e32f10455398319b8c3e99ed18": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "b9f0d24741ba48308545746354105fc3": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_31324087708d4cfca89d7a16e3362a6d", - "IPY_MODEL_10bbdba263394d43937799fb02a5308e", - "IPY_MODEL_8f7df43c9c924ec59695450fa933699f" - ], - "layout": "IPY_MODEL_db5981fe348443ee9ea1e17a3c86aa34" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 575/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.68 |\n", + "| ep_rew_mean | -0.284 
|\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 78 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 479790 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | 0.646 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 95957 |\n", + "| policy_loss | 0.592 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.0178 |\n", + "------------------------------------\n" + ] }, - "ba48466f16a44983b96b20123c966be9": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "bdbb3e03cdda455ca313b37212800dbb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_9286d4d05f354073a56ccab0f3fd0bba", - "max": 43406, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_852bd4183a5c4e2b9308a8f2d0f7191d", - "value": 43406 - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 576/600: Total Reward = 
-0.52\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.8 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 119 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 480625 |\n", + "| train/ | |\n", + "| entropy_loss | -5.72 |\n", + "| explained_variance | 0.929 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96124 |\n", + "| policy_loss | -1.44 |\n", + "| std | 0.59 |\n", + "| value_loss | 0.0749 |\n", + "------------------------------------\n" + ] }, - "c0b5a4535a3b4ca2b0e7c86e1b3ab5ec": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using 
several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "c135ded2efad4a0d8bddea2e51bc6459": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f183c2895da7440a80edc87f5a130fee", - "placeholder": "", - "style": "IPY_MODEL_8eecebc2ee5a42978670c9f2b0e5f592", - "value": " 4/4 [00:00<00:00, 2.98it/s]" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 577/600: Total Reward = -0.10\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.39 |\n", + "| ep_rew_mean | -0.353 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 73 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 481460 |\n", + "| train/ | |\n", + "| entropy_loss | -5.68 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96291 |\n", + "| policy_loss | -0.619 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0108 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 578/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| 
ep_len_mean | 4.05 |\n", + "| ep_rew_mean | -0.322 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 129 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 482295 |\n", + "| train/ | |\n", + "| entropy_loss | -5.65 |\n", + "| explained_variance | 0.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96458 |\n", + "| policy_loss | -0.0796 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.000614 |\n", + "------------------------------------\n" + ] }, - "c48c8dc4a77f4dfaa2adede6518d373c": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "success", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_0c6aaba7ffb04fe2a5babdcb5c3e955d", - "max": 100507, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_b6d13ee968fa4473baecb877b8f302f8", - "value": 100507 - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "c753780ac4be491cb5868d0c65495055": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_1749d9f40d0348d7ae4709fc273c9121", - "placeholder": "", - "style": "IPY_MODEL_6d42ee9f0e6e4195a8bae8981d14bfea", - "value": "a2c-cartpole.zip: 100%" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 579/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.61 |\n", + "| ep_rew_mean | -0.52 |\n", + "| success_rate | 0.97 |\n", + "| time/ | |\n", + "| fps | 83 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 483130 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | 0.874 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96625 |\n", + "| policy_loss | 0.00827 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00111 |\n", + "------------------------------------\n" + ] }, - "c8df02b3721d4a97b6c6336e896ff179": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_f22b7b1e400a4ed98dc754b073a9a40a", - "placeholder": "", - "style": "IPY_MODEL_19a6e413bee74cc083c01d1b5d8dfee6", - "value": "a2c-cartpole-wb.zip: 100%" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "cd24c5adf102474493f9b7642a65dd57": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": 
"@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 580/600: Total Reward = -0.04\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.77 |\n", + "| ep_rew_mean | -0.594 |\n", + "| success_rate | 0.96 |\n", + "| time/ | |\n", + "| fps | 128 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 483965 |\n", + "| train/ | |\n", + "| entropy_loss | -5.64 |\n", + "| explained_variance | 0.662 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96792 |\n", + "| policy_loss | -1.7 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.114 |\n", + "------------------------------------\n" + ] }, - "cdca8fecc2cc412282c4f41ba141017d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": 
"@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_406df9e39d29497dab95d0890ebc006f", - "placeholder": "", - "style": "IPY_MODEL_da0b9ac258dd482fa342021ceb684aa7", - "value": "Upload 4 LFS files: 100%" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "cfd430b93720484583e1bfa3c6ea944a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 581/600: Total Reward = -0.77\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.94 |\n", + "| ep_rew_mean | -0.5 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 123 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 484800 |\n", + "| train/ | |\n", + "| entropy_loss | -5.63 |\n", + "| explained_variance | -0.163 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 96959 |\n", + "| policy_loss | 0.807 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0315 |\n", + "------------------------------------\n" + ] }, - "d2b3dfcf04a44fb692b800cf0bb7f802": { - "model_module": 
"@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 582/600: Total Reward = -0.28\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.42 |\n", + "| ep_rew_mean | -0.373 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 86 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 485635 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | 0.899 |\n", + "| learning_rate | 0.0007 |\n", 
+ "| n_updates | 97126 |\n", + "| policy_loss | 0.595 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.0199 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "d2bc8e91840d498eaaec6040b4fb6356": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 583/600: Total Reward = -0.18\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.16 |\n", + "| ep_rew_mean | -0.324 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 117 
|\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 486470 |\n", + "| train/ | |\n", + "| entropy_loss | -5.67 |\n", + "| explained_variance | 0.921 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97293 |\n", + "| policy_loss | -0.165 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.00223 |\n", + "------------------------------------\n" + ] }, - "d7cfb3fea4444e6ca69e7bdb4f86d40d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_c8df02b3721d4a97b6c6336e896ff179", - "IPY_MODEL_c48c8dc4a77f4dfaa2adede6518d373c", - "IPY_MODEL_ea68977e2bc642358854ed2313d4c9bc" - ], - "layout": "IPY_MODEL_13f76670252d4fcbaf43c831b8fe1ee7" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "d996b268e9c24f85a73ca1017f8cc5fa": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_cdca8fecc2cc412282c4f41ba141017d", - "IPY_MODEL_508ba1fe83a24b83a6b26626ff71f22f", - "IPY_MODEL_c135ded2efad4a0d8bddea2e51bc6459" - ], - "layout": "IPY_MODEL_9b4427af69fd454eb54eaf678465ceee" - 
} + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 584/600: Total Reward = -0.21\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.21 |\n", + "| ep_rew_mean | -0.415 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 73 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 487305 |\n", + "| train/ | |\n", + "| entropy_loss | -5.65 |\n", + "| explained_variance | 0.705 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97460 |\n", + "| policy_loss | 0.333 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00704 |\n", + "------------------------------------\n" + ] }, - "da0b9ac258dd482fa342021ceb684aa7": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "dac9eb72052f448fbb17389d3acfbbfb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_451f1b9792f64fa68eb0f68c0aefbc0d", - 
"placeholder": "", - "style": "IPY_MODEL_ba48466f16a44983b96b20123c966be9", - "value": "policy.optimizer.pth: 100%" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 585/600: Total Reward = -0.22\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.43 |\n", + "| ep_rew_mean | -0.354 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 116 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 488140 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | -1.64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97627 |\n", + "| policy_loss | 1.49 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.0482 |\n", + "------------------------------------\n" + ] }, - "db5981fe348443ee9ea1e17a3c86aa34": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": 
null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 586/600: Total Reward = -0.75\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.98 |\n", + "| ep_rew_mean | -0.401 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 100 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 488975 |\n", + "| train/ | |\n", + "| entropy_loss | -5.64 |\n", + "| explained_variance | 0.316 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97794 |\n", + "| policy_loss | -0.481 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.0144 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 587/600: Total Reward = -0.59\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.76 |\n", + "| ep_rew_mean | -0.404 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 97 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 489810 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | 0.964 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 97961 |\n", + "| policy_loss | -0.0906 |\n", + "| std | 0.587 |\n", + "| value_loss | 0.00213 
|\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 588/600: Total Reward = -0.38\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.85 |\n", + "| ep_rew_mean | -0.406 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 120 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 490645 |\n", + "| train/ | |\n", + "| entropy_loss | -5.64 |\n", + "| explained_variance | 0.263 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98128 |\n", + "| policy_loss | 0.262 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.00946 |\n", + "------------------------------------\n" + ] }, - "dced652d1bd14be7ba2b65eae579fba5": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "dd70225db29144e4908328ba805f4771": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 589/600: Total Reward = -0.15\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| 
ep_len_mean | 4.08 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 70 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 7 |\n", + "| total_timesteps | 491480 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | -0.24 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98295 |\n", + "| policy_loss | -0.135 |\n", + "| std | 0.588 |\n", + "| value_loss | 0.00285 |\n", + "------------------------------------\n" + ] }, - "de7f77f12aa0470ab36a5afecd9f14fc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_1ee3d947c1844b138e29a138801a5e00", - "IPY_MODEL_ad10bcfa05b64376905c996ecc1d455d", - "IPY_MODEL_4e4f9c3ec0644222b01f71e9a768da09" - ], - "layout": "IPY_MODEL_c0b5a4535a3b4ca2b0e7c86e1b3ab5ec" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "e097d0070f30469abbd542994e74bf81": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - 
"flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 590/600: Total Reward = -0.72\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.25 |\n", + "| ep_rew_mean | -0.342 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 113 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 492315 |\n", + "| train/ | |\n", + "| entropy_loss | -5.64 |\n", + "| explained_variance | 0.98 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98462 |\n", + "| policy_loss | -0.12 |\n", + "| std | 0.585 |\n", + "| value_loss | 0.000953 |\n", + "------------------------------------\n" + ] }, - "e3e212292e2649f1969c4c31647cb680": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_297db692b2d749979115c2468eb4fb00", - "placeholder": "", - "style": 
"IPY_MODEL_fad8b1c404724e4dbd05c747b1bcd5de", - "value": " 43.4k/43.4k [00:00<00:00, 150kB/s]" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 591/600: Total Reward = -0.77\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.11 |\n", + "| ep_rew_mean | -0.328 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 81 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 493150 |\n", + "| train/ | |\n", + "| entropy_loss | -5.62 |\n", + "| explained_variance | -0.494 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98629 |\n", + "| policy_loss | -0.64 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0174 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 592/600: Total Reward = -0.36\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.43 |\n", + "| ep_rew_mean | -0.362 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 122 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 493985 |\n", + "| train/ | |\n", + "| entropy_loss | -5.66 |\n", + "| explained_variance | 0.0971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98796 |\n", + "| policy_loss | -0.925 |\n", + "| std | 0.587 |\n", + "| 
value_loss | 0.0345 |\n", + "------------------------------------\n" + ] }, - "e43e0ac1d310408b914dddfaee691f9a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "e64bd642eab44cffa9d8c8e3a64103e7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - 
"top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 593/600: Total Reward = -0.13\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.98 |\n", + "| ep_rew_mean | -0.336 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 108 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 494820 |\n", + "| train/ | |\n", + "| entropy_loss | -5.68 |\n", + "| explained_variance | 0.976 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 98963 |\n", + "| policy_loss | 0.0786 |\n", + "| std | 0.589 |\n", + "| value_loss | 0.000305 |\n", + "------------------------------------\n" + ] }, - "e8db5be66c3342dc8817b3990fd4fd12": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "ea68977e2bc642358854ed2313d4c9bc": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_7b3c7a0ac0424cb7af4f7e1dfbdc513b", - "placeholder": "", - "style": "IPY_MODEL_79a31c28dd1e4bb8b2e2252b5fd1a940", - "value": " 101k/101k [00:00<00:00, 113kB/s]" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 594/600: Total Reward = -0.43\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.02 |\n", + "| ep_rew_mean | -0.332 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 71 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 495655 |\n", + "| train/ | |\n", + "| entropy_loss | -5.65 |\n", + "| explained_variance | 0.0608 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99130 |\n", + "| policy_loss | 0.0974 |\n", + "| std | 0.586 |\n", + "| value_loss | 0.00422 |\n", + "------------------------------------\n" + ] }, - "ecf4c9e4c7cd4ba698a5e8c4aa8b3071": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": 
null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "f183c2895da7440a80edc87f5a130fee": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 595/600: Total Reward = -0.23\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.29 |\n", + "| ep_rew_mean | -0.418 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 113 |\n", + "| iterations 
| 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 496490 |\n", + "| train/ | |\n", + "| entropy_loss | -5.59 |\n", + "| explained_variance | 0.659 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99297 |\n", + "| policy_loss | -0.117 |\n", + "| std | 0.58 |\n", + "| value_loss | 0.0167 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "f22b7b1e400a4ed98dc754b073a9a40a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 596/600: Total Reward = -0.28\n", + "Logging to 
runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.88 |\n", + "| ep_rew_mean | -0.378 |\n", + "| success_rate | 0.99 |\n", + "| time/ | |\n", + "| fps | 76 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 497325 |\n", + "| train/ | |\n", + "| entropy_loss | -5.59 |\n", + "| explained_variance | 0.994 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99464 |\n", + "| policy_loss | 0.197 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.00119 |\n", + "------------------------------------\n" + ] }, - "f48bd46317b54ebc9d55eee77b4eb165": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_dac9eb72052f448fbb17389d3acfbbfb", - "IPY_MODEL_a7098ce07385468ba0b61a39601155ca", - "IPY_MODEL_03743e7926594213aaa8a4ea7f149e47" - ], - "layout": "IPY_MODEL_e097d0070f30469abbd542994e74bf81" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "f4987c77de034d6a9a5fbe63f5db9b9e": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - 
"description_tooltip": null, - "layout": "IPY_MODEL_adc0827744e1479b8833205a44c83e1b", - "placeholder": "", - "style": "IPY_MODEL_9564480530bc447da27e15749428bdfd", - "value": "policy.optimizer.pth: 100%" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 597/600: Total Reward = -0.69\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.4 |\n", + "| ep_rew_mean | -0.339 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 83 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 498160 |\n", + "| train/ | |\n", + "| entropy_loss | -5.57 |\n", + "| explained_variance | 0.955 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99631 |\n", + "| policy_loss | 0.111 |\n", + "| std | 0.579 |\n", + "| value_loss | 0.00115 |\n", + "------------------------------------\n" + ] }, - "f4c0efc7782e41e5966d2ae9eba94a6a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "f824d4ff15ec435f94fd0ffd1950064a": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - 
"_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 598/600: Total Reward = -0.19\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.48 |\n", + "| ep_rew_mean | -0.27 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 112 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 498995 |\n", + "| train/ | |\n", + "| entropy_loss | -5.59 |\n", + "| explained_variance | 0.896 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99798 |\n", + "| policy_loss | 0.119 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.000871 |\n", + "------------------------------------\n" + ] }, - "fad8b1c404724e4dbd05c747b1bcd5de": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] }, - "a52deb23470f4f9d9acfe28e69f47fb5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "VBoxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "VBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": 
"VBoxView", - "box_style": "", - "children": [], - "layout": "IPY_MODEL_136b0ee69b6a4012a1c4a92ea45a352e" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 599/600: Total Reward = -0.77\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 3.7 |\n", + "| ep_rew_mean | -0.298 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 71 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 7 |\n", + "| total_timesteps | 499830 |\n", + "| train/ | |\n", + "| entropy_loss | -5.58 |\n", + "| explained_variance | 0.968 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99965 |\n", + "| policy_loss | 0.0586 |\n", + "| std | 0.581 |\n", + "| value_loss | 0.00044 |\n", + "------------------------------------\n" + ] }, - "c7294d001a9c44aca712bba2e7141b35": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_02dcba9d3e194f79890227e180c37474", - "placeholder": "", - "style": "IPY_MODEL_6376dea1e82f4c159ea5062c1b3b14ef", - "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. 
</center>" - } + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 600/600: Total Reward = -0.37\n", + "Logging to runs/aqrdlwti/A2C_0\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.75 |\n", + "| ep_rew_mean | -0.388 |\n", + "| success_rate | 1 |\n", + "| time/ | |\n", + "| fps | 108 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 500665 |\n", + "| train/ | |\n", + "| entropy_loss | -5.6 |\n", + "| explained_variance | 0.797 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 100132 |\n", + "| policy_loss | -0.57 |\n", + "| std | 0.583 |\n", + "| value_loss | 0.0129 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Evaluation: mean_reward=-0.46 +/- 0.35\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<Figure size 1000x500 with 1 Axes>" + ], + "image/png": "\n" + }, + "metadata": {} }, - "659607dd3c294904913ccb88a2ecfea5": { - "model_module": "@jupyter-widgets/controls", - "model_name": "PasswordModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "PasswordModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "PasswordView", - "continuous_update": true, - "description": "Token:", - "description_tooltip": null, - "disabled": false, - "layout": "IPY_MODEL_1a6532f14ca74a479f01155019a2a30f", - "placeholder": "", - "style": "IPY_MODEL_3f9f02c927bb4e0c9e0002703189fbfa", - "value": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n", + "create a model card and push everything to the hub. 
It might take up to 1min.\n", + "This is a work in progress: if you encounter a bug, please open an issue.\u001b[0m\n", + "Saving video to /tmp/tmpus46mpyv/-step-0-to-step-1000.mp4\n", + "Moviepy - Building video /tmp/tmpus46mpyv/-step-0-to-step-1000.mp4.\n", + "Moviepy - Writing video /tmp/tmpus46mpyv/-step-0-to-step-1000.mp4\n", + "\n" + ] }, - "ec4b7fdf9f344b5eb5aabfe00395ddea": { - "model_module": "@jupyter-widgets/controls", - "model_name": "CheckboxModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "CheckboxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "CheckboxView", - "description": "Add token as git credential?", - "description_tooltip": null, - "disabled": false, - "indent": true, - "layout": "IPY_MODEL_c8a5fea9ebed4821821592ae85b0af71", - "style": "IPY_MODEL_439ac621f2cb4c0d91749ee09729453b", - "value": true - } + { + "output_type": "stream", + "name": "stderr", + "text": [] }, - "4064541754324b308f9e02565edc7fc2": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ButtonModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ButtonModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ButtonView", - "button_style": "", - "description": "Login", - "disabled": false, - "icon": "", - "layout": "IPY_MODEL_f116aadb2ef94eb8aa8a6b43c7b4fb5d", - "style": "IPY_MODEL_4f814644155549caa91d2d81d9333740", - "tooltip": "" - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Moviepy - Done !\n", + "Moviepy - video ready /tmp/tmpus46mpyv/-step-0-to-step-1000.mp4\n", + "\u001b[38;5;1m✘ 'DummyVecEnv' object has no attribute 
'video_recorder'\u001b[0m\n", + "\u001b[38;5;1m✘ We are unable to generate a replay of your agent, the package_to_hub\n", + "process continues\u001b[0m\n", + "\u001b[38;5;1m✘ Please open an issue at\n", + "https://github.com/huggingface/huggingface_sb3/issues\u001b[0m\n", + "\u001b[38;5;4mℹ Pushing repo oussamab2n/a2c-panda-reach to the Hugging Face Hub\u001b[0m\n" + ] }, - "cc92a18e333e454eaf4d2b27ccad782b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "HTMLModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_3675a21548ee4857b98b3bc0a9c206f3", - "placeholder": "", - "style": "IPY_MODEL_463f22dcd9da484f98795b42c7406fb4", - "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
</center>" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "policy.optimizer.pth: 0%| | 0.00/49.2k [00:00<?, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "c78c5ec0063b416289dce2fe89e9b0ad" + } + }, + "metadata": {} }, - "136b0ee69b6a4012a1c4a92ea45a352e": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": "center", - "align_self": null, - "border": null, - "bottom": null, - "display": "flex", - "flex": null, - "flex_flow": "column", - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": "50%" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Upload 3 LFS files: 0%| | 0/3 [00:00<?, ?it/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "adbdabe296864d4bb49cd7bf8a6eaa81" + } + }, + "metadata": {} }, - "02dcba9d3e194f79890227e180c37474": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": 
"1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "policy.pth: 0%| | 0.00/47.3k [00:00<?, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "af001b75d30f48c7a5a02293afa17ba5" + } + }, + "metadata": {} }, - "6376dea1e82f4c159ea5062c1b3b14ef": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "a2c-panda-reach.zip: 0%| | 0.00/114k [00:00<?, ?B/s]" + ], + 
"application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "fc348c31afbf470a84acde08f7f4142c" + } + }, + "metadata": {} }, - "1a6532f14ca74a479f01155019a2a30f": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[38;5;4mℹ Your model is pushed to the Hub. 
You can view your model here:\n", + "https://huggingface.co/oussamab2n/a2c-panda-reach/tree/main/\u001b[0m\n" + ] }, - "3f9f02c927bb4e0c9e0002703189fbfa": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [] + }, + "metadata": {} }, - "c8a5fea9ebed4821821592ae85b0af71": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": 
"display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "<br> <style><br> .wandb-row {<br> display: flex;<br> flex-direction: row;<br> flex-wrap: wrap;<br> justify-content: flex-start;<br> width: 100%;<br> }<br> .wandb-col {<br> display: flex;<br> flex-direction: column;<br> flex-basis: 100%;<br> flex: 1;<br> padding: 10px;<br> }<br> </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>▁▁▂▂▂▃▄▄▁▄▃▄▂▂▅▁▄▃▆▃▃▆▃▆▁▆▅▂▇▇▇▇▇▁▇██▂▄█</td></tr><tr><td>mean_reward</td><td>▁</td></tr><tr><td>rollout/ep_len_mean</td><td>█▅▅▆▅▂▁▁▂▅▁▁▄▆▆▇▁▄▁▅█▇█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇▁▁</td></tr><tr><td>rollout/ep_rew_mean</td><td>▄▄▄▅▁▅███▆▅█████▃█████▅██▅▄█████████▄█▅█</td></tr><tr><td>rollout/success_rate</td><td>▁▁▂▅▁▃▃▄▄███▇██▄██▅█████████▂▅█▄████████</td></tr><tr><td>std_reward</td><td>▁</td></tr><tr><td>time/fps</td><td>██▆▇▃▆▃▂▂▁▂▅▅▆▄▄▄▄▄▄▄▄▅▁▂▁▄▄▄▃▄▁█▄▅▂▆▅▇▅</td></tr><tr><td>train/entropy_loss</td><td>▁▂▂▂▂▁▁▁▄▂▆▃▃▆▁▄▇▄▂▇▇█▁▁█▇██▂███▂▆▂█▆▆▅▆</td></tr><tr><td>train/explained_variance</td><td>███████████████▁█▄████████▇████▇████████</td></tr><tr><td>train/learning_rate</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/policy_loss</td><td>▄▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/std</td><td>███▇▇▆▆▅▇▅█▄▃▃▃▂▆▆█▂▆▅▁▁▅▁▂▄▇▂▁▃▁▁▁▁▁▁▁▁</td></tr><tr><td>train/value_loss</td><td>▁▁▃▁▁▃▁▃█▁▂▁▁▂▁▁█▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table 
class=\"wandb\"><tr><td>global_step</td><td>500665</td></tr><tr><td>mean_reward</td><td>-0.4558</td></tr><tr><td>model_saved</td><td>True</td></tr><tr><td>rollout/ep_len_mean</td><td>4.75</td></tr><tr><td>rollout/ep_rew_mean</td><td>-0.38819</td></tr><tr><td>rollout/success_rate</td><td>1</td></tr><tr><td>std_reward</td><td>0.34822</td></tr><tr><td>time/fps</td><td>108</td></tr><tr><td>train/entropy_loss</td><td>-5.59675</td></tr><tr><td>train/explained_variance</td><td>0.79654</td></tr><tr><td>train/learning_rate</td><td>0.0007</td></tr><tr><td>train/policy_loss</td><td>-0.56994</td></tr><tr><td>train/std</td><td>0.58343</td></tr><tr><td>train/value_loss</td><td>0.01293</td></tr></table><br/></div></div>" + ] + }, + "metadata": {} }, - "439ac621f2cb4c0d91749ee09729453b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run <strong style=\"color:#cdcd00\">sweet-pyramid-32</strong> at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/aqrdlwti' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/aqrdlwti</a><br> View project at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym</a><br>Synced 5 W&B file(s), 0 media file(s), 3 artifact file(s) and 600 other file(s)" + ] + }, + "metadata": {} }, - "f116aadb2ef94eb8aa8a6b43c7b4fb5d": { - "model_module": "@jupyter-widgets/base", 
- "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Find logs at: <code>./wandb/run-20250226_140257-aqrdlwti/logs</code>" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Modèle entraîné sur 500 épisodes, évalué, sauvegardé et visualisé avec succès !\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Evaluate" + ], + "metadata": { + "id": "iFiJ9KWgjKFP" + } + }, + { + "cell_type": "code", + "source": [ + "import gymnasium as gym\n", + "import panda_gym\n", + "import wandb\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from stable_baselines3 import A2C\n", + "from huggingface_sb3 import load_from_hub\n", + "from gymnasium.wrappers import RecordVideo\n", + "import
os\n", + "\n", + "# Initialize Weights & Biases for evaluation\n", + "wandb.init(project=\"panda-gym\", name=\"evaluation\", config={\"num_episodes\": 100})\n", + "\n", + "# Load the model from Hugging Face Hub\n", + "repo_id = \"oussamab2n/a2c-panda-reach\"\n", + "filename = \"a2c-panda-reach.zip\"\n", + "model_path = load_from_hub(repo_id=repo_id, filename=filename)\n", + "model = A2C.load(model_path)\n", + "\n", + "# Create video folder\n", + "video_dir = \"videos\"\n", + "os.makedirs(video_dir, exist_ok=True)\n", + "\n", + "# Create environment with video recording\n", + "env = gym.make(\"PandaReachJointsDense-v3\", render_mode=\"rgb_array\")\n", + "env = RecordVideo(env, video_folder=video_dir, episode_trigger=lambda e: e % 10 == 0) # Record every 10 episodes\n", + "\n", + "# Run evaluation\n", + "num_episodes = 100\n", + "success_count = 0\n", + "episode_rewards = []\n", + "truncation_rewards = [] # Store reward at truncation\n", + "\n", + "for episode in range(num_episodes):\n", + " obs, _ = env.reset()\n", + " done = False\n", + " total_reward = 0\n", + " truncation_reward = None # Initialize reward at truncation\n", + "\n", + " while not done:\n", + " action, _ = model.predict(obs, deterministic=True)\n", + " obs, reward, terminated, truncated, info = env.step(action)\n", + "\n", + " total_reward += reward\n", + "\n", + " # Check success condition\n", + " if \"is_success\" in info and info[\"is_success\"]:\n", + " success_count += 1\n", + "\n", + " done = terminated or truncated\n", + "\n", + " episode_rewards.append(total_reward)\n", + "\n", + " # Log episode rewards\n", + " wandb.log({\"Episode\": episode + 1, \"Total Reward\": total_reward})\n", + "\n", + " print(f\"Episode {episode+1}: Total Reward = {total_reward} is_success : {info['is_success']}\")\n", + "\n", + "# Log success rate\n", + "success_rate = (success_count / num_episodes) * 100\n", + "wandb.log({\"Success Rate\": success_rate})\n", + "print(f\"\\nSuccess Rate: {success_rate:.2f}% 
({success_count}/{num_episodes})\")\n", + "\n", + "# Close the environment safely\n", + "try:\n", + " env.close()\n", + "except Exception as e:\n", + " print(f\"Warning: Unable to close environment properly: {e}\")\n", + "\n", + "# Plot Total Reward per Episode\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(range(1, num_episodes + 1), episode_rewards, marker=\"o\", linestyle=\"-\", label=\"Episode Reward\")\n", + "plt.xlabel(\"Episode\")\n", + "plt.ylabel(\"Total Reward\")\n", + "plt.title(\"Total Reward per Episode (Evaluation)\")\n", + "plt.legend()\n", + "plt.grid()\n", + "plt.show()\n", + "\n", + "# Finish Weights & Biases logging\n", + "wandb.finish()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 }, - "4f814644155549caa91d2d81d9333740": { - "model_module": "@jupyter-widgets/controls", - "model_name": "ButtonStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ButtonStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "button_color": null, - "font_weight": "" - } + "id": "o0EsUC_8pFK-", + "outputId": "b45c2335-8894-4934-b36a-0bc9db13c0bf" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Episode 1: Total Reward = -0.4062510058283806 is_success : True\n", + "Episode 2: Total Reward = -0.7712566517293453 is_success : True\n", + "Episode 3: Total Reward = -0.22577803954482079 is_success : True\n", + "Episode 4: Total Reward = -0.042276110500097275 is_success : True\n", + "Episode 5: Total Reward = -0.618223849684 is_success : True\n", + "Episode 6: Total Reward = -0.04875709488987923 is_success : True\n", + "Episode 7: Total Reward = -0.5882552899420261 is_success : True\n", + "Episode 8: Total Reward = -0.16985227167606354 is_success : True\n", + 
"Episode 9: Total Reward = -0.17977378517389297 is_success : True\n", + "Episode 10: Total Reward = -0.03690134733915329 is_success : True\n", + "Episode 11: Total Reward = -1.8714497312903404 is_success : True\n", + "Episode 12: Total Reward = -0.35330819338560104 is_success : True\n", + "Episode 13: Total Reward = -0.31165493465960026 is_success : True\n", + "Episode 14: Total Reward = -0.5640652924776077 is_success : True\n", + "Episode 15: Total Reward = -0.33318396657705307 is_success : True\n", + "Episode 16: Total Reward = -0.6635698936879635 is_success : True\n", + "Episode 17: Total Reward = -0.35100577771663666 is_success : True\n", + "Episode 18: Total Reward = -0.3555702306330204 is_success : True\n", + "Episode 19: Total Reward = -0.14223862066864967 is_success : True\n", + "Episode 20: Total Reward = -0.09988411515951157 is_success : True\n", + "Episode 21: Total Reward = -0.4415702186524868 is_success : True\n", + "Episode 22: Total Reward = -0.14504414796829224 is_success : True\n", + "Episode 23: Total Reward = -0.48955822736024857 is_success : True\n", + "Episode 24: Total Reward = -0.868715662509203 is_success : True\n", + "Episode 25: Total Reward = -0.19161485880613327 is_success : True\n", + "Episode 26: Total Reward = -0.6230216957628727 is_success : True\n", + "Episode 27: Total Reward = -0.4985925517976284 is_success : True\n", + "Episode 28: Total Reward = -0.293687392026186 is_success : True\n", + "Episode 29: Total Reward = -0.14396221190690994 is_success : True\n", + "Episode 30: Total Reward = -0.391715832054615 is_success : True\n", + "Episode 31: Total Reward = -0.956602681428194 is_success : True\n", + "Episode 32: Total Reward = -0.1682831607758999 is_success : True\n", + "Episode 33: Total Reward = -0.1536100059747696 is_success : True\n", + "Episode 34: Total Reward = -0.345889188349247 is_success : True\n", + "Episode 35: Total Reward = -0.0851143728941679 is_success : True\n", + "Episode 36: Total Reward = -0.21177243813872337 
is_success : True\n", + "Episode 37: Total Reward = -0.42292168363928795 is_success : True\n", + "Episode 38: Total Reward = -0.39669718965888023 is_success : True\n", + "Episode 39: Total Reward = -0.2870192937552929 is_success : True\n", + "Episode 40: Total Reward = -0.1431233212351799 is_success : True\n", + "Episode 41: Total Reward = -0.2012624740600586 is_success : True\n", + "Episode 42: Total Reward = -0.34745531901717186 is_success : True\n", + "Episode 43: Total Reward = -0.44094180688261986 is_success : True\n", + "Episode 44: Total Reward = -0.35268260911107063 is_success : True\n", + "Episode 45: Total Reward = -0.3315625675022602 is_success : True\n", + "Episode 46: Total Reward = -0.619943555444479 is_success : True\n", + "Episode 47: Total Reward = -1.0147716626524925 is_success : True\n", + "Episode 48: Total Reward = -0.6043238863348961 is_success : True\n", + "Episode 49: Total Reward = -0.35877181962132454 is_success : True\n", + "Episode 50: Total Reward = -0.2241729274392128 is_success : True\n", + "Episode 51: Total Reward = -0.19858066737651825 is_success : True\n", + "Episode 52: Total Reward = -0.04564374312758446 is_success : True\n", + "Episode 53: Total Reward = -0.22833451628684998 is_success : True\n", + "Episode 54: Total Reward = -0.8358038850128651 is_success : True\n", + "Episode 55: Total Reward = -0.7864313051104546 is_success : True\n", + "Episode 56: Total Reward = -0.3810633607208729 is_success : True\n", + "Episode 57: Total Reward = -0.11827689781785011 is_success : True\n", + "Episode 58: Total Reward = -0.5082429815083742 is_success : True\n", + "Episode 59: Total Reward = -0.5666449964046478 is_success : True\n", + "Episode 60: Total Reward = -0.30292200297117233 is_success : True\n", + "Episode 61: Total Reward = -0.6551055945456028 is_success : True\n", + "Episode 62: Total Reward = -2.1062250286340714 is_success : True\n", + "Episode 63: Total Reward = -0.43518895097076893 is_success : True\n", + "Episode 64: Total 
Reward = -0.12211803160607815 is_success : True\n", + "Episode 65: Total Reward = -0.5217533744871616 is_success : True\n", + "Episode 66: Total Reward = -0.2624095845967531 is_success : True\n", + "Episode 67: Total Reward = -0.6889664717018604 is_success : True\n", + "Episode 68: Total Reward = -0.1458827406167984 is_success : True\n", + "Episode 69: Total Reward = -0.6224392205476761 is_success : True\n", + "Episode 70: Total Reward = -0.5712128654122353 is_success : True\n", + "Episode 71: Total Reward = -0.32383403554558754 is_success : True\n", + "Episode 72: Total Reward = -0.9531832113862038 is_success : True\n", + "Episode 73: Total Reward = -0.3022409453988075 is_success : True\n", + "Episode 74: Total Reward = -0.1974497027695179 is_success : True\n", + "Episode 75: Total Reward = -0.46416498720645905 is_success : True\n", + "Episode 76: Total Reward = -0.171061422675848 is_success : True\n", + "Episode 77: Total Reward = -0.13197795674204826 is_success : True\n", + "Episode 78: Total Reward = -0.40117064118385315 is_success : True\n", + "Episode 79: Total Reward = -0.3267452251166105 is_success : True\n", + "Episode 80: Total Reward = -0.013564204797148705 is_success : True\n", + "Episode 81: Total Reward = -0.36473673209547997 is_success : True\n", + "Episode 82: Total Reward = -0.20988689735531807 is_success : True\n", + "Episode 83: Total Reward = -1.1261731199920177 is_success : True\n", + "Episode 84: Total Reward = -0.21271120756864548 is_success : True\n", + "Episode 85: Total Reward = -0.36954135820269585 is_success : True\n", + "Episode 86: Total Reward = -0.1980939283967018 is_success : True\n", + "Episode 87: Total Reward = -0.8878751918673515 is_success : True\n", + "Episode 88: Total Reward = -0.35339905321598053 is_success : True\n", + "Episode 89: Total Reward = -0.033909156918525696 is_success : True\n", + "Episode 90: Total Reward = -2.02072698995471 is_success : True\n", + "Episode 91: Total Reward = -0.5981295704841614 is_success : 
True\n", + "Episode 92: Total Reward = -0.310416866093874 is_success : True\n", + "Episode 93: Total Reward = -0.24613886699080467 is_success : True\n", + "Episode 94: Total Reward = -0.7379350885748863 is_success : True\n", + "Episode 95: Total Reward = -0.2529986910521984 is_success : True\n", + "Episode 96: Total Reward = -0.7348614633083344 is_success : True\n", + "Episode 97: Total Reward = -0.13540641963481903 is_success : True\n", + "Episode 98: Total Reward = -0.22798261418938637 is_success : True\n", + "Episode 99: Total Reward = -0.02772532030940056 is_success : True\n", + "Episode 100: Total Reward = -0.37505700439214706 is_success : True\n", + "\n", + "Success Rate: 100.00% (100/100)\n", + "Warning: Unable to close environment properly: 'RecordVideo' object has no attribute 'enabled'\n" + ] }, - "3675a21548ee4857b98b3bc0a9c206f3": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, 
- "top": null, - "visibility": null, - "width": null - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<Figure size 1000x500 with 1 Axes>" + ], + "image/png": "\n" + }, + "metadata": {} }, - "463f22dcd9da484f98795b42c7406fb4": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [] + }, + "metadata": {} }, - "632684e2f5e648f3ad8eb6f65acbdd6b": { - "model_module": "@jupyter-widgets/controls", - "model_name": "LabelModel", - "model_module_version": "1.5.0", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "LabelModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "LabelView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_b5bc5fcb6fea4586839b5dc6e43ce0f9", - "placeholder": "", - "style": "IPY_MODEL_decc3d18711a459badab9c4def213303", - "value": "Connecting..." 
- } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "<br> <style><br> .wandb-row {<br> display: flex;<br> flex-direction: row;<br> flex-wrap: wrap;<br> justify-content: flex-start;<br> width: 100%;<br> }<br> .wandb-col {<br> display: flex;<br> flex-direction: column;<br> flex-basis: 100%;<br> flex: 1;<br> padding: 10px;<br> }<br> </style><br><div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>Episode</td><td>▁▂▃▃▃▄▄▄▄▄▄▅▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▆▆▆▆▆▇▇▇▇█</td></tr><tr><td>Success Rate</td><td>▁</td></tr><tr><td>Total Reward</td><td>▇▇██▁█▇▇▇███████▇▇▆██▇▇▇▆██▇▇▇██▇▇▇█▇▆▇█</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>Episode</td><td>100</td></tr><tr><td>Success Rate</td><td>100</td></tr><tr><td>Total Reward</td><td>-0.37506</td></tr><tr><td>is_success</td><td>True</td></tr></table><br/></div></div>" + ] + }, + "metadata": {} }, - "b5bc5fcb6fea4586839b5dc6e43ce0f9": { - "model_module": "@jupyter-widgets/base", - "model_name": "LayoutModel", - "model_module_version": "1.2.0", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - 
"max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run <strong style=\"color:#cdcd00\">evaluation</strong> at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/62kngzah' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym/runs/62kngzah</a><br> View project at: <a href='https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym' target=\"_blank\">https://wandb.ai/benyahiamohammedoussama-ecole-central-lyon/panda-gym</a><br>Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)" + ] + }, + "metadata": {} }, - "decc3d18711a459badab9c4def213303": { - "model_module": "@jupyter-widgets/controls", - "model_name": "DescriptionStyleModel", - "model_module_version": "1.5.0", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Find logs at: <code>./wandb/run-20250226_150848-62kngzah/logs</code>" + ] + }, + "metadata": {} } - } + ] } - }, - "nbformat": 4, - "nbformat_minor": 0 + ] } \ No newline at end of file -- GitLab