diff --git a/RL_cartepole__1_.ipynb b/RL_cartepole__1_.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..d6cf64cfd2225bdca6bd84500b08855fcc99631c --- /dev/null +++ b/RL_cartepole__1_.ipynb @@ -0,0 +1,9073 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "3bf20325eeb94361964c8a1620c38727": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0fdb788454a34aa9bc545568ed8afad7", + "IPY_MODEL_c61ff44a39204ce88d3a43c02388b6e6", + "IPY_MODEL_e650459cdbdc425ca554d4f66c3d6d40" + ], + "layout": "IPY_MODEL_9ff1bdcd30904e669984f48e4a93ae0c" + } + }, + "0fdb788454a34aa9bc545568ed8afad7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_69ece13913d844ca8e7209cc125e2d7b", + "placeholder": "", + "style": "IPY_MODEL_3525ebe3a5944b278adff0cd7c59a5d7", + "value": "ECL-TD-RL1-a2c_cartpole.zip: 100%" + } + }, + "c61ff44a39204ce88d3a43c02388b6e6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_656ba980bd9247f0b9f9c4ecf8b1e4a7", + "max": 98147, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_572fe508383646baaf82b2349366c671", + "value": 98147 + } + }, + "e650459cdbdc425ca554d4f66c3d6d40": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c9f0b1c6f1a74c238ccb347bee20b397", + "placeholder": "", + "style": "IPY_MODEL_9d8e4e68afed4d90a59c4293cd4a6b9d", + "value": " 98.1k/98.1k [00:00<00:00, 155kB/s]" + } + }, + "9ff1bdcd30904e669984f48e4a93ae0c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "69ece13913d844ca8e7209cc125e2d7b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3525ebe3a5944b278adff0cd7c59a5d7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "656ba980bd9247f0b9f9c4ecf8b1e4a7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "572fe508383646baaf82b2349366c671": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c9f0b1c6f1a74c238ccb347bee20b397": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9d8e4e68afed4d90a59c4293cd4a6b9d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c14a53fd35174f3ba632a22e3c9dda47": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_02913ffc7b024793ab252e74c0427aa9", + "IPY_MODEL_53f02b9c14564874baecde3983423440" + ], + "layout": "IPY_MODEL_83c1d52aef7d421db0657103710bcd06" + } + }, + "02913ffc7b024793ab252e74c0427aa9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "LabelModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "LabelModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "LabelView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9346924fc0d14e10924684c0ab74891c", + "placeholder": "", + "style": "IPY_MODEL_4cf73107b2a74863a612244964f0fc04", + "value": "1.097 MB of 1.097 MB uploaded (0.008 MB deduped)\r" + } + }, + "53f02b9c14564874baecde3983423440": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c056c0b446134ae69bdc93bd93f3af13", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3d54d2c162b34319bfd1428fb18fd181", + "value": 1 + } + }, + "83c1d52aef7d421db0657103710bcd06": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9346924fc0d14e10924684c0ab74891c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4cf73107b2a74863a612244964f0fc04": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c056c0b446134ae69bdc93bd93f3af13": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3d54d2c162b34319bfd1428fb18fd181": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a47211d565fb45fe95b30b99885c4dbd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_df1580d505af40828061a4c042546f76", + "IPY_MODEL_4627448bbe6d40178e356623c281803f", + "IPY_MODEL_92339b2faa4b47c69b5457aa9631edf7" + ], + "layout": "IPY_MODEL_a337aa65ec8b498f87cb74caa342a6a3" + } + }, + "df1580d505af40828061a4c042546f76": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c1fbb5d26065440aa5eeee238d0d1d38", + "placeholder": "", + "style": "IPY_MODEL_0447228f33a344cd91a2e18cc73cd63e", + "value": "ECL-TD-RL1-a2c_panda_reach.zip: 100%" + } + }, + "4627448bbe6d40178e356623c281803f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4657c6ea689f494a9254c69d2a08dd4e", + "max": 110009, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ef2b6851a3784430949ecf77410e3386", + "value": 110009 + } + }, + "92339b2faa4b47c69b5457aa9631edf7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_178580de1f0b4840825b70ffebaf00c2", + "placeholder": "", + "style": "IPY_MODEL_6aeac63ed6e840c59814923109a66f58", + "value": " 110k/110k [00:00<00:00, 103kB/s]" + } + }, + "a337aa65ec8b498f87cb74caa342a6a3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c1fbb5d26065440aa5eeee238d0d1d38": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0447228f33a344cd91a2e18cc73cd63e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4657c6ea689f494a9254c69d2a08dd4e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ef2b6851a3784430949ecf77410e3386": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "178580de1f0b4840825b70ffebaf00c2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6aeac63ed6e840c59814923109a66f58": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "!pip install gymnasium\n", + "!pip install pyglet==2.0.10\n", + "!pip install pygame==2.5.2\n", + "!pip install PyQt5\n", + "!pip install huggingface-sb3==2.3.1\n", + "!pip install wandb tensorboard\n", + "!apt-get update && apt-get install ffmpeg freeglut3-dev xvfb # For visualization\n", + "!pip install \"stable-baselines3[extra]>=2.0.0a4\"\n", + "!pip install panda-gym\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vg3Sp873bVEU", + "outputId": "6dbcdd9b-dfbb-4edb-8ab0-b8d3b9056cfb" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: gymnasium in /usr/local/lib/python3.10/dist-packages (0.29.1)\n", + "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.25.2)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.10.0)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (0.0.4)\n", + "Requirement already satisfied: pyglet==2.0.10 in /usr/local/lib/python3.10/dist-packages (2.0.10)\n", + "Requirement already satisfied: pygame==2.5.2 in /usr/local/lib/python3.10/dist-packages (2.5.2)\n", + "Requirement already satisfied: PyQt5 in /usr/local/lib/python3.10/dist-packages (5.15.10)\n", + "Requirement already satisfied: PyQt5-sip<13,>=12.13 in /usr/local/lib/python3.10/dist-packages (from PyQt5) (12.13.0)\n", + "Requirement already satisfied: PyQt5-Qt5>=5.15.2 in /usr/local/lib/python3.10/dist-packages (from PyQt5) (5.15.2)\n", + "Requirement already satisfied: huggingface-sb3==2.3.1 in /usr/local/lib/python3.10/dist-packages (2.3.1)\n", + "Requirement already satisfied: huggingface-hub~=0.8 in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3==2.3.1) (0.20.3)\n", + "Requirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3==2.3.1) (6.0.1)\n", + "Requirement already satisfied: wasabi in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3==2.3.1) (1.1.2)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3==2.3.1) (1.25.2)\n", + "Requirement already satisfied: cloudpickle>=1.6 in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3==2.3.1) (2.2.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2023.6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (4.66.2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (4.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3==2.3.1) (23.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3==2.3.1) (2024.2.2)\n", + "Requirement already satisfied: wandb in /usr/local/lib/python3.10/dist-packages (0.16.3)\n", + "Requirement already satisfied: tensorboard in /usr/local/lib/python3.10/dist-packages (2.15.2)\n", + "Requirement already satisfied: Click!=8.0.0,>=7.1 in /usr/local/lib/python3.10/dist-packages (from wandb) (8.1.7)\n", + "Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.1.42)\n", + "Requirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (2.31.0)\n", + "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (5.9.5)\n", + "Requirement already satisfied: sentry-sdk>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.40.6)\n", + "Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (0.4.0)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from wandb) (6.0.1)\n", + "Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb) (1.3.3)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb) (67.7.2)\n", + "Requirement already satisfied: appdirs>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from wandb) (1.4.4)\n", + "Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb) (3.20.3)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.62.0)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (2.27.0)\n", + "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.2.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.5.2)\n", + "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.25.2)\n", + "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.16.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (3.0.1)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from GitPython!=3.1.29,>=1.0.0->wandb) (4.0.11)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard) (1.3.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.0.0->wandb) (2024.2.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard) (2.1.5)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb) (5.0.1)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard) (3.2.2)\n", + "Hit:1 http://security.ubuntu.com/ubuntu jammy-security InRelease\n", + "Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n", + "Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", + "Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease\n", + "Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease\n", + "Hit:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n", + "Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n", + "Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n", + "Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n", + "Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n", + "Reading package lists... Done\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "freeglut3-dev is already the newest version (2.8.1-6).\n", + "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n", + "xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.8).\n", + "0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.\n", + "Requirement already satisfied: stable-baselines3[extra]>=2.0.0a4 in /usr/local/lib/python3.10/dist-packages (2.3.0a2)\n", + "Requirement already satisfied: gymnasium<0.30,>=0.28.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (0.29.1)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (1.25.2)\n", + "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.1.0+cu121)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.2.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (1.5.3)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (3.7.1)\n", + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (4.8.0.76)\n", + "Requirement already satisfied: pygame in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.5.2)\n", + "Requirement already satisfied: tensorboard>=2.9.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.15.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (5.9.5)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (4.66.2)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (13.7.1)\n", + "Requirement already satisfied: shimmy[atari]~=1.3.0 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (1.3.0)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (9.4.0)\n", + "Requirement already satisfied: autorom[accept-rom-license]~=0.6.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (0.6.1)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (8.1.7)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (2.31.0)\n", + "Requirement already satisfied: AutoROM.accept-rom-license in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (0.6.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra]>=2.0.0a4) (4.10.0)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra]>=2.0.0a4) (0.0.4)\n", + "Requirement already satisfied: ale-py~=0.8.1 in /usr/local/lib/python3.10/dist-packages (from shimmy[atari]~=1.3.0->stable-baselines3[extra]>=2.0.0a4) (0.8.1)\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.62.0)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (2.27.0)\n", + "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.2.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.5.2)\n", + "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.20.3)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (67.7.2)\n", + "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.16.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (3.13.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (2.1.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (4.49.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (23.2)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->stable-baselines3[extra]>=2.0.0a4) (2023.4)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]>=2.0.0a4) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]>=2.0.0a4) (2.16.1)\n", + "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from ale-py~=0.8.1->shimmy[atari]~=1.3.0->stable-baselines3[extra]>=2.0.0a4) (6.1.2)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.3.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->stable-baselines3[extra]>=2.0.0a4) (0.1.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (2024.2.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (2.1.5)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (1.3.0)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.2.2)\n", + "Collecting panda-gym\n", + " Downloading panda_gym-3.0.7-py3-none-any.whl (23 kB)\n", + "Requirement already satisfied: gymnasium>=0.26 in /usr/local/lib/python3.10/dist-packages (from panda-gym) (0.29.1)\n", + "Collecting pybullet (from panda-gym)\n", + " Downloading pybullet-3.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (103.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.2/103.2 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from panda-gym) (1.25.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from panda-gym) (1.11.4)\n", + "Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda-gym) (2.2.1)\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda-gym) (4.10.0)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda-gym) (0.0.4)\n", + "Installing collected packages: pybullet, panda-gym\n", + "Successfully installed panda-gym-3.0.7 pybullet-3.2.6\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ebzIUtg8Ysbf" + }, + "outputs": [], + "source": [ + "import gymnasium as gym\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "import torch.nn.functional as F\n", + "from torch.distributions import Categorical\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 1.0 Cartepole using OpenAI Gym Environement with A custom policy model" + ], + "metadata": { + "id": "QIxa36SjeEfG" + } + }, + { + "cell_type": "code", + "source": [ + "# Create the environment\n", + "env = gym.make(\"CartPole-v1\", render_mode=\"human\")\n", + "\n", + "# Reset the environment and get the initial observation\n", + "observation = env.reset()\n", + "\n", + "state_size = env.observation_space.shape[0]\n", + "action_size = env.action_space.n\n", + "# Define the agent neural network model\n", + "class Policy(nn.Module):\n", + " def __init__(self, state_size, action_size, hidden_size=128):\n", + " super(Policy, self).__init__()\n", + " self.fc1 = nn.Linear(state_size, hidden_size)\n", + " self.relu = nn.ReLU()\n", + " self.dropout = nn.Dropout(p=0.6) # Adjust dropout probability as needed\n", + " self.fc2 = nn.Linear(hidden_size, action_size)\n", + "\n", + " def forward(self, x):\n", + " x = self.fc1(x)\n", + " x = self.relu(x)\n", + " x = self.dropout(x)\n", + " x = self.fc2(x)\n", + " return F.softmax(x)\n", + "\n", + "policy_model = Policy(state_size, action_size)\n", + "optimizer = optim.Adam(policy_model.parameters(), lr=5e-3)\n", + "\n", + "gamma = 0.99\n", + "episodes_rewards = []\n", + "\n", + "for i in range(500):\n", + " # Reset the environment\n", + " # init buffers\n", + " observation, info = env.reset(seed=42)\n", + " episode_rewards = []\n", + " logarithmich_probabilities = []\n", + " terminated = False\n", + " # Render the environment to visualize the agent's behavior\n", + " env.render()\n", + "\n", + " while terminated == False:\n", + " # Get action probabilities from the policy model\n", + " action_probabilities = policy_model(torch.tensor(observation, dtype=torch.float32))\n", + " action_distribution = Categorical(action_probabilities)\n", + "\n", + " # Sample an action from the action distribution\n", + " action = action_distribution.sample()\n", + " logarithmich_probability = action_distribution.log_prob(action)\n", + " logarithmich_probabilities.append(logarithmich_probability)\n", + " print(int(action.item()))\n", + " # Take a step in the environment\n", + " #print(env.step(action.item()))\n", + " next_observation, reward, done, a, b = env.step(action.item())\n", + " episode_rewards.append(reward)\n", + "\n", + " # Update observation\n", + " observation = next_observation\n", + "\n", + "\n", + " # Compute the return for the episode\n", + " returns = []\n", + " R = 0\n", + " for r in reversed(episode_rewards):\n", + " R = r + gamma * R\n", + " returns.insert(0, R)\n", + "\n", + " # Compute the policy loss\n", + " policy_loss = torch.tensor([-loga_prob * R for loga_prob, R in zip(logarithmich_probabilities, returns)]).sum()\n", + " episodes_rewards += [-policy_loss]\n", + " # Update the policy model\n", + " optimizer.zero_grad()\n", + " policy_loss.backward()\n", + " optimizer.step()\n", + "\n", + "\n", + "env.close()\n" + ], + "metadata": { + "id": "2CPcLVf-YzDK", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "6cdf851e-3024-4b23-9cf1-7f60b75f6fd4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "<ipython-input-3-38caa95ef5d7>:23: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n", + " return F.softmax(x)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/gymnasium/envs/classic_control/cartpole.py:180: UserWarning: \u001b[33mWARN: You are calling 'step()' even though this environment has already returned terminated = True. You should always call 'reset()' once you receive 'terminated = True' -- any further steps are undefined behavior.\u001b[0m\n", + " logger.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "1\n", + "0\n", + "1\n", + "1\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "0\n", + "1\n", + "0\n", + "1\n", + "0\n", + "0\n", + "0\n", + "0\n", + "1\n", + "1\n", + "0\n", + "0\n", + "0\n", + "1\n" + ] + }, + { + "output_type": "error", + "ename": "OverflowError", + "evalue": "signed short integer is less than minimum", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-3-38caa95ef5d7>\u001b[0m in \u001b[0;36m<cell line: 31>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;31m# Take a step in the environment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m#print(env.step(action.item()))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0mnext_observation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 54\u001b[0m \u001b[0mepisode_rewards\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreward\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/gymnasium/wrappers/time_limit.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \"\"\"\n\u001b[0;32m---> 57\u001b[0;31m \u001b[0mobservation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mterminated\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtruncated\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_elapsed_steps\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/gymnasium/wrappers/order_enforcing.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_has_reset\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mResetNeeded\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Cannot call env.step() before calling env.reset()\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mreset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/gymnasium/wrappers/env_checker.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0menv_step_passive_checker\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maction\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 52\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mreset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/gymnasium/envs/classic_control/cartpole.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender_mode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"human\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 190\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 191\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mterminated\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/gymnasium/envs/classic_control/cartpole.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[0mgfxdraw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilled_polygon\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msurf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpole_coords\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m202\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m152\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m101\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 281\u001b[0;31m gfxdraw.aacircle(\n\u001b[0m\u001b[1;32m 282\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msurf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcartx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mOverflowError\u001b[0m: signed short integer is less than minimum" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Plot the policy loss against iterations\n", + "plt.plot([i for i in range(0,500)],episodes_rewards)\n", + "plt.xlabel('Iterations')\n", + "plt.ylabel('Policy Loss')\n", + "plt.title('Policy Loss vs. Iterations')\n", + "plt.show()" + ], + "metadata": { + "id": "hY52xEWUSlUV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# 2.0 cartpole with stable-baselines3\n", + "\n", + "\n" + ], + "metadata": { + "id": "StaIGs4U6ez6" + } + }, + { + "cell_type": "code", + "source": [ + "!apt-get update && apt-get install ffmpeg freeglut3-dev xvfb # For visualization\n", + "!pip install \"stable-baselines3[extra]>=2.0.0a4\"" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "byLoAEfZp-33", + "outputId": "053b32f9-42c2-44e7-e6ef-5a916059b471" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\r0% [Working]\r \rGet:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]\n", + "Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", + "Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]\n", + "Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [50.4 kB]\n", + "Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease\n", + "Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]\n", + "Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n", + "Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n", + "Get:9 http://security.ubuntu.com/ubuntu jammy-security/restricted amd64 Packages [1,894 kB]\n", + "Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n", + "Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n", + "Hit:12 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n", + "Get:13 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,074 kB]\n", + "Get:14 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [1,522 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [1,932 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [1,801 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,346 kB]\n", + "Fetched 9,854 kB in 3s (3,522 kB/s)\n", + "Reading package lists... Done\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... Done\n", + "ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n", + "The following additional packages will be installed:\n", + " freeglut3 libegl-dev libfontenc1 libgl-dev libgl1-mesa-dev libgles-dev libgles1 libglu1-mesa\n", + " libglu1-mesa-dev libglvnd-core-dev libglvnd-dev libglx-dev libice-dev libopengl-dev libsm-dev\n", + " libxfont2 libxkbfile1 libxt-dev x11-xkb-utils xfonts-base xfonts-encodings xfonts-utils\n", + " xserver-common\n", + "Suggested packages:\n", + " libice-doc libsm-doc libxt-doc\n", + "The following NEW packages will be installed:\n", + " freeglut3 freeglut3-dev libegl-dev libfontenc1 libgl-dev libgl1-mesa-dev libgles-dev libgles1\n", + " libglu1-mesa libglu1-mesa-dev libglvnd-core-dev libglvnd-dev libglx-dev libice-dev libopengl-dev\n", + " libsm-dev libxfont2 libxkbfile1 libxt-dev x11-xkb-utils xfonts-base xfonts-encodings xfonts-utils\n", + " xserver-common xvfb\n", + "0 upgraded, 25 newly installed, 0 to remove and 41 not upgraded.\n", + "Need to get 9,075 kB of archives.\n", + "After this operation, 18.7 MB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 freeglut3 amd64 2.8.1-6 [74.0 kB]\n", + "Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglx-dev amd64 1.4.0-1 [14.1 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libgl-dev amd64 1.4.0-1 [101 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglvnd-core-dev amd64 1.4.0-1 [12.7 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 libegl-dev amd64 1.4.0-1 [18.0 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu jammy/main amd64 libgles1 amd64 1.4.0-1 [11.5 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu jammy/main amd64 libgles-dev amd64 1.4.0-1 [49.4 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu jammy/main amd64 libopengl-dev amd64 1.4.0-1 [3,400 B]\n", + "Get:9 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglvnd-dev amd64 1.4.0-1 [3,162 B]\n", + "Get:10 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 libgl1-mesa-dev amd64 23.2.1-1ubuntu3.1~22.04.2 [6,842 B]\n", + "Get:11 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglu1-mesa amd64 9.0.2-1 [145 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu jammy/main amd64 libglu1-mesa-dev amd64 9.0.2-1 [231 kB]\n", + "Get:13 http://archive.ubuntu.com/ubuntu jammy/main amd64 libice-dev amd64 2:1.0.10-1build2 [51.4 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu jammy/main amd64 libsm-dev amd64 2:1.2.3-1build2 [18.1 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxt-dev amd64 1:1.2.1-1 [396 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu jammy/universe amd64 freeglut3-dev amd64 2.8.1-6 [126 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu jammy/main amd64 libfontenc1 amd64 1:1.1.4-1build3 [14.7 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxfont2 amd64 1:2.0.5-1build1 [94.5 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxkbfile1 amd64 1:1.1.0-1build3 [71.8 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu jammy/main amd64 x11-xkb-utils amd64 7.7+5build4 [172 kB]\n", + "Get:21 http://archive.ubuntu.com/ubuntu jammy/main amd64 xfonts-encodings all 1:1.0.5-0ubuntu2 [578 kB]\n", + "Get:22 http://archive.ubuntu.com/ubuntu jammy/main amd64 xfonts-utils amd64 1:7.7+6build2 [94.6 kB]\n", + "Get:23 http://archive.ubuntu.com/ubuntu jammy/main amd64 xfonts-base all 1:1.0.5 [5,896 kB]\n", + "Get:24 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 xserver-common all 2:21.1.4-2ubuntu1.7~22.04.8 [28.6 kB]\n", + "Get:25 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 xvfb amd64 2:21.1.4-2ubuntu1.7~22.04.8 [863 kB]\n", + "Fetched 9,075 kB in 4s (2,098 kB/s)\n", + "Selecting previously unselected package freeglut3:amd64.\n", + "(Reading database ... 121749 files and directories currently installed.)\n", + "Preparing to unpack .../00-freeglut3_2.8.1-6_amd64.deb ...\n", + "Unpacking freeglut3:amd64 (2.8.1-6) ...\n", + "Selecting previously unselected package libglx-dev:amd64.\n", + "Preparing to unpack .../01-libglx-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libglx-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libgl-dev:amd64.\n", + "Preparing to unpack .../02-libgl-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libgl-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libglvnd-core-dev:amd64.\n", + "Preparing to unpack .../03-libglvnd-core-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libglvnd-core-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libegl-dev:amd64.\n", + "Preparing to unpack .../04-libegl-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libegl-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libgles1:amd64.\n", + "Preparing to unpack .../05-libgles1_1.4.0-1_amd64.deb ...\n", + "Unpacking libgles1:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libgles-dev:amd64.\n", + "Preparing to unpack .../06-libgles-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libgles-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libopengl-dev:amd64.\n", + "Preparing to unpack .../07-libopengl-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libopengl-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libglvnd-dev:amd64.\n", + "Preparing to unpack .../08-libglvnd-dev_1.4.0-1_amd64.deb ...\n", + "Unpacking libglvnd-dev:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libgl1-mesa-dev:amd64.\n", + "Preparing to unpack .../09-libgl1-mesa-dev_23.2.1-1ubuntu3.1~22.04.2_amd64.deb ...\n", + "Unpacking libgl1-mesa-dev:amd64 (23.2.1-1ubuntu3.1~22.04.2) ...\n", + "Selecting previously unselected package libglu1-mesa:amd64.\n", + "Preparing to unpack .../10-libglu1-mesa_9.0.2-1_amd64.deb ...\n", + "Unpacking libglu1-mesa:amd64 (9.0.2-1) ...\n", + "Selecting previously unselected package libglu1-mesa-dev:amd64.\n", + "Preparing to unpack .../11-libglu1-mesa-dev_9.0.2-1_amd64.deb ...\n", + "Unpacking libglu1-mesa-dev:amd64 (9.0.2-1) ...\n", + "Selecting previously unselected package libice-dev:amd64.\n", + "Preparing to unpack .../12-libice-dev_2%3a1.0.10-1build2_amd64.deb ...\n", + "Unpacking libice-dev:amd64 (2:1.0.10-1build2) ...\n", + "Selecting previously unselected package libsm-dev:amd64.\n", + "Preparing to unpack .../13-libsm-dev_2%3a1.2.3-1build2_amd64.deb ...\n", + "Unpacking libsm-dev:amd64 (2:1.2.3-1build2) ...\n", + "Selecting previously unselected package libxt-dev:amd64.\n", + "Preparing to unpack .../14-libxt-dev_1%3a1.2.1-1_amd64.deb ...\n", + "Unpacking libxt-dev:amd64 (1:1.2.1-1) ...\n", + "Selecting previously unselected package freeglut3-dev:amd64.\n", + "Preparing to unpack .../15-freeglut3-dev_2.8.1-6_amd64.deb ...\n", + "Unpacking freeglut3-dev:amd64 (2.8.1-6) ...\n", + "Selecting previously unselected package libfontenc1:amd64.\n", + "Preparing to unpack .../16-libfontenc1_1%3a1.1.4-1build3_amd64.deb ...\n", + "Unpacking libfontenc1:amd64 (1:1.1.4-1build3) ...\n", + "Selecting previously unselected package libxfont2:amd64.\n", + "Preparing to unpack .../17-libxfont2_1%3a2.0.5-1build1_amd64.deb ...\n", + "Unpacking libxfont2:amd64 (1:2.0.5-1build1) ...\n", + "Selecting previously unselected package libxkbfile1:amd64.\n", + "Preparing to unpack .../18-libxkbfile1_1%3a1.1.0-1build3_amd64.deb ...\n", + "Unpacking libxkbfile1:amd64 (1:1.1.0-1build3) ...\n", + "Selecting previously unselected package x11-xkb-utils.\n", + "Preparing to unpack .../19-x11-xkb-utils_7.7+5build4_amd64.deb ...\n", + "Unpacking x11-xkb-utils (7.7+5build4) ...\n", + "Selecting previously unselected package xfonts-encodings.\n", + "Preparing to unpack .../20-xfonts-encodings_1%3a1.0.5-0ubuntu2_all.deb ...\n", + "Unpacking xfonts-encodings (1:1.0.5-0ubuntu2) ...\n", + "Selecting previously unselected package xfonts-utils.\n", + "Preparing to unpack .../21-xfonts-utils_1%3a7.7+6build2_amd64.deb ...\n", + "Unpacking xfonts-utils (1:7.7+6build2) ...\n", + "Selecting previously unselected package xfonts-base.\n", + "Preparing to unpack .../22-xfonts-base_1%3a1.0.5_all.deb ...\n", + "Unpacking xfonts-base (1:1.0.5) ...\n", + "Selecting previously unselected package xserver-common.\n", + "Preparing to unpack .../23-xserver-common_2%3a21.1.4-2ubuntu1.7~22.04.8_all.deb ...\n", + "Unpacking xserver-common (2:21.1.4-2ubuntu1.7~22.04.8) ...\n", + "Selecting previously unselected package xvfb.\n", + "Preparing to unpack .../24-xvfb_2%3a21.1.4-2ubuntu1.7~22.04.8_amd64.deb ...\n", + "Unpacking xvfb (2:21.1.4-2ubuntu1.7~22.04.8) ...\n", + "Setting up freeglut3:amd64 (2.8.1-6) ...\n", + "Setting up libglvnd-core-dev:amd64 (1.4.0-1) ...\n", + "Setting up libice-dev:amd64 (2:1.0.10-1build2) ...\n", + "Setting up libsm-dev:amd64 (2:1.2.3-1build2) ...\n", + "Setting up libfontenc1:amd64 (1:1.1.4-1build3) ...\n", + "Setting up libxt-dev:amd64 (1:1.2.1-1) ...\n", + "Setting up libgles1:amd64 (1.4.0-1) ...\n", + "Setting up xfonts-encodings (1:1.0.5-0ubuntu2) ...\n", + "Setting up libglx-dev:amd64 (1.4.0-1) ...\n", + "Setting up libglu1-mesa:amd64 (9.0.2-1) ...\n", + "Setting up libxkbfile1:amd64 (1:1.1.0-1build3) ...\n", + "Setting up libopengl-dev:amd64 (1.4.0-1) ...\n", + "Setting up libxfont2:amd64 (1:2.0.5-1build1) ...\n", + "Setting up libgl-dev:amd64 (1.4.0-1) ...\n", + "Setting up libegl-dev:amd64 (1.4.0-1) ...\n", + "Setting up x11-xkb-utils (7.7+5build4) ...\n", + "Setting up xfonts-utils (1:7.7+6build2) ...\n", + "Setting up xfonts-base (1:1.0.5) ...\n", + "Setting up libglu1-mesa-dev:amd64 (9.0.2-1) ...\n", + "Setting up xserver-common (2:21.1.4-2ubuntu1.7~22.04.8) ...\n", + "Setting up libgles-dev:amd64 (1.4.0-1) ...\n", + "Setting up xvfb (2:21.1.4-2ubuntu1.7~22.04.8) ...\n", + "Setting up libglvnd-dev:amd64 (1.4.0-1) ...\n", + "Setting up libgl1-mesa-dev:amd64 (23.2.1-1ubuntu3.1~22.04.2) ...\n", + "Setting up freeglut3-dev:amd64 (2.8.1-6) ...\n", + "Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n", + "\n", + "Processing triggers for man-db (2.10.2-1) ...\n", + "Processing triggers for fontconfig (2.13.1-4.2ubuntu5) ...\n", + "Collecting stable-baselines3[extra]>=2.0.0a4\n", + " Downloading stable_baselines3-2.3.0a2-py3-none-any.whl (181 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.8/181.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: gymnasium<0.30,>=0.28.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (0.29.1)\n", + "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (1.25.2)\n", + "Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.1.0+cu121)\n", + "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.2.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (1.5.3)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (3.7.1)\n", + "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (4.8.0.76)\n", + "Requirement already satisfied: pygame in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.5.2)\n", + "Requirement already satisfied: tensorboard>=2.9.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (2.15.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (5.9.5)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (4.66.2)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (13.7.1)\n", + "Collecting shimmy[atari]~=1.3.0 (from stable-baselines3[extra]>=2.0.0a4)\n", + " Downloading Shimmy-1.3.0-py3-none-any.whl (37 kB)\n", + "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]>=2.0.0a4) (9.4.0)\n", + "Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra]>=2.0.0a4)\n", + " Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (8.1.7)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (2.31.0)\n", + "Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4)\n", + " Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m434.7/434.7 kB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra]>=2.0.0a4) (4.10.0)\n", + "Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra]>=2.0.0a4) (0.0.4)\n", + "Collecting ale-py~=0.8.1 (from shimmy[atari]~=1.3.0->stable-baselines3[extra]>=2.0.0a4)\n", + " Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.4.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.62.0)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (2.27.0)\n", + "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.2.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.5.2)\n", + "Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.20.3)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (67.7.2)\n", + "Requirement already satisfied: six>1.9 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.16.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (3.13.1)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (2.1.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (1.2.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (4.49.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (1.4.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (23.2)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]>=2.0.0a4) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->stable-baselines3[extra]>=2.0.0a4) (2023.4)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]>=2.0.0a4) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]>=2.0.0a4) (2.16.1)\n", + "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from ale-py~=0.8.1->shimmy[atari]~=1.3.0->stable-baselines3[extra]>=2.0.0a4) (6.1.2)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (5.3.3)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (1.3.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->stable-baselines3[extra]>=2.0.0a4) (0.1.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4) (2024.2.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (2.1.5)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->stable-baselines3[extra]>=2.0.0a4) (1.3.0)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (0.5.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]>=2.0.0a4) (3.2.2)\n", + "Building wheels for collected packages: AutoROM.accept-rom-license\n", + " Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for AutoROM.accept-rom-license: filename=AutoROM.accept_rom_license-0.6.1-py3-none-any.whl size=446660 sha256=525219ed92b07d52de7e8c459a922d2e6af7c544a54b55368db234aa0242a7da\n", + " Stored in directory: /root/.cache/pip/wheels/6b/1b/ef/a43ff1a2f1736d5711faa1ba4c1f61be1131b8899e6a057811\n", + "Successfully built AutoROM.accept-rom-license\n", + "Installing collected packages: ale-py, shimmy, AutoROM.accept-rom-license, autorom, stable-baselines3\n", + "Successfully installed AutoROM.accept-rom-license-0.6.1 ale-py-0.8.1 autorom-0.6.1 shimmy-1.3.0 stable-baselines3-2.3.0a2\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import gymnasium as gym\n", + "import numpy as np\n", + "from stable_baselines3.common.evaluation import evaluate_policy\n", + "from stable_baselines3 import A2C\n", + "from huggingface_sb3 import push_to_hub\n", + "from huggingface_hub import login\n", + "\n", + "\n", + "\n", + "print(f\"{gym.__version__=}\")" + ], + "metadata": { + "id": "kTzxDZ4M1X7m", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9f1cf826-ea0a-4f91-c615-876fcd7818fc" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "gym.__version__='0.29.1'\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "env = gym.make(\"CartPole-v1\", render_mode=\"rgb_array\")\n", + "model = A2C(\"MlpPolicy\", env, verbose=1)" + ], + "metadata": { + "id": "1jpK_90YZhwm", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "88980150-d623-495b-ce18-c0368ad389de" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using cuda device\n", + "Wrapping the env with a `Monitor` wrapper\n", + "Wrapping the env in a DummyVecEnv.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "def evaluate(model, num_episodes=100, deterministic=True):\n", + " \"\"\"\n", + " Evaluate a RL agent\n", + " :param model: (BaseRLModel object) the RL Agent\n", + " :param num_episodes: (int) number of episodes to evaluate it\n", + " :return: (float) Mean reward for the last num_episodes\n", + " \"\"\"\n", + " # This function will only work for a single Environment\n", + " vec_env = model.get_env()\n", + " all_episode_rewards = []\n", + " for i in range(num_episodes):\n", + " episode_rewards = []\n", + " done = False\n", + " obs = vec_env.reset()\n", + " while not done:\n", + " # _states are only useful when using LSTM policies\n", + " action, _states = model.predict(obs, deterministic=deterministic)\n", + " # here, action, rewards and dones are arrays\n", + " # because we are using vectorized env\n", + " # also note that the step only returns a 4-tuple, as the env that is returned\n", + " # by model.get_env() is an sb3 vecenv that wraps the >v0.26 API\n", + " obs, reward, done, info = vec_env.step(action)\n", + " episode_rewards.append(reward)\n", + "\n", + " all_episode_rewards.append(sum(episode_rewards))\n", + "\n", + " mean_episode_reward = np.mean(all_episode_rewards)\n", + " print(\"Mean reward:\", mean_episode_reward, \"Num episodes:\", num_episodes)\n", + "\n", + " return mean_episode_reward" + ], + "metadata": { + "id": "Mih1B33mZSV0" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Use a separate environement for evaluation\n", + "eval_env = gym.make(\"CartPole-v1\", render_mode=\"rgb_array\")" + ], + "metadata": { + "id": "NHNZ0-eFZtUq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Train the agent for 10000 steps\n", + "model.learn(total_timesteps=10_000)" + ], + "metadata": { + "id": "a4b9VAvuZvUJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4a90467d-5d73-4eed-885a-6c0858eeb9fb" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20 |\n", + "| ep_rew_mean | 20 |\n", + "| time/ | |\n", + "| fps | 186 |\n", + "| iterations | 100 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.692 |\n", + "| explained_variance | 0.0184 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 99 |\n", + "| policy_loss | 1.85 |\n", + "| value_loss | 9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.1 |\n", + "| ep_rew_mean | 23.1 |\n", + "| time/ | |\n", + "| fps | 258 |\n", + "| iterations | 200 |\n", + "| time_elapsed | 3 |\n", + "| total_timesteps | 1000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.692 |\n", + "| explained_variance | 0.0291 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 199 |\n", + "| policy_loss | -6.12 |\n", + "| value_loss | 107 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.5 |\n", + "| ep_rew_mean | 25.5 |\n", + "| time/ | |\n", + "| fps | 299 |\n", + "| iterations | 300 |\n", + "| time_elapsed | 5 |\n", + "| total_timesteps | 1500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.684 |\n", + "| explained_variance | -0.00656 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 299 |\n", + "| policy_loss | 1.56 |\n", + "| value_loss | 6.39 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.8 |\n", + "| ep_rew_mean | 27.8 |\n", + "| time/ | |\n", + "| fps | 326 |\n", + "| iterations | 400 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 2000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.675 |\n", + "| explained_variance | 0.0542 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 399 |\n", + "| policy_loss | 1.29 |\n", + "| value_loss | 5.63 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.7 |\n", + "| ep_rew_mean | 30.7 |\n", + "| time/ | |\n", + "| fps | 343 |\n", + "| iterations | 500 |\n", + "| time_elapsed | 7 |\n", + "| total_timesteps | 2500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.68 |\n", + "| explained_variance | -0.000433 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 499 |\n", + "| policy_loss | 1.21 |\n", + "| value_loss | 5.6 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.4 |\n", + "| ep_rew_mean | 32.4 |\n", + "| time/ | |\n", + "| fps | 339 |\n", + "| iterations | 600 |\n", + "| time_elapsed | 8 |\n", + "| total_timesteps | 3000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.603 |\n", + "| explained_variance | -0.0121 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 599 |\n", + "| policy_loss | 1.24 |\n", + "| value_loss | 5.01 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.4 |\n", + "| ep_rew_mean | 34.4 |\n", + "| time/ | |\n", + "| fps | 342 |\n", + "| iterations | 700 |\n", + "| time_elapsed | 10 |\n", + "| total_timesteps | 3500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.644 |\n", + "| explained_variance | 0.00312 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 699 |\n", + "| policy_loss | 1.04 |\n", + "| value_loss | 4.48 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37 |\n", + "| ep_rew_mean | 37 |\n", + "| time/ | |\n", + "| fps | 345 |\n", + "| iterations | 800 |\n", + "| time_elapsed | 11 |\n", + "| total_timesteps | 4000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.644 |\n", + "| explained_variance | -0.000278 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 799 |\n", + "| policy_loss | 0.917 |\n", + "| value_loss | 3.97 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 40.9 |\n", + "| ep_rew_mean | 40.9 |\n", + "| time/ | |\n", + "| fps | 354 |\n", + "| iterations | 900 |\n", + "| time_elapsed | 12 |\n", + "| total_timesteps | 4500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.625 |\n", + "| explained_variance | -0.000446 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 899 |\n", + "| policy_loss | -17.7 |\n", + "| value_loss | 1.61e+03 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 44.7 |\n", + "| ep_rew_mean | 44.7 |\n", + "| time/ | |\n", + "| fps | 362 |\n", + "| iterations | 1000 |\n", + "| time_elapsed | 13 |\n", + "| total_timesteps | 5000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.487 |\n", + "| explained_variance | -7.62e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 999 |\n", + "| policy_loss | 0.681 |\n", + "| value_loss | 3 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 48.1 |\n", + "| ep_rew_mean | 48.1 |\n", + "| time/ | |\n", + "| fps | 367 |\n", + "| iterations | 1100 |\n", + "| time_elapsed | 14 |\n", + "| total_timesteps | 5500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.616 |\n", + "| explained_variance | 0.00327 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1099 |\n", + "| policy_loss | 0.828 |\n", + "| value_loss | 2.53 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 52.8 |\n", + "| ep_rew_mean | 52.8 |\n", + "| time/ | |\n", + "| fps | 373 |\n", + "| iterations | 1200 |\n", + "| time_elapsed | 16 |\n", + "| total_timesteps | 6000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.52 |\n", + "| explained_variance | -6.9e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1199 |\n", + "| policy_loss | 0.904 |\n", + "| value_loss | 2.11 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 56.5 |\n", + "| ep_rew_mean | 56.5 |\n", + "| time/ | |\n", + "| fps | 377 |\n", + "| iterations | 1300 |\n", + "| time_elapsed | 17 |\n", + "| total_timesteps | 6500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.566 |\n", + "| explained_variance | 1.93e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1299 |\n", + "| policy_loss | 0.926 |\n", + "| value_loss | 1.71 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 59.8 |\n", + "| ep_rew_mean | 59.8 |\n", + "| time/ | |\n", + "| fps | 381 |\n", + "| iterations | 1400 |\n", + "| time_elapsed | 18 |\n", + "| total_timesteps | 7000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.655 |\n", + "| explained_variance | 8.76e-06 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1399 |\n", + "| policy_loss | 0.654 |\n", + "| value_loss | 1.37 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 65 |\n", + "| ep_rew_mean | 65 |\n", + "| time/ | |\n", + "| fps | 384 |\n", + "| iterations | 1500 |\n", + "| time_elapsed | 19 |\n", + "| total_timesteps | 7500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.343 |\n", + "| explained_variance | -2.01e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1499 |\n", + "| policy_loss | 0.927 |\n", + "| value_loss | 1.08 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 69.3 |\n", + "| ep_rew_mean | 69.3 |\n", + "| time/ | |\n", + "| fps | 386 |\n", + "| iterations | 1600 |\n", + "| time_elapsed | 20 |\n", + "| total_timesteps | 8000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.579 |\n", + "| explained_variance | 6.14e-06 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1599 |\n", + "| policy_loss | -36.9 |\n", + "| value_loss | 1.02e+03 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 71.8 |\n", + "| ep_rew_mean | 71.8 |\n", + "| time/ | |\n", + "| fps | 386 |\n", + "| iterations | 1700 |\n", + "| time_elapsed | 21 |\n", + "| total_timesteps | 8500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.566 |\n", + "| explained_variance | -4.07e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1699 |\n", + "| policy_loss | 0.465 |\n", + "| value_loss | 0.579 |\n", + "-------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 77 |\n", + "| ep_rew_mean | 77 |\n", + "| time/ | |\n", + "| fps | 383 |\n", + "| iterations | 1800 |\n", + "| time_elapsed | 23 |\n", + "| total_timesteps | 9000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.583 |\n", + "| explained_variance | -1.61e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1799 |\n", + "| policy_loss | 0.334 |\n", + "| value_loss | 0.382 |\n", + "-------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 80.8 |\n", + "| ep_rew_mean | 80.8 |\n", + "| time/ | |\n", + "| fps | 386 |\n", + "| iterations | 1900 |\n", + "| time_elapsed | 24 |\n", + "| total_timesteps | 9500 |\n", + "| train/ | |\n", + "| entropy_loss | -0.573 |\n", + "| explained_variance | 3.22e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1899 |\n", + "| policy_loss | 0.237 |\n", + "| value_loss | 0.229 |\n", + "------------------------------------\n", + "-------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 84.5 |\n", + "| ep_rew_mean | 84.5 |\n", + "| time/ | |\n", + "| fps | 388 |\n", + "| iterations | 2000 |\n", + "| time_elapsed | 25 |\n", + "| total_timesteps | 10000 |\n", + "| train/ | |\n", + "| entropy_loss | -0.46 |\n", + "| explained_variance | -5.95e-05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 1999 |\n", + "| policy_loss | 0.213 |\n", + "| value_loss | 0.119 |\n", + "-------------------------------------\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "<stable_baselines3.a2c.a2c.A2C at 0x7e7efb9c3f40>" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Evaluate the trained agent\n", + "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=100)\n", + "\n", + "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")" + ], + "metadata": { + "id": "A1b3dko0Zxix", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "090ab383-3321-4b5c-ef0b-d4fb9fea8351" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "mean_reward:152.85 +/- 17.30\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### saving the learned model in hub" + ], + "metadata": { + "id": "x6FPQxixCU6A" + } + }, + { + "cell_type": "code", + "source": [ + "login(token=\"****************\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "H0pKB3u7RNln", + "outputId": "03ccf8e1-baf5-4cdb-d915-49d8d881f2fd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n", + "Token is valid (permission: write).\n", + "Your token has been saved to /root/.cache/huggingface/token\n", + "Login successful\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Save the trained model\n", + "model.save(\"ECL-TD-RL1-a2c_cartpole.zip\")\n", + "\n", + "# Load the trained model\n", + "model = A2C.load(\"ECL-TD-RL1-a2c_cartpole.zip\")\n", + "\n", + "push_to_hub(\n", + " repo_id=\"Karim-20/a2c_cartpole\",\n", + " filename=\"ECL-TD-RL1-a2c_cartpole.zip\",\n", + " commit_message=\"Add cartepole-v1 environement, agent used to train is A2C\"\n", + ")\n" + ], + "metadata": { + "id": "9lgpQBFeDIVx", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136, + "referenced_widgets": [ + "3bf20325eeb94361964c8a1620c38727", + "0fdb788454a34aa9bc545568ed8afad7", + "c61ff44a39204ce88d3a43c02388b6e6", + "e650459cdbdc425ca554d4f66c3d6d40", + "9ff1bdcd30904e669984f48e4a93ae0c", + "69ece13913d844ca8e7209cc125e2d7b", + "3525ebe3a5944b278adff0cd7c59a5d7", + "656ba980bd9247f0b9f9c4ecf8b1e4a7", + "572fe508383646baaf82b2349366c671", + "c9f0b1c6f1a74c238ccb347bee20b397", + "9d8e4e68afed4d90a59c4293cd4a6b9d" + ] + }, + "outputId": "cea78c0f-ffdf-4318-def2-c8c3e19c0c35" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[38;5;4mℹ Pushing repo Karim-20/a2c_cartpole to the Hugging Face Hub\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "ECL-TD-RL1-a2c_cartpole.zip: 0%| | 0.00/98.1k [00:00<?, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "3bf20325eeb94361964c8a1620c38727" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[38;5;2m✔ Your model has been uploaded to the Hub, you can find it here:\n", + "https://huggingface.co/Karim-20/a2c_cartpole/tree/main/\u001b[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "CommitInfo(commit_url='https://huggingface.co/Karim-20/a2c_cartpole/commit/11ea47a77ed2f507464852dda3f3888abd0e692c', commit_message='Add cartepole-v1 environement, agent used to train is A2C', commit_description='', oid='11ea47a77ed2f507464852dda3f3888abd0e692c', pr_url=None, pr_revision=None, pr_num=None)" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 3.0 PandaReachJointsDense-v2" + ], + "metadata": { + "id": "nXlE_QHYd70G" + } + }, + { + "cell_type": "code", + "source": [ + "### LIBRARIES\n", + "\n", + "import gymnasium as gym\n", + "from stable_baselines3 import A2C\n", + "from stable_baselines3.common.monitor import Monitor\n", + "from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder\n", + "import wandb\n", + "from wandb.integration.sb3 import WandbCallback\n", + "from huggingface_sb3 import push_to_hub\n", + "import panda_gym\n", + "import os\n", + "from huggingface_hub import login\n" + ], + "metadata": { + "id": "0YBce66VQoaL" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "\n", + "#dir_path = os.path.dirname(os.path.realpath(__file__))\n", + "#os.chdir(dir_path)\n", + "\n", + "config = {\n", + " \"policy_type\": \"MultiInputPolicy\",\n", + " \"total_timesteps\": 250000,\n", + " \"env_name\": \"PandaReachJointsDense-v3\",\n", + "}\n", + "\n", + "run = wandb.init(\n", + " project=\"sb3-panda-reach\",\n", + " config=config,\n", + " sync_tensorboard=True, # auto-upload sb3's tensorboard metrics\n", + " monitor_gym=True, # auto-upload the videos of agents playing the game\n", + " save_code=True, # optional\n", + ")\n", + "\n", + "def make_env():\n", + " env = gym.make(config[\"env_name\"])\n", + " env = Monitor(env) # record stats such as returns\n", + " return env\n", + "\n", + "env = DummyVecEnv([make_env])\n", + "# env = VecVideoRecorder(env, f\"videos/{run.id}\", record_video_trigger=lambda x: x % 2000 == 0, video_length=200)\n", + "model = A2C(config[\"policy_type\"], env, verbose=1, tensorboard_log=f\"runs/{run.id}\")\n", + "model.learn(\n", + " total_timesteps=config[\"total_timesteps\"],\n", + " callback=WandbCallback(\n", + " gradient_save_freq=100,\n", + " model_save_path=f\"models/{run.id}\",\n", + " verbose=2,\n", + " ),\n", + ")\n", + "\n", + "run.finish()\n", + "\n" + ], + "metadata": { + "id": "-iPcpsSpAWh0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "c14a53fd35174f3ba632a22e3c9dda47", + "02913ffc7b024793ab252e74c0427aa9", + "53f02b9c14564874baecde3983423440", + "83c1d52aef7d421db0657103710bcd06", + "9346924fc0d14e10924684c0ab74891c", + "4cf73107b2a74863a612244964f0fc04", + "c056c0b446134ae69bdc93bd93f3af13", + "3d54d2c162b34319bfd1428fb18fd181" + ] + }, + "outputId": "41cb3d73-4ca4-49f7-f258-f4062ca0cad8" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Tracking run with wandb version 0.16.3" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Run data is saved locally in <code>/content/wandb/run-20240305_210146-ihcoeovn</code>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Syncing run <strong><a href='https://wandb.ai/aiblackbelt/sb3-panda-reach/runs/ihcoeovn' target=\"_blank\">dashing-glitter-6</a></strong> to <a href='https://wandb.ai/aiblackbelt/sb3-panda-reach' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View project at <a href='https://wandb.ai/aiblackbelt/sb3-panda-reach' target=\"_blank\">https://wandb.ai/aiblackbelt/sb3-panda-reach</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run at <a href='https://wandb.ai/aiblackbelt/sb3-panda-reach/runs/ihcoeovn' target=\"_blank\">https://wandb.ai/aiblackbelt/sb3-panda-reach/runs/ihcoeovn</a>" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m When using several event log directories, please call `wandb.tensorboard.patch(root_logdir=\"...\")` before `wandb.init`\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.\u001b[0m\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 22300 |\n", + "| time_elapsed | 515 |\n", + "| total_timesteps | 111500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.45 |\n", + "| explained_variance | 0.00867 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22299 |\n", + "| policy_loss | -2.16 |\n", + "| std | 0.939 |\n", + "| value_loss | 0.0636 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 44.9 |\n", + "| ep_rew_mean | -7.03 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 22400 |\n", + "| time_elapsed | 518 |\n", + "| total_timesteps | 112000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.48 |\n", + "| explained_variance | -0.683 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22399 |\n", + "| policy_loss | 1.14 |\n", + "| std | 0.943 |\n", + "| value_loss | 0.0359 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 43.9 |\n", + "| ep_rew_mean | -6.64 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 22500 |\n", + "| time_elapsed | 520 |\n", + "| total_timesteps | 112500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.5 |\n", + "| explained_variance | -0.145 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22499 |\n", + "| policy_loss | -2.57 |\n", + "| std | 0.945 |\n", + "| value_loss | 0.0833 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 42.8 |\n", + "| ep_rew_mean | -6.33 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 22600 |\n", + "| time_elapsed | 522 |\n", + "| total_timesteps | 113000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.49 |\n", + "| explained_variance | -8.08 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22599 |\n", + "| policy_loss | -1.29 |\n", + "| std | 0.945 |\n", + "| value_loss | 0.0271 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.1 |\n", + "| ep_rew_mean | -4.9 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 22700 |\n", + "| time_elapsed | 525 |\n", + "| total_timesteps | 113500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.46 |\n", + "| explained_variance | -0.776 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22699 |\n", + "| policy_loss | 52.8 |\n", + "| std | 0.941 |\n", + "| value_loss | 42.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.1 |\n", + "| ep_rew_mean | -4.16 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 22800 |\n", + "| time_elapsed | 527 |\n", + "| total_timesteps | 114000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.47 |\n", + "| explained_variance | 0.752 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22799 |\n", + "| policy_loss | -0.807 |\n", + "| std | 0.942 |\n", + "| value_loss | 0.00859 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.8 |\n", + "| ep_rew_mean | -2.49 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 22900 |\n", + "| time_elapsed | 530 |\n", + "| total_timesteps | 114500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.45 |\n", + "| explained_variance | -0.247 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22899 |\n", + "| policy_loss | 62.9 |\n", + "| std | 0.939 |\n", + "| value_loss | 72.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19.7 |\n", + "| ep_rew_mean | -2.21 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23000 |\n", + "| time_elapsed | 532 |\n", + "| total_timesteps | 115000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.44 |\n", + "| explained_variance | 0.965 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 22999 |\n", + "| policy_loss | -4.07 |\n", + "| std | 0.938 |\n", + "| value_loss | 0.216 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.5 |\n", + "| ep_rew_mean | -2.51 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23100 |\n", + "| time_elapsed | 534 |\n", + "| total_timesteps | 115500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.45 |\n", + "| explained_variance | -2.05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23099 |\n", + "| policy_loss | 2.27 |\n", + "| std | 0.939 |\n", + "| value_loss | 0.0759 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23 |\n", + "| ep_rew_mean | -2.74 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23200 |\n", + "| time_elapsed | 537 |\n", + "| total_timesteps | 116000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.45 |\n", + "| explained_variance | -6.83 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23199 |\n", + "| policy_loss | 0.588 |\n", + "| std | 0.939 |\n", + "| value_loss | 0.0248 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.4 |\n", + "| ep_rew_mean | -3.11 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 23300 |\n", + "| time_elapsed | 539 |\n", + "| total_timesteps | 116500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.44 |\n", + "| explained_variance | -22 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23299 |\n", + "| policy_loss | -0.0544 |\n", + "| std | 0.938 |\n", + "| value_loss | 0.0256 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28 |\n", + "| ep_rew_mean | -3.46 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 23400 |\n", + "| time_elapsed | 541 |\n", + "| total_timesteps | 117000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.41 |\n", + "| explained_variance | -3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23399 |\n", + "| policy_loss | -0.346 |\n", + "| std | 0.933 |\n", + "| value_loss | 0.0151 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.2 |\n", + "| ep_rew_mean | -3.96 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23500 |\n", + "| time_elapsed | 544 |\n", + "| total_timesteps | 117500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.4 |\n", + "| explained_variance | -43.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23499 |\n", + "| policy_loss | 2.42 |\n", + "| std | 0.932 |\n", + "| value_loss | 0.07 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.6 |\n", + "| ep_rew_mean | -3.97 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23600 |\n", + "| time_elapsed | 546 |\n", + "| total_timesteps | 118000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.41 |\n", + "| explained_variance | -53.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23599 |\n", + "| policy_loss | -2.25 |\n", + "| std | 0.933 |\n", + "| value_loss | 0.0758 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32 |\n", + "| ep_rew_mean | -4.39 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23700 |\n", + "| time_elapsed | 548 |\n", + "| total_timesteps | 118500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.43 |\n", + "| explained_variance | -1.86 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23699 |\n", + "| policy_loss | -1.53 |\n", + "| std | 0.937 |\n", + "| value_loss | 0.0645 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.3 |\n", + "| ep_rew_mean | -4.6 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 23800 |\n", + "| time_elapsed | 550 |\n", + "| total_timesteps | 119000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.41 |\n", + "| explained_variance | -2.95 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23799 |\n", + "| policy_loss | 3.8 |\n", + "| std | 0.933 |\n", + "| value_loss | 0.316 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.8 |\n", + "| ep_rew_mean | -4.61 |\n", + "| time/ | |\n", + "| fps | 216 |\n", + "| iterations | 23900 |\n", + "| time_elapsed | 553 |\n", + "| total_timesteps | 119500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.4 |\n", + "| explained_variance | 0.991 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23899 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.933 |\n", + "| value_loss | 0.0448 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.2 |\n", + "| ep_rew_mean | -4.6 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24000 |\n", + "| time_elapsed | 556 |\n", + "| total_timesteps | 120000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.42 |\n", + "| explained_variance | -4.95 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 23999 |\n", + "| policy_loss | 2.49 |\n", + "| std | 0.935 |\n", + "| value_loss | 0.0738 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.8 |\n", + "| ep_rew_mean | -4.37 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24100 |\n", + "| time_elapsed | 558 |\n", + "| total_timesteps | 120500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.42 |\n", + "| explained_variance | -6.33 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24099 |\n", + "| policy_loss | -0.807 |\n", + "| std | 0.935 |\n", + "| value_loss | 0.0239 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31 |\n", + "| ep_rew_mean | -4.31 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24200 |\n", + "| time_elapsed | 560 |\n", + "| total_timesteps | 121000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.39 |\n", + "| explained_variance | -2.43 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24199 |\n", + "| policy_loss | -9.51 |\n", + "| std | 0.931 |\n", + "| value_loss | 1.49 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.7 |\n", + "| ep_rew_mean | -3.95 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24300 |\n", + "| time_elapsed | 562 |\n", + "| total_timesteps | 121500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -1.98 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24299 |\n", + "| policy_loss | -0.455 |\n", + "| std | 0.93 |\n", + "| value_loss | 0.0111 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.1 |\n", + "| ep_rew_mean | -3.58 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24400 |\n", + "| time_elapsed | 565 |\n", + "| total_timesteps | 122000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.35 |\n", + "| explained_variance | -1.73 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24399 |\n", + "| policy_loss | -0.0601 |\n", + "| std | 0.926 |\n", + "| value_loss | 0.0105 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.9 |\n", + "| ep_rew_mean | -3.59 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24500 |\n", + "| time_elapsed | 567 |\n", + "| total_timesteps | 122500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.37 |\n", + "| explained_variance | 0.991 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24499 |\n", + "| policy_loss | -0.474 |\n", + "| std | 0.929 |\n", + "| value_loss | 0.00794 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.4 |\n", + "| ep_rew_mean | -4.04 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24600 |\n", + "| time_elapsed | 570 |\n", + "| total_timesteps | 123000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.39 |\n", + "| explained_variance | -0.122 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24599 |\n", + "| policy_loss | -1.6 |\n", + "| std | 0.931 |\n", + "| value_loss | 0.0356 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.6 |\n", + "| ep_rew_mean | -4.51 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24700 |\n", + "| time_elapsed | 572 |\n", + "| total_timesteps | 123500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -21.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24699 |\n", + "| policy_loss | -3.84 |\n", + "| std | 0.93 |\n", + "| value_loss | 0.176 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.9 |\n", + "| ep_rew_mean | -5.19 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24800 |\n", + "| time_elapsed | 574 |\n", + "| total_timesteps | 124000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.4 |\n", + "| explained_variance | -11.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24799 |\n", + "| policy_loss | 187 |\n", + "| std | 0.931 |\n", + "| value_loss | 547 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.5 |\n", + "| ep_rew_mean | -5.08 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 24900 |\n", + "| time_elapsed | 577 |\n", + "| total_timesteps | 124500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.39 |\n", + "| explained_variance | -2.02 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24899 |\n", + "| policy_loss | -4.38 |\n", + "| std | 0.931 |\n", + "| value_loss | 0.253 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.5 |\n", + "| ep_rew_mean | -5.31 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25000 |\n", + "| time_elapsed | 579 |\n", + "| total_timesteps | 125000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.39 |\n", + "| explained_variance | -0.537 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 24999 |\n", + "| policy_loss | -0.974 |\n", + "| std | 0.93 |\n", + "| value_loss | 0.0223 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.6 |\n", + "| ep_rew_mean | -5.53 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25100 |\n", + "| time_elapsed | 582 |\n", + "| total_timesteps | 125500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -54.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25099 |\n", + "| policy_loss | -2.16 |\n", + "| std | 0.929 |\n", + "| value_loss | 0.0498 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.2 |\n", + "| ep_rew_mean | -5.88 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25200 |\n", + "| time_elapsed | 584 |\n", + "| total_timesteps | 126000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -27.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25199 |\n", + "| policy_loss | 1.16 |\n", + "| std | 0.929 |\n", + "| value_loss | 0.0288 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.1 |\n", + "| ep_rew_mean | -5.92 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25300 |\n", + "| time_elapsed | 587 |\n", + "| total_timesteps | 126500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.37 |\n", + "| explained_variance | -15.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25299 |\n", + "| policy_loss | -1.05 |\n", + "| std | 0.927 |\n", + "| value_loss | 0.0258 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 35 |\n", + "| ep_rew_mean | -5.25 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25400 |\n", + "| time_elapsed | 589 |\n", + "| total_timesteps | 127000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -15.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25399 |\n", + "| policy_loss | 3.79 |\n", + "| std | 0.929 |\n", + "| value_loss | 0.251 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36 |\n", + "| ep_rew_mean | -5.17 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25500 |\n", + "| time_elapsed | 591 |\n", + "| total_timesteps | 127500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.39 |\n", + "| explained_variance | 0.514 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25499 |\n", + "| policy_loss | 1.61 |\n", + "| std | 0.93 |\n", + "| value_loss | 0.0388 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.3 |\n", + "| ep_rew_mean | -5.45 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25600 |\n", + "| time_elapsed | 593 |\n", + "| total_timesteps | 128000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.39 |\n", + "| explained_variance | -0.424 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25599 |\n", + "| policy_loss | 1.74 |\n", + "| std | 0.931 |\n", + "| value_loss | 0.0342 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.3 |\n", + "| ep_rew_mean | -5.49 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25700 |\n", + "| time_elapsed | 596 |\n", + "| total_timesteps | 128500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -2.54 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25699 |\n", + "| policy_loss | -1.86 |\n", + "| std | 0.929 |\n", + "| value_loss | 0.053 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.7 |\n", + "| ep_rew_mean | -5.64 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25800 |\n", + "| time_elapsed | 598 |\n", + "| total_timesteps | 129000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.37 |\n", + "| explained_variance | -23.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25799 |\n", + "| policy_loss | -0.621 |\n", + "| std | 0.928 |\n", + "| value_loss | 0.0133 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.3 |\n", + "| ep_rew_mean | -5.42 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 25900 |\n", + "| time_elapsed | 601 |\n", + "| total_timesteps | 129500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.38 |\n", + "| explained_variance | -76.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25899 |\n", + "| policy_loss | 0.284 |\n", + "| std | 0.929 |\n", + "| value_loss | 0.0124 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.8 |\n", + "| ep_rew_mean | -5.41 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26000 |\n", + "| time_elapsed | 603 |\n", + "| total_timesteps | 130000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.35 |\n", + "| explained_variance | -0.16 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 25999 |\n", + "| policy_loss | 2.86 |\n", + "| std | 0.925 |\n", + "| value_loss | 0.106 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.7 |\n", + "| ep_rew_mean | -5.52 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26100 |\n", + "| time_elapsed | 605 |\n", + "| total_timesteps | 130500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.35 |\n", + "| explained_variance | 0.164 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26099 |\n", + "| policy_loss | 0.113 |\n", + "| std | 0.926 |\n", + "| value_loss | 0.000767 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.7 |\n", + "| ep_rew_mean | -5.61 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26200 |\n", + "| time_elapsed | 608 |\n", + "| total_timesteps | 131000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.32 |\n", + "| explained_variance | 0.343 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26199 |\n", + "| policy_loss | -0.0984 |\n", + "| std | 0.922 |\n", + "| value_loss | 0.00199 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.3 |\n", + "| ep_rew_mean | -5.59 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26300 |\n", + "| time_elapsed | 610 |\n", + "| total_timesteps | 131500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | -223 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26299 |\n", + "| policy_loss | 1.5 |\n", + "| std | 0.918 |\n", + "| value_loss | 0.0374 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38 |\n", + "| ep_rew_mean | -5.76 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26400 |\n", + "| time_elapsed | 612 |\n", + "| total_timesteps | 132000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -3.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26399 |\n", + "| policy_loss | 3.28 |\n", + "| std | 0.919 |\n", + "| value_loss | 0.12 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.2 |\n", + "| ep_rew_mean | -5.88 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26500 |\n", + "| time_elapsed | 614 |\n", + "| total_timesteps | 132500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -1.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26499 |\n", + "| policy_loss | -2.88 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.159 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.4 |\n", + "| ep_rew_mean | -5.92 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26600 |\n", + "| time_elapsed | 616 |\n", + "| total_timesteps | 133000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -0.651 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26599 |\n", + "| policy_loss | 0.364 |\n", + "| std | 0.919 |\n", + "| value_loss | 0.00463 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36 |\n", + "| ep_rew_mean | -5.42 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26700 |\n", + "| time_elapsed | 619 |\n", + "| total_timesteps | 133500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -1.05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26699 |\n", + "| policy_loss | 1.63 |\n", + "| std | 0.921 |\n", + "| value_loss | 0.0252 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.8 |\n", + "| ep_rew_mean | -5.18 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26800 |\n", + "| time_elapsed | 622 |\n", + "| total_timesteps | 134000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.29 |\n", + "| explained_variance | -1.78 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26799 |\n", + "| policy_loss | 0.653 |\n", + "| std | 0.918 |\n", + "| value_loss | 0.00575 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.9 |\n", + "| ep_rew_mean | -4.33 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 26900 |\n", + "| time_elapsed | 624 |\n", + "| total_timesteps | 134500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.31 |\n", + "| explained_variance | -42.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26899 |\n", + "| policy_loss | 0.447 |\n", + "| std | 0.922 |\n", + "| value_loss | 0.00399 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.8 |\n", + "| ep_rew_mean | -3.04 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27000 |\n", + "| time_elapsed | 626 |\n", + "| total_timesteps | 135000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.3 |\n", + "| explained_variance | -11.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 26999 |\n", + "| policy_loss | -0.0141 |\n", + "| std | 0.92 |\n", + "| value_loss | 0.032 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.6 |\n", + "| ep_rew_mean | -3.05 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27100 |\n", + "| time_elapsed | 628 |\n", + "| total_timesteps | 135500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.27 |\n", + "| explained_variance | -10.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27099 |\n", + "| policy_loss | 1.04 |\n", + "| std | 0.917 |\n", + "| value_loss | 0.0236 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.4 |\n", + "| ep_rew_mean | -3.01 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27200 |\n", + "| time_elapsed | 631 |\n", + "| total_timesteps | 136000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.26 |\n", + "| explained_variance | -8.63 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27199 |\n", + "| policy_loss | -1.78 |\n", + "| std | 0.916 |\n", + "| value_loss | 0.0857 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.7 |\n", + "| ep_rew_mean | -2.86 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27300 |\n", + "| time_elapsed | 634 |\n", + "| total_timesteps | 136500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.24 |\n", + "| explained_variance | -41.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27299 |\n", + "| policy_loss | 0.747 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.0168 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.7 |\n", + "| ep_rew_mean | -3.07 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27400 |\n", + "| time_elapsed | 636 |\n", + "| total_timesteps | 137000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.24 |\n", + "| explained_variance | -10.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27399 |\n", + "| policy_loss | 0.642 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.0117 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27 |\n", + "| ep_rew_mean | -3.17 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27500 |\n", + "| time_elapsed | 638 |\n", + "| total_timesteps | 137500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.25 |\n", + "| explained_variance | -0.721 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27499 |\n", + "| policy_loss | 0.0641 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.00459 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.3 |\n", + "| ep_rew_mean | -3.14 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27600 |\n", + "| time_elapsed | 640 |\n", + "| total_timesteps | 138000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.24 |\n", + "| explained_variance | -2.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27599 |\n", + "| policy_loss | -1.93 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.0774 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.6 |\n", + "| ep_rew_mean | -2.94 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27700 |\n", + "| time_elapsed | 643 |\n", + "| total_timesteps | 138500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27699 |\n", + "| policy_loss | 0.826 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.0189 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.7 |\n", + "| ep_rew_mean | -2.71 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27800 |\n", + "| time_elapsed | 646 |\n", + "| total_timesteps | 139000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -0.207 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27799 |\n", + "| policy_loss | 1.73 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.0429 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.4 |\n", + "| ep_rew_mean | -2.82 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 27900 |\n", + "| time_elapsed | 648 |\n", + "| total_timesteps | 139500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -59.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27899 |\n", + "| policy_loss | 1.17 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0255 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.4 |\n", + "| ep_rew_mean | -2.87 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28000 |\n", + "| time_elapsed | 650 |\n", + "| total_timesteps | 140000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -13.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 27999 |\n", + "| policy_loss | -1.4 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.022 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.1 |\n", + "| ep_rew_mean | -3.14 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28100 |\n", + "| time_elapsed | 652 |\n", + "| total_timesteps | 140500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -34.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28099 |\n", + "| policy_loss | -3.05 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.145 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.6 |\n", + "| ep_rew_mean | -3.21 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28200 |\n", + "| time_elapsed | 655 |\n", + "| total_timesteps | 141000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -4.58 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28199 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.039 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.9 |\n", + "| ep_rew_mean | -3.74 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28300 |\n", + "| time_elapsed | 657 |\n", + "| total_timesteps | 141500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -6.05 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28299 |\n", + "| policy_loss | -1.4 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0386 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.1 |\n", + "| ep_rew_mean | -4.39 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28400 |\n", + "| time_elapsed | 660 |\n", + "| total_timesteps | 142000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -4.65 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28399 |\n", + "| policy_loss | -2.57 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.117 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.4 |\n", + "| ep_rew_mean | -4.91 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28500 |\n", + "| time_elapsed | 662 |\n", + "| total_timesteps | 142500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | 0.988 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28499 |\n", + "| policy_loss | 0.262 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.00413 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.7 |\n", + "| ep_rew_mean | -4.88 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28600 |\n", + "| time_elapsed | 665 |\n", + "| total_timesteps | 143000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | 0.992 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28599 |\n", + "| policy_loss | 0.674 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.0167 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.4 |\n", + "| ep_rew_mean | -5.37 |\n", + "| time/ | |\n", + "| fps | 215 |\n", + "| iterations | 28700 |\n", + "| time_elapsed | 667 |\n", + "| total_timesteps | 143500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28699 |\n", + "| policy_loss | -0.149 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.011 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 38.5 |\n", + "| ep_rew_mean | -5.55 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 28800 |\n", + "| time_elapsed | 669 |\n", + "| total_timesteps | 144000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -0.433 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28799 |\n", + "| policy_loss | -0.77 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0233 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 37.5 |\n", + "| ep_rew_mean | -5.47 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 28900 |\n", + "| time_elapsed | 673 |\n", + "| total_timesteps | 144500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | 0.646 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28899 |\n", + "| policy_loss | -1.16 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.0178 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 35 |\n", + "| ep_rew_mean | -4.91 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29000 |\n", + "| time_elapsed | 675 |\n", + "| total_timesteps | 145000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | 0.712 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 28999 |\n", + "| policy_loss | 0.899 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.0109 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34 |\n", + "| ep_rew_mean | -4.6 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29100 |\n", + "| time_elapsed | 677 |\n", + "| total_timesteps | 145500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | 0.942 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29099 |\n", + "| policy_loss | 1.19 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0268 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.6 |\n", + "| ep_rew_mean | -4.24 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29200 |\n", + "| time_elapsed | 679 |\n", + "| total_timesteps | 146000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -11.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29199 |\n", + "| policy_loss | -0.626 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.00733 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.1 |\n", + "| ep_rew_mean | -3.93 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29300 |\n", + "| time_elapsed | 682 |\n", + "| total_timesteps | 146500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -40.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29299 |\n", + "| policy_loss | -0.387 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.0469 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.6 |\n", + "| ep_rew_mean | -3.81 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29400 |\n", + "| time_elapsed | 685 |\n", + "| total_timesteps | 147000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -14.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29399 |\n", + "| policy_loss | -0.0407 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.00367 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.1 |\n", + "| ep_rew_mean | -3.68 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29500 |\n", + "| time_elapsed | 687 |\n", + "| total_timesteps | 147500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.13 |\n", + "| explained_variance | -3.21 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29499 |\n", + "| policy_loss | -0.565 |\n", + "| std | 0.901 |\n", + "| value_loss | 0.00505 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.4 |\n", + "| ep_rew_mean | -3.72 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29600 |\n", + "| time_elapsed | 689 |\n", + "| total_timesteps | 148000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -364 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29599 |\n", + "| policy_loss | -0.779 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.0386 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.9 |\n", + "| ep_rew_mean | -3.82 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29700 |\n", + "| time_elapsed | 692 |\n", + "| total_timesteps | 148500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -1.76 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29699 |\n", + "| policy_loss | -1.5 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0229 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.1 |\n", + "| ep_rew_mean | -3.91 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29800 |\n", + "| time_elapsed | 694 |\n", + "| total_timesteps | 149000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -1.26 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29799 |\n", + "| policy_loss | 0.248 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.00225 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.5 |\n", + "| ep_rew_mean | -3.8 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 29900 |\n", + "| time_elapsed | 697 |\n", + "| total_timesteps | 149500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -0.57 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29899 |\n", + "| policy_loss | -3.42 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.154 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.1 |\n", + "| ep_rew_mean | -3.98 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30000 |\n", + "| time_elapsed | 699 |\n", + "| total_timesteps | 150000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -16 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 29999 |\n", + "| policy_loss | -1.03 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0146 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.4 |\n", + "| ep_rew_mean | -3.92 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30100 |\n", + "| time_elapsed | 702 |\n", + "| total_timesteps | 150500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -41 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30099 |\n", + "| policy_loss | -1.26 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.0226 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 36.5 |\n", + "| ep_rew_mean | -4.12 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30200 |\n", + "| time_elapsed | 704 |\n", + "| total_timesteps | 151000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -0.415 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30199 |\n", + "| policy_loss | -2.32 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.0702 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.5 |\n", + "| ep_rew_mean | -4.03 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30300 |\n", + "| time_elapsed | 706 |\n", + "| total_timesteps | 151500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -7.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30299 |\n", + "| policy_loss | 3.43 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.095 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.2 |\n", + "| ep_rew_mean | -3.73 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30400 |\n", + "| time_elapsed | 709 |\n", + "| total_timesteps | 152000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -46.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30399 |\n", + "| policy_loss | 1.71 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0525 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.9 |\n", + "| ep_rew_mean | -3.48 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30500 |\n", + "| time_elapsed | 712 |\n", + "| total_timesteps | 152500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -10.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30499 |\n", + "| policy_loss | 82.9 |\n", + "| std | 0.912 |\n", + "| value_loss | 77.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.5 |\n", + "| ep_rew_mean | -3.36 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30600 |\n", + "| time_elapsed | 714 |\n", + "| total_timesteps | 153000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -9.28 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30599 |\n", + "| policy_loss | -1.65 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0547 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24 |\n", + "| ep_rew_mean | -2.71 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30700 |\n", + "| time_elapsed | 716 |\n", + "| total_timesteps | 153500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | 0.204 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30699 |\n", + "| policy_loss | 71.1 |\n", + "| std | 0.912 |\n", + "| value_loss | 71.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.5 |\n", + "| ep_rew_mean | -2.2 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30800 |\n", + "| time_elapsed | 719 |\n", + "| total_timesteps | 154000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -10.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30799 |\n", + "| policy_loss | -2.74 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.103 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.1 |\n", + "| ep_rew_mean | -2.1 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 30900 |\n", + "| time_elapsed | 721 |\n", + "| total_timesteps | 154500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | 0.33 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30899 |\n", + "| policy_loss | 10.6 |\n", + "| std | 0.908 |\n", + "| value_loss | 12 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.1 |\n", + "| ep_rew_mean | -2.23 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31000 |\n", + "| time_elapsed | 724 |\n", + "| total_timesteps | 155000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.14 |\n", + "| explained_variance | -6.04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 30999 |\n", + "| policy_loss | 0.0295 |\n", + "| std | 0.904 |\n", + "| value_loss | 0.00765 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.4 |\n", + "| ep_rew_mean | -2.13 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31100 |\n", + "| time_elapsed | 726 |\n", + "| total_timesteps | 155500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.13 |\n", + "| explained_variance | -11.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31099 |\n", + "| policy_loss | 0.0987 |\n", + "| std | 0.903 |\n", + "| value_loss | 0.00813 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.6 |\n", + "| ep_rew_mean | -2.32 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31200 |\n", + "| time_elapsed | 728 |\n", + "| total_timesteps | 156000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.12 |\n", + "| explained_variance | -2.67 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31199 |\n", + "| policy_loss | -0.126 |\n", + "| std | 0.902 |\n", + "| value_loss | 0.0016 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.8 |\n", + "| ep_rew_mean | -2.13 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 31300 |\n", + "| time_elapsed | 731 |\n", + "| total_timesteps | 156500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -2.64 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31299 |\n", + "| policy_loss | -2.48 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.106 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.4 |\n", + "| ep_rew_mean | -2.06 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 31400 |\n", + "| time_elapsed | 733 |\n", + "| total_timesteps | 157000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -0.459 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31399 |\n", + "| policy_loss | -0.531 |\n", + "| std | 0.906 |\n", + "| value_loss | 0.00477 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.8 |\n", + "| ep_rew_mean | -2.34 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31500 |\n", + "| time_elapsed | 736 |\n", + "| total_timesteps | 157500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -23.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31499 |\n", + "| policy_loss | -0.867 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.022 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.4 |\n", + "| ep_rew_mean | -2.54 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31600 |\n", + "| time_elapsed | 738 |\n", + "| total_timesteps | 158000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -544 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31599 |\n", + "| policy_loss | -2.59 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.109 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.5 |\n", + "| ep_rew_mean | -2.79 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31700 |\n", + "| time_elapsed | 741 |\n", + "| total_timesteps | 158500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -0.925 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31699 |\n", + "| policy_loss | 0.322 |\n", + "| std | 0.909 |\n", + "| value_loss | 0.00291 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.6 |\n", + "| ep_rew_mean | -3.16 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31800 |\n", + "| time_elapsed | 743 |\n", + "| total_timesteps | 159000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -6.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31799 |\n", + "| policy_loss | -0.185 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.000914 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30 |\n", + "| ep_rew_mean | -3.45 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 31900 |\n", + "| time_elapsed | 745 |\n", + "| total_timesteps | 159500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | 0.627 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31899 |\n", + "| policy_loss | -0.0331 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.00058 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 32.4 |\n", + "| ep_rew_mean | -3.76 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32000 |\n", + "| time_elapsed | 748 |\n", + "| total_timesteps | 160000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -0.0635 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 31999 |\n", + "| policy_loss | 0.838 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0116 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.7 |\n", + "| ep_rew_mean | -3.63 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32100 |\n", + "| time_elapsed | 751 |\n", + "| total_timesteps | 160500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -27.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32099 |\n", + "| policy_loss | 0.601 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.00816 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.5 |\n", + "| ep_rew_mean | -3.28 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32200 |\n", + "| time_elapsed | 753 |\n", + "| total_timesteps | 161000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.21 |\n", + "| explained_variance | -1.38 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32199 |\n", + "| policy_loss | 0.0595 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.00292 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.8 |\n", + "| ep_rew_mean | -2.87 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32300 |\n", + "| time_elapsed | 755 |\n", + "| total_timesteps | 161500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -1.75 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32299 |\n", + "| policy_loss | -1.53 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0333 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.1 |\n", + "| ep_rew_mean | -1.95 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32400 |\n", + "| time_elapsed | 757 |\n", + "| total_timesteps | 162000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -5.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32399 |\n", + "| policy_loss | -1.26 |\n", + "| std | 0.91 |\n", + "| value_loss | 0.0261 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19.5 |\n", + "| ep_rew_mean | -1.91 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32500 |\n", + "| time_elapsed | 760 |\n", + "| total_timesteps | 162500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -3.01 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32499 |\n", + "| policy_loss | 0.0395 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.00248 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.7 |\n", + "| ep_rew_mean | -2.18 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32600 |\n", + "| time_elapsed | 762 |\n", + "| total_timesteps | 163000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -84.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32599 |\n", + "| policy_loss | -1.51 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.0319 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.5 |\n", + "| ep_rew_mean | -2.35 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32700 |\n", + "| time_elapsed | 765 |\n", + "| total_timesteps | 163500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.14 |\n", + "| explained_variance | -1.32 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32699 |\n", + "| policy_loss | -1.37 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0274 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.8 |\n", + "| ep_rew_mean | -2.44 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32800 |\n", + "| time_elapsed | 767 |\n", + "| total_timesteps | 164000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -63.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32799 |\n", + "| policy_loss | 1.64 |\n", + "| std | 0.907 |\n", + "| value_loss | 0.0403 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.6 |\n", + "| ep_rew_mean | -2.65 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 32900 |\n", + "| time_elapsed | 769 |\n", + "| total_timesteps | 164500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | -0.602 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32899 |\n", + "| policy_loss | 65.8 |\n", + "| std | 0.907 |\n", + "| value_loss | 56.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.2 |\n", + "| ep_rew_mean | -2.65 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33000 |\n", + "| time_elapsed | 771 |\n", + "| total_timesteps | 165000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.15 |\n", + "| explained_variance | 0.499 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 32999 |\n", + "| policy_loss | 58.8 |\n", + "| std | 0.907 |\n", + "| value_loss | 39.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.6 |\n", + "| ep_rew_mean | -2.2 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33100 |\n", + "| time_elapsed | 773 |\n", + "| total_timesteps | 165500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -0.745 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33099 |\n", + "| policy_loss | -2.53 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.0953 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.6 |\n", + "| ep_rew_mean | -2.16 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33200 |\n", + "| time_elapsed | 776 |\n", + "| total_timesteps | 166000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -3.39 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33199 |\n", + "| policy_loss | 0.465 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.00348 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.1 |\n", + "| ep_rew_mean | -2.26 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33300 |\n", + "| time_elapsed | 778 |\n", + "| total_timesteps | 166500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | -4.91 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33299 |\n", + "| policy_loss | 0.256 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.00234 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.9 |\n", + "| ep_rew_mean | -2.3 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33400 |\n", + "| time_elapsed | 780 |\n", + "| total_timesteps | 167000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | -56.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33399 |\n", + "| policy_loss | -0.6 |\n", + "| std | 0.915 |\n", + "| value_loss | 0.023 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.4 |\n", + "| ep_rew_mean | -2.33 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33500 |\n", + "| time_elapsed | 782 |\n", + "| total_timesteps | 167500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.14 |\n", + "| explained_variance | -0.972 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33499 |\n", + "| policy_loss | -1.6 |\n", + "| std | 0.908 |\n", + "| value_loss | 0.033 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.3 |\n", + "| ep_rew_mean | -2.62 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 33600 |\n", + "| time_elapsed | 784 |\n", + "| total_timesteps | 168000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.17 |\n", + "| explained_variance | -0.332 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33599 |\n", + "| policy_loss | -3.35 |\n", + "| std | 0.911 |\n", + "| value_loss | 0.155 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.5 |\n", + "| ep_rew_mean | -2.61 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33700 |\n", + "| time_elapsed | 787 |\n", + "| total_timesteps | 168500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.2 |\n", + "| explained_variance | 0.0505 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33699 |\n", + "| policy_loss | 27.6 |\n", + "| std | 0.916 |\n", + "| value_loss | 24.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.4 |\n", + "| ep_rew_mean | -2.46 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 33800 |\n", + "| time_elapsed | 789 |\n", + "| total_timesteps | 169000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | 0.133 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33799 |\n", + "| policy_loss | -1.41 |\n", + "| std | 0.912 |\n", + "| value_loss | 0.0282 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.7 |\n", + "| ep_rew_mean | -2.35 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 33900 |\n", + "| time_elapsed | 792 |\n", + "| total_timesteps | 169500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.18 |\n", + "| explained_variance | -0.0856 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33899 |\n", + "| policy_loss | -0.339 |\n", + "| std | 0.913 |\n", + "| value_loss | 0.00213 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.7 |\n", + "| ep_rew_mean | -1.91 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34000 |\n", + "| time_elapsed | 794 |\n", + "| total_timesteps | 170000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.19 |\n", + "| explained_variance | 0.412 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 33999 |\n", + "| policy_loss | -2.75 |\n", + "| std | 0.914 |\n", + "| value_loss | 0.101 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 16.7 |\n", + "| ep_rew_mean | -1.71 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34100 |\n", + "| time_elapsed | 796 |\n", + "| total_timesteps | 170500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.16 |\n", + "| explained_variance | -0.268 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34099 |\n", + "| policy_loss | 30.5 |\n", + "| std | 0.91 |\n", + "| value_loss | 24.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.1 |\n", + "| ep_rew_mean | -1.17 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34200 |\n", + "| time_elapsed | 798 |\n", + "| total_timesteps | 171000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.13 |\n", + "| explained_variance | -0.133 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34199 |\n", + "| policy_loss | 18.1 |\n", + "| std | 0.908 |\n", + "| value_loss | 6.21 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.1 |\n", + "| ep_rew_mean | -1.04 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 34300 |\n", + "| time_elapsed | 801 |\n", + "| total_timesteps | 171500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.11 |\n", + "| explained_variance | 0.0785 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34299 |\n", + "| policy_loss | 19.5 |\n", + "| std | 0.904 |\n", + "| value_loss | 19.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.2 |\n", + "| ep_rew_mean | -1.14 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34400 |\n", + "| time_elapsed | 803 |\n", + "| total_timesteps | 172000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.09 |\n", + "| explained_variance | -0.0422 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34399 |\n", + "| policy_loss | 37.4 |\n", + "| std | 0.902 |\n", + "| value_loss | 27.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.9 |\n", + "| ep_rew_mean | -1.1 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34500 |\n", + "| time_elapsed | 805 |\n", + "| total_timesteps | 172500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.07 |\n", + "| explained_variance | -7.39 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34499 |\n", + "| policy_loss | -1.71 |\n", + "| std | 0.898 |\n", + "| value_loss | 0.0466 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.7 |\n", + "| ep_rew_mean | -0.966 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34600 |\n", + "| time_elapsed | 807 |\n", + "| total_timesteps | 173000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.06 |\n", + "| explained_variance | 0.215 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34599 |\n", + "| policy_loss | 10.6 |\n", + "| std | 0.897 |\n", + "| value_loss | 6.51 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.18 |\n", + "| ep_rew_mean | -0.818 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34700 |\n", + "| time_elapsed | 810 |\n", + "| total_timesteps | 173500 |\n", + "| train/ | |\n", + "| entropy_loss | -9.03 |\n", + "| explained_variance | -3.54 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34699 |\n", + "| policy_loss | 17.6 |\n", + "| std | 0.892 |\n", + "| value_loss | 5.16 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.39 |\n", + "| ep_rew_mean | -0.726 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34800 |\n", + "| time_elapsed | 812 |\n", + "| total_timesteps | 174000 |\n", + "| train/ | |\n", + "| entropy_loss | -9.01 |\n", + "| explained_variance | 0.0827 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34799 |\n", + "| policy_loss | 12.8 |\n", + "| std | 0.891 |\n", + "| value_loss | 2.64 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.82 |\n", + "| ep_rew_mean | -0.773 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 34900 |\n", + "| time_elapsed | 815 |\n", + "| total_timesteps | 174500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.98 |\n", + "| explained_variance | -0.858 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34899 |\n", + "| policy_loss | -3.64 |\n", + "| std | 0.888 |\n", + "| value_loss | 0.237 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.03 |\n", + "| ep_rew_mean | -0.823 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 35000 |\n", + "| time_elapsed | 817 |\n", + "| total_timesteps | 175000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.94 |\n", + "| explained_variance | -9.09 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 34999 |\n", + "| policy_loss | -1.61 |\n", + "| std | 0.883 |\n", + "| value_loss | 0.0613 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.99 |\n", + "| ep_rew_mean | -0.794 |\n", + "| time/ | |\n", + "| fps | 214 |\n", + "| iterations | 35100 |\n", + "| time_elapsed | 819 |\n", + "| total_timesteps | 175500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.93 |\n", + "| explained_variance | 0.0769 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35099 |\n", + "| policy_loss | -0.339 |\n", + "| std | 0.883 |\n", + "| value_loss | 0.0132 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.18 |\n", + "| ep_rew_mean | -0.696 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35200 |\n", + "| time_elapsed | 822 |\n", + "| total_timesteps | 176000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.92 |\n", + "| explained_variance | 0.591 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35199 |\n", + "| policy_loss | 6.37 |\n", + "| std | 0.881 |\n", + "| value_loss | 1.99 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.56 |\n", + "| ep_rew_mean | -0.749 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35300 |\n", + "| time_elapsed | 825 |\n", + "| total_timesteps | 176500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.9 |\n", + "| explained_variance | 0.67 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35299 |\n", + "| policy_loss | 0.123 |\n", + "| std | 0.879 |\n", + "| value_loss | 0.000809 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.49 |\n", + "| ep_rew_mean | -0.878 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35400 |\n", + "| time_elapsed | 828 |\n", + "| total_timesteps | 177000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.9 |\n", + "| explained_variance | -1.04 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35399 |\n", + "| policy_loss | 39.2 |\n", + "| std | 0.878 |\n", + "| value_loss | 25.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.72 |\n", + "| ep_rew_mean | -0.906 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35500 |\n", + "| time_elapsed | 830 |\n", + "| total_timesteps | 177500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.9 |\n", + "| explained_variance | -0.813 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35499 |\n", + "| policy_loss | 18.4 |\n", + "| std | 0.878 |\n", + "| value_loss | 13.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.4 |\n", + "| ep_rew_mean | -1.05 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35600 |\n", + "| time_elapsed | 832 |\n", + "| total_timesteps | 178000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.87 |\n", + "| explained_variance | -7.13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35599 |\n", + "| policy_loss | -1 |\n", + "| std | 0.873 |\n", + "| value_loss | 0.0155 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 14.7 |\n", + "| ep_rew_mean | -1.42 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35700 |\n", + "| time_elapsed | 834 |\n", + "| total_timesteps | 178500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.88 |\n", + "| explained_variance | -7.62 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35699 |\n", + "| policy_loss | -0.701 |\n", + "| std | 0.876 |\n", + "| value_loss | 0.00857 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 16.3 |\n", + "| ep_rew_mean | -1.65 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35800 |\n", + "| time_elapsed | 836 |\n", + "| total_timesteps | 179000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.88 |\n", + "| explained_variance | -12.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35799 |\n", + "| policy_loss | 2.78 |\n", + "| std | 0.876 |\n", + "| value_loss | 0.124 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 16.1 |\n", + "| ep_rew_mean | -1.65 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 35900 |\n", + "| time_elapsed | 839 |\n", + "| total_timesteps | 179500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.87 |\n", + "| explained_variance | 0.203 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35899 |\n", + "| policy_loss | -2.93 |\n", + "| std | 0.875 |\n", + "| value_loss | 0.133 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 15.9 |\n", + "| ep_rew_mean | -1.6 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36000 |\n", + "| time_elapsed | 841 |\n", + "| total_timesteps | 180000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.86 |\n", + "| explained_variance | -56.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 35999 |\n", + "| policy_loss | -1.15 |\n", + "| std | 0.874 |\n", + "| value_loss | 0.0208 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 18.4 |\n", + "| ep_rew_mean | -1.85 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36100 |\n", + "| time_elapsed | 844 |\n", + "| total_timesteps | 180500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.85 |\n", + "| explained_variance | -2.69 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36099 |\n", + "| policy_loss | 0.122 |\n", + "| std | 0.872 |\n", + "| value_loss | 0.00112 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.1 |\n", + "| ep_rew_mean | -2.24 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36200 |\n", + "| time_elapsed | 846 |\n", + "| total_timesteps | 181000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.85 |\n", + "| explained_variance | -0.413 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36199 |\n", + "| policy_loss | 0.17 |\n", + "| std | 0.873 |\n", + "| value_loss | 0.00142 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.7 |\n", + "| ep_rew_mean | -2.19 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36300 |\n", + "| time_elapsed | 848 |\n", + "| total_timesteps | 181500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.85 |\n", + "| explained_variance | -0.618 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36299 |\n", + "| policy_loss | 34.1 |\n", + "| std | 0.872 |\n", + "| value_loss | 20.6 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.8 |\n", + "| ep_rew_mean | -1.82 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36400 |\n", + "| time_elapsed | 851 |\n", + "| total_timesteps | 182000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.86 |\n", + "| explained_variance | -0.0238 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36399 |\n", + "| policy_loss | -2.26 |\n", + "| std | 0.872 |\n", + "| value_loss | 0.0614 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.5 |\n", + "| ep_rew_mean | -1.08 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36500 |\n", + "| time_elapsed | 853 |\n", + "| total_timesteps | 182500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.84 |\n", + "| explained_variance | -9.32 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36499 |\n", + "| policy_loss | 33 |\n", + "| std | 0.871 |\n", + "| value_loss | 19.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.1 |\n", + "| ep_rew_mean | -0.943 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36600 |\n", + "| time_elapsed | 856 |\n", + "| total_timesteps | 183000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.84 |\n", + "| explained_variance | -2.44 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36599 |\n", + "| policy_loss | -1.35 |\n", + "| std | 0.87 |\n", + "| value_loss | 0.0543 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.22 |\n", + "| ep_rew_mean | -0.707 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36700 |\n", + "| time_elapsed | 858 |\n", + "| total_timesteps | 183500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.79 |\n", + "| explained_variance | 0.118 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36699 |\n", + "| policy_loss | 7.84 |\n", + "| std | 0.864 |\n", + "| value_loss | 1.45 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.5 |\n", + "| ep_rew_mean | -0.65 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36800 |\n", + "| time_elapsed | 860 |\n", + "| total_timesteps | 184000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.8 |\n", + "| explained_variance | 0.974 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36799 |\n", + "| policy_loss | 34.6 |\n", + "| std | 0.866 |\n", + "| value_loss | 15.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.05 |\n", + "| ep_rew_mean | -0.718 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 36900 |\n", + "| time_elapsed | 862 |\n", + "| total_timesteps | 184500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.77 |\n", + "| explained_variance | -2.59 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36899 |\n", + "| policy_loss | -0.682 |\n", + "| std | 0.862 |\n", + "| value_loss | 0.0374 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.38 |\n", + "| ep_rew_mean | -0.632 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37000 |\n", + "| time_elapsed | 865 |\n", + "| total_timesteps | 185000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.76 |\n", + "| explained_variance | -3.99 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 36999 |\n", + "| policy_loss | 2.48 |\n", + "| std | 0.861 |\n", + "| value_loss | 1.02 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.84 |\n", + "| ep_rew_mean | -0.57 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37100 |\n", + "| time_elapsed | 868 |\n", + "| total_timesteps | 185500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.74 |\n", + "| explained_variance | -0.648 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37099 |\n", + "| policy_loss | -0.939 |\n", + "| std | 0.86 |\n", + "| value_loss | 0.028 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.16 |\n", + "| ep_rew_mean | -0.607 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37200 |\n", + "| time_elapsed | 871 |\n", + "| total_timesteps | 186000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.71 |\n", + "| explained_variance | -0.928 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37199 |\n", + "| policy_loss | -2.97 |\n", + "| std | 0.857 |\n", + "| value_loss | 0.156 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.12 |\n", + "| ep_rew_mean | -0.537 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37300 |\n", + "| time_elapsed | 873 |\n", + "| total_timesteps | 186500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.73 |\n", + "| explained_variance | -24.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37299 |\n", + "| policy_loss | 8.62 |\n", + "| std | 0.859 |\n", + "| value_loss | 1.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.33 |\n", + "| ep_rew_mean | -0.552 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37400 |\n", + "| time_elapsed | 875 |\n", + "| total_timesteps | 187000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.71 |\n", + "| explained_variance | -0.0602 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37399 |\n", + "| policy_loss | -0.707 |\n", + "| std | 0.857 |\n", + "| value_loss | 0.0155 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.49 |\n", + "| ep_rew_mean | -0.577 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37500 |\n", + "| time_elapsed | 878 |\n", + "| total_timesteps | 187500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.69 |\n", + "| explained_variance | -0.697 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37499 |\n", + "| policy_loss | 5.95 |\n", + "| std | 0.854 |\n", + "| value_loss | 0.698 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.37 |\n", + "| ep_rew_mean | -0.555 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37600 |\n", + "| time_elapsed | 880 |\n", + "| total_timesteps | 188000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.69 |\n", + "| explained_variance | -0.828 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37599 |\n", + "| policy_loss | 0.693 |\n", + "| std | 0.853 |\n", + "| value_loss | 0.129 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.37 |\n", + "| ep_rew_mean | -0.542 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37700 |\n", + "| time_elapsed | 883 |\n", + "| total_timesteps | 188500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.68 |\n", + "| explained_variance | -1.14 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37699 |\n", + "| policy_loss | -2.09 |\n", + "| std | 0.853 |\n", + "| value_loss | 0.0667 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.17 |\n", + "| ep_rew_mean | -0.611 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37800 |\n", + "| time_elapsed | 885 |\n", + "| total_timesteps | 189000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.65 |\n", + "| explained_variance | -0.175 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37799 |\n", + "| policy_loss | -1.87 |\n", + "| std | 0.849 |\n", + "| value_loss | 0.0522 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.19 |\n", + "| ep_rew_mean | -0.625 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 37900 |\n", + "| time_elapsed | 887 |\n", + "| total_timesteps | 189500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.63 |\n", + "| explained_variance | 0.604 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37899 |\n", + "| policy_loss | 5.17 |\n", + "| std | 0.848 |\n", + "| value_loss | 0.386 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.54 |\n", + "| ep_rew_mean | -0.557 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38000 |\n", + "| time_elapsed | 890 |\n", + "| total_timesteps | 190000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.61 |\n", + "| explained_variance | 0.536 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 37999 |\n", + "| policy_loss | 1.16 |\n", + "| std | 0.845 |\n", + "| value_loss | 0.0336 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.48 |\n", + "| ep_rew_mean | -0.553 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38100 |\n", + "| time_elapsed | 892 |\n", + "| total_timesteps | 190500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.6 |\n", + "| explained_variance | 0.19 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38099 |\n", + "| policy_loss | -1.62 |\n", + "| std | 0.844 |\n", + "| value_loss | 0.062 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.13 |\n", + "| ep_rew_mean | -0.539 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38200 |\n", + "| time_elapsed | 894 |\n", + "| total_timesteps | 191000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | 0.108 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38199 |\n", + "| policy_loss | 5.95 |\n", + "| std | 0.842 |\n", + "| value_loss | 1.93 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.32 |\n", + "| ep_rew_mean | -0.713 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38300 |\n", + "| time_elapsed | 896 |\n", + "| total_timesteps | 191500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.58 |\n", + "| explained_variance | -7.62 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38299 |\n", + "| policy_loss | -1.45 |\n", + "| std | 0.842 |\n", + "| value_loss | 0.0577 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.37 |\n", + "| ep_rew_mean | -0.952 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38400 |\n", + "| time_elapsed | 899 |\n", + "| total_timesteps | 192000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | -6.86 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38399 |\n", + "| policy_loss | -4.58 |\n", + "| std | 0.842 |\n", + "| value_loss | 0.306 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.36 |\n", + "| ep_rew_mean | -0.545 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38500 |\n", + "| time_elapsed | 901 |\n", + "| total_timesteps | 192500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.58 |\n", + "| explained_variance | -0.256 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38499 |\n", + "| policy_loss | -0.731 |\n", + "| std | 0.841 |\n", + "| value_loss | 0.0341 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.75 |\n", + "| ep_rew_mean | -0.496 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38600 |\n", + "| time_elapsed | 904 |\n", + "| total_timesteps | 193000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | -25.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38599 |\n", + "| policy_loss | 5.09 |\n", + "| std | 0.84 |\n", + "| value_loss | 0.549 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.99 |\n", + "| ep_rew_mean | -0.514 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38700 |\n", + "| time_elapsed | 906 |\n", + "| total_timesteps | 193500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.59 |\n", + "| explained_variance | 0.813 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38699 |\n", + "| policy_loss | -1.23 |\n", + "| std | 0.84 |\n", + "| value_loss | 0.0226 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.44 |\n", + "| ep_rew_mean | -0.608 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38800 |\n", + "| time_elapsed | 908 |\n", + "| total_timesteps | 194000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.57 |\n", + "| explained_variance | 0.395 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38799 |\n", + "| policy_loss | -1.68 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.037 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.06 |\n", + "| ep_rew_mean | -0.626 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 38900 |\n", + "| time_elapsed | 910 |\n", + "| total_timesteps | 194500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | -0.051 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38899 |\n", + "| policy_loss | -1.68 |\n", + "| std | 0.837 |\n", + "| value_loss | 0.0596 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.39 |\n", + "| ep_rew_mean | -0.694 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39000 |\n", + "| time_elapsed | 912 |\n", + "| total_timesteps | 195000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | 0.107 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 38999 |\n", + "| policy_loss | 15 |\n", + "| std | 0.836 |\n", + "| value_loss | 3.82 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.82 |\n", + "| ep_rew_mean | -0.791 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39100 |\n", + "| time_elapsed | 915 |\n", + "| total_timesteps | 195500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | -0.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39099 |\n", + "| policy_loss | -2.78 |\n", + "| std | 0.837 |\n", + "| value_loss | 0.112 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.22 |\n", + "| ep_rew_mean | -0.523 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39200 |\n", + "| time_elapsed | 918 |\n", + "| total_timesteps | 196000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | 0.411 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39199 |\n", + "| policy_loss | -0.999 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.0289 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.81 |\n", + "| ep_rew_mean | -0.627 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39300 |\n", + "| time_elapsed | 920 |\n", + "| total_timesteps | 196500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | -0.0552 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39299 |\n", + "| policy_loss | -4.89 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.411 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.18 |\n", + "| ep_rew_mean | -0.826 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39400 |\n", + "| time_elapsed | 922 |\n", + "| total_timesteps | 197000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | 0.568 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39399 |\n", + "| policy_loss | 0.375 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.0048 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.39 |\n", + "| ep_rew_mean | -0.778 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39500 |\n", + "| time_elapsed | 924 |\n", + "| total_timesteps | 197500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | -0.0376 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39499 |\n", + "| policy_loss | 4.76 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.424 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.32 |\n", + "| ep_rew_mean | -0.54 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39600 |\n", + "| time_elapsed | 926 |\n", + "| total_timesteps | 198000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.48 |\n", + "| explained_variance | -5.63 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39599 |\n", + "| policy_loss | 1.18 |\n", + "| std | 0.827 |\n", + "| value_loss | 0.0633 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.89 |\n", + "| ep_rew_mean | -0.732 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39700 |\n", + "| time_elapsed | 929 |\n", + "| total_timesteps | 198500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -6.55 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39699 |\n", + "| policy_loss | -2.89 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.168 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.49 |\n", + "| ep_rew_mean | -0.869 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39800 |\n", + "| time_elapsed | 932 |\n", + "| total_timesteps | 199000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | 0.678 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39799 |\n", + "| policy_loss | 5.14 |\n", + "| std | 0.832 |\n", + "| value_loss | 2.83 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.86 |\n", + "| ep_rew_mean | -0.724 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 39900 |\n", + "| time_elapsed | 934 |\n", + "| total_timesteps | 199500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -3.38 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39899 |\n", + "| policy_loss | -3.47 |\n", + "| std | 0.832 |\n", + "| value_loss | 0.298 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.14 |\n", + "| ep_rew_mean | -0.574 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40000 |\n", + "| time_elapsed | 936 |\n", + "| total_timesteps | 200000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | 0.0396 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 39999 |\n", + "| policy_loss | 14 |\n", + "| std | 0.832 |\n", + "| value_loss | 4.51 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.41 |\n", + "| ep_rew_mean | -0.915 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40100 |\n", + "| time_elapsed | 938 |\n", + "| total_timesteps | 200500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | -28.2 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40099 |\n", + "| policy_loss | -3.72 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.278 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.4 |\n", + "| ep_rew_mean | -1.29 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40200 |\n", + "| time_elapsed | 941 |\n", + "| total_timesteps | 201000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | -0.554 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40199 |\n", + "| policy_loss | -5.26 |\n", + "| std | 0.835 |\n", + "| value_loss | 0.394 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.78 |\n", + "| ep_rew_mean | -0.538 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40300 |\n", + "| time_elapsed | 943 |\n", + "| total_timesteps | 201500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | -0.486 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40299 |\n", + "| policy_loss | -1.59 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0358 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.09 |\n", + "| ep_rew_mean | -0.79 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40400 |\n", + "| time_elapsed | 946 |\n", + "| total_timesteps | 202000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -136 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40399 |\n", + "| policy_loss | 49.3 |\n", + "| std | 0.828 |\n", + "| value_loss | 28.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.38 |\n", + "| ep_rew_mean | -0.373 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40500 |\n", + "| time_elapsed | 948 |\n", + "| total_timesteps | 202500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -21.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40499 |\n", + "| policy_loss | 4.35 |\n", + "| std | 0.828 |\n", + "| value_loss | 0.355 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.42 |\n", + "| ep_rew_mean | -0.347 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40600 |\n", + "| time_elapsed | 950 |\n", + "| total_timesteps | 203000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -32.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40599 |\n", + "| policy_loss | 11.8 |\n", + "| std | 0.829 |\n", + "| value_loss | 1.94 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.23 |\n", + "| ep_rew_mean | -0.454 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40700 |\n", + "| time_elapsed | 953 |\n", + "| total_timesteps | 203500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -2.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40699 |\n", + "| policy_loss | 1.51 |\n", + "| std | 0.828 |\n", + "| value_loss | 0.17 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.48 |\n", + "| ep_rew_mean | -0.45 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40800 |\n", + "| time_elapsed | 956 |\n", + "| total_timesteps | 204000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -0.871 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40799 |\n", + "| policy_loss | 2.14 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.0793 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.78 |\n", + "| ep_rew_mean | -0.403 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 40900 |\n", + "| time_elapsed | 958 |\n", + "| total_timesteps | 204500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -5.88 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40899 |\n", + "| policy_loss | 9.99 |\n", + "| std | 0.83 |\n", + "| value_loss | 1.3 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.02 |\n", + "| ep_rew_mean | -0.747 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41000 |\n", + "| time_elapsed | 960 |\n", + "| total_timesteps | 205000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | -0.795 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 40999 |\n", + "| policy_loss | 3.54 |\n", + "| std | 0.832 |\n", + "| value_loss | 0.124 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.78 |\n", + "| ep_rew_mean | -0.913 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41100 |\n", + "| time_elapsed | 962 |\n", + "| total_timesteps | 205500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | 0.324 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41099 |\n", + "| policy_loss | 4.36 |\n", + "| std | 0.836 |\n", + "| value_loss | 0.973 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.42 |\n", + "| ep_rew_mean | -0.468 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41200 |\n", + "| time_elapsed | 964 |\n", + "| total_timesteps | 206000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.56 |\n", + "| explained_variance | 0.645 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41199 |\n", + "| policy_loss | -1.56 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.0493 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.15 |\n", + "| ep_rew_mean | -0.436 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41300 |\n", + "| time_elapsed | 967 |\n", + "| total_timesteps | 206500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.57 |\n", + "| explained_variance | 0.53 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41299 |\n", + "| policy_loss | 1.25 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.0425 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.46 |\n", + "| ep_rew_mean | -0.6 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41400 |\n", + "| time_elapsed | 970 |\n", + "| total_timesteps | 207000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.57 |\n", + "| explained_variance | 1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41399 |\n", + "| policy_loss | 0.223 |\n", + "| std | 0.838 |\n", + "| value_loss | 0.0114 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.51 |\n", + "| ep_rew_mean | -0.677 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41500 |\n", + "| time_elapsed | 972 |\n", + "| total_timesteps | 207500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | -2.53 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41499 |\n", + "| policy_loss | 8.54 |\n", + "| std | 0.835 |\n", + "| value_loss | 2.33 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.46 |\n", + "| ep_rew_mean | -0.563 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41600 |\n", + "| time_elapsed | 974 |\n", + "| total_timesteps | 208000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | -125 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41599 |\n", + "| policy_loss | -3.69 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.958 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.05 |\n", + "| ep_rew_mean | -0.648 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41700 |\n", + "| time_elapsed | 976 |\n", + "| total_timesteps | 208500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | -4.36 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41699 |\n", + "| policy_loss | 3.82 |\n", + "| std | 0.836 |\n", + "| value_loss | 0.415 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 5.47 |\n", + "| ep_rew_mean | -0.475 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41800 |\n", + "| time_elapsed | 979 |\n", + "| total_timesteps | 209000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | -0.948 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41799 |\n", + "| policy_loss | -0.0613 |\n", + "| std | 0.835 |\n", + "| value_loss | 0.0169 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 6.86 |\n", + "| ep_rew_mean | -0.648 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 41900 |\n", + "| time_elapsed | 981 |\n", + "| total_timesteps | 209500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | 0.497 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41899 |\n", + "| policy_loss | 7.1 |\n", + "| std | 0.831 |\n", + "| value_loss | 1.63 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.13 |\n", + "| ep_rew_mean | -0.994 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42000 |\n", + "| time_elapsed | 983 |\n", + "| total_timesteps | 210000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | 0.352 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 41999 |\n", + "| policy_loss | 12.6 |\n", + "| std | 0.834 |\n", + "| value_loss | 9.03 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10 |\n", + "| ep_rew_mean | -1.08 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42100 |\n", + "| time_elapsed | 986 |\n", + "| total_timesteps | 210500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | 0.733 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42099 |\n", + "| policy_loss | -5.63 |\n", + "| std | 0.834 |\n", + "| value_loss | 5.27 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.87 |\n", + "| ep_rew_mean | -1.07 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42200 |\n", + "| time_elapsed | 988 |\n", + "| total_timesteps | 211000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | -2.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42199 |\n", + "| policy_loss | -1.47 |\n", + "| std | 0.832 |\n", + "| value_loss | 0.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 13.8 |\n", + "| ep_rew_mean | -1.54 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42300 |\n", + "| time_elapsed | 990 |\n", + "| total_timesteps | 211500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.55 |\n", + "| explained_variance | -3.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42299 |\n", + "| policy_loss | 2.2 |\n", + "| std | 0.834 |\n", + "| value_loss | 0.156 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.5 |\n", + "| ep_rew_mean | -2.22 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42400 |\n", + "| time_elapsed | 993 |\n", + "| total_timesteps | 212000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -4.49 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42399 |\n", + "| policy_loss | -0.857 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.0175 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.9 |\n", + "| ep_rew_mean | -2.95 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42500 |\n", + "| time_elapsed | 996 |\n", + "| total_timesteps | 212500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -40 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42499 |\n", + "| policy_loss | 1.06 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0349 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 24.9 |\n", + "| ep_rew_mean | -3.75 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42600 |\n", + "| time_elapsed | 998 |\n", + "| total_timesteps | 213000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -131 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42599 |\n", + "| policy_loss | 1.74 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0545 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.6 |\n", + "| ep_rew_mean | -4.32 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42700 |\n", + "| time_elapsed | 1000 |\n", + "| total_timesteps | 213500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -2.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42699 |\n", + "| policy_loss | 0.485 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.0148 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.5 |\n", + "| ep_rew_mean | -4.6 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42800 |\n", + "| time_elapsed | 1002 |\n", + "| total_timesteps | 214000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -52.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42799 |\n", + "| policy_loss | -0.654 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0313 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 34.6 |\n", + "| ep_rew_mean | -5.2 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 42900 |\n", + "| time_elapsed | 1005 |\n", + "| total_timesteps | 214500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | -3.73 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42899 |\n", + "| policy_loss | -1.45 |\n", + "| std | 0.832 |\n", + "| value_loss | 0.0453 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 33.5 |\n", + "| ep_rew_mean | -5.03 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43000 |\n", + "| time_elapsed | 1008 |\n", + "| total_timesteps | 215000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -82.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 42999 |\n", + "| policy_loss | -1.39 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0641 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 31.9 |\n", + "| ep_rew_mean | -4.61 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43100 |\n", + "| time_elapsed | 1010 |\n", + "| total_timesteps | 215500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -4.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43099 |\n", + "| policy_loss | 2.73 |\n", + "| std | 0.828 |\n", + "| value_loss | 0.115 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25.4 |\n", + "| ep_rew_mean | -3.24 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43200 |\n", + "| time_elapsed | 1013 |\n", + "| total_timesteps | 216000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -53.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43199 |\n", + "| policy_loss | -2.6 |\n", + "| std | 0.827 |\n", + "| value_loss | 0.21 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.4 |\n", + "| ep_rew_mean | -2.64 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43300 |\n", + "| time_elapsed | 1015 |\n", + "| total_timesteps | 216500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -15.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43299 |\n", + "| policy_loss | -3 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.127 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20 |\n", + "| ep_rew_mean | -2.49 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43400 |\n", + "| time_elapsed | 1018 |\n", + "| total_timesteps | 217000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -13.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43399 |\n", + "| policy_loss | 3.72 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.114 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.3 |\n", + "| ep_rew_mean | -1.95 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43500 |\n", + "| time_elapsed | 1020 |\n", + "| total_timesteps | 217500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -0.127 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43499 |\n", + "| policy_loss | 47.3 |\n", + "| std | 0.829 |\n", + "| value_loss | 51.2 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 14.9 |\n", + "| ep_rew_mean | -1.54 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43600 |\n", + "| time_elapsed | 1022 |\n", + "| total_timesteps | 218000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -0.629 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43599 |\n", + "| policy_loss | -0.329 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.00375 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 14.2 |\n", + "| ep_rew_mean | -1.44 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43700 |\n", + "| time_elapsed | 1025 |\n", + "| total_timesteps | 218500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | -1.74 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43699 |\n", + "| policy_loss | -1.8 |\n", + "| std | 0.831 |\n", + "| value_loss | 0.066 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 15.2 |\n", + "| ep_rew_mean | -1.6 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43800 |\n", + "| time_elapsed | 1027 |\n", + "| total_timesteps | 219000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.48 |\n", + "| explained_variance | -0.827 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43799 |\n", + "| policy_loss | 0.286 |\n", + "| std | 0.826 |\n", + "| value_loss | 0.00513 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 18.1 |\n", + "| ep_rew_mean | -1.97 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 43900 |\n", + "| time_elapsed | 1029 |\n", + "| total_timesteps | 219500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.48 |\n", + "| explained_variance | 0.0961 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43899 |\n", + "| policy_loss | -0.834 |\n", + "| std | 0.826 |\n", + "| value_loss | 0.0225 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.3 |\n", + "| ep_rew_mean | -2.36 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 44000 |\n", + "| time_elapsed | 1032 |\n", + "| total_timesteps | 220000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | 0.556 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 43999 |\n", + "| policy_loss | 0.284 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.0177 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21 |\n", + "| ep_rew_mean | -2.53 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 44100 |\n", + "| time_elapsed | 1035 |\n", + "| total_timesteps | 220500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | 0.338 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44099 |\n", + "| policy_loss | 66.9 |\n", + "| std | 0.831 |\n", + "| value_loss | 85.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.3 |\n", + "| ep_rew_mean | -2.75 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 44200 |\n", + "| time_elapsed | 1037 |\n", + "| total_timesteps | 221000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | 0.392 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44199 |\n", + "| policy_loss | 0.803 |\n", + "| std | 0.831 |\n", + "| value_loss | 0.0111 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.1 |\n", + "| ep_rew_mean | -2.96 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 44300 |\n", + "| time_elapsed | 1039 |\n", + "| total_timesteps | 221500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -4.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44299 |\n", + "| policy_loss | 0.67 |\n", + "| std | 0.829 |\n", + "| value_loss | 0.019 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.2 |\n", + "| ep_rew_mean | -2.88 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 44400 |\n", + "| time_elapsed | 1041 |\n", + "| total_timesteps | 222000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | 0.961 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44399 |\n", + "| policy_loss | 0.0447 |\n", + "| std | 0.832 |\n", + "| value_loss | 0.0199 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.6 |\n", + "| ep_rew_mean | -2.72 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 44500 |\n", + "| time_elapsed | 1045 |\n", + "| total_timesteps | 222500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | 0.564 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44499 |\n", + "| policy_loss | -1.71 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0518 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.2 |\n", + "| ep_rew_mean | -2.64 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 44600 |\n", + "| time_elapsed | 1047 |\n", + "| total_timesteps | 223000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | -1.6 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44599 |\n", + "| policy_loss | -0.0737 |\n", + "| std | 0.831 |\n", + "| value_loss | 0.00317 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.5 |\n", + "| ep_rew_mean | -2.81 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 44700 |\n", + "| time_elapsed | 1049 |\n", + "| total_timesteps | 223500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.52 |\n", + "| explained_variance | -2.55 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44699 |\n", + "| policy_loss | -0.534 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.00457 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 22.1 |\n", + "| ep_rew_mean | -2.74 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 44800 |\n", + "| time_elapsed | 1051 |\n", + "| total_timesteps | 224000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.5 |\n", + "| explained_variance | -3.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44799 |\n", + "| policy_loss | -0.523 |\n", + "| std | 0.828 |\n", + "| value_loss | 0.0107 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21.2 |\n", + "| ep_rew_mean | -2.57 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 44900 |\n", + "| time_elapsed | 1054 |\n", + "| total_timesteps | 224500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | 0.555 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44899 |\n", + "| policy_loss | -0.784 |\n", + "| std | 0.827 |\n", + "| value_loss | 0.0117 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 23.6 |\n", + "| ep_rew_mean | -2.77 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45000 |\n", + "| time_elapsed | 1056 |\n", + "| total_timesteps | 225000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | -19.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 44999 |\n", + "| policy_loss | 0.149 |\n", + "| std | 0.827 |\n", + "| value_loss | 0.00261 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 26.5 |\n", + "| ep_rew_mean | -3.12 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45100 |\n", + "| time_elapsed | 1059 |\n", + "| total_timesteps | 225500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.53 |\n", + "| explained_variance | -5.61 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45099 |\n", + "| policy_loss | 0.0418 |\n", + "| std | 0.832 |\n", + "| value_loss | 0.00496 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.3 |\n", + "| ep_rew_mean | -3.29 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45200 |\n", + "| time_elapsed | 1061 |\n", + "| total_timesteps | 226000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.54 |\n", + "| explained_variance | -89.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45199 |\n", + "| policy_loss | 0.533 |\n", + "| std | 0.833 |\n", + "| value_loss | 0.0217 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 27.6 |\n", + "| ep_rew_mean | -3.14 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45300 |\n", + "| time_elapsed | 1063 |\n", + "| total_timesteps | 226500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.51 |\n", + "| explained_variance | -5.7 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45299 |\n", + "| policy_loss | 2.17 |\n", + "| std | 0.83 |\n", + "| value_loss | 0.0799 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 28.7 |\n", + "| ep_rew_mean | -3.38 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45400 |\n", + "| time_elapsed | 1065 |\n", + "| total_timesteps | 227000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.49 |\n", + "| explained_variance | 0.352 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45399 |\n", + "| policy_loss | 32.3 |\n", + "| std | 0.828 |\n", + "| value_loss | 46.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.6 |\n", + "| ep_rew_mean | -3.68 |\n", + "| time/ | |\n", + "| fps | 213 |\n", + "| iterations | 45500 |\n", + "| time_elapsed | 1067 |\n", + "| total_timesteps | 227500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.45 |\n", + "| explained_variance | -23.8 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45499 |\n", + "| policy_loss | 0.00402 |\n", + "| std | 0.824 |\n", + "| value_loss | 0.00125 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 30.3 |\n", + "| ep_rew_mean | -3.64 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45600 |\n", + "| time_elapsed | 1071 |\n", + "| total_timesteps | 228000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.41 |\n", + "| explained_variance | -0.362 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45599 |\n", + "| policy_loss | -0.421 |\n", + "| std | 0.821 |\n", + "| value_loss | 0.00643 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.5 |\n", + "| ep_rew_mean | -3.52 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45700 |\n", + "| time_elapsed | 1073 |\n", + "| total_timesteps | 228500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.4 |\n", + "| explained_variance | 0.214 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45699 |\n", + "| policy_loss | 26.4 |\n", + "| std | 0.819 |\n", + "| value_loss | 28.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 29.7 |\n", + "| ep_rew_mean | -3.56 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45800 |\n", + "| time_elapsed | 1075 |\n", + "| total_timesteps | 229000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.38 |\n", + "| explained_variance | -5.01 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45799 |\n", + "| policy_loss | 0.152 |\n", + "| std | 0.817 |\n", + "| value_loss | 0.0019 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 25 |\n", + "| ep_rew_mean | -2.94 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 45900 |\n", + "| time_elapsed | 1077 |\n", + "| total_timesteps | 229500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.38 |\n", + "| explained_variance | 0.584 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45899 |\n", + "| policy_loss | 12 |\n", + "| std | 0.817 |\n", + "| value_loss | 7.89 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 20.9 |\n", + "| ep_rew_mean | -2.31 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46000 |\n", + "| time_elapsed | 1079 |\n", + "| total_timesteps | 230000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.35 |\n", + "| explained_variance | 0.369 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 45999 |\n", + "| policy_loss | 9.14 |\n", + "| std | 0.814 |\n", + "| value_loss | 13.8 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 19.9 |\n", + "| ep_rew_mean | -2.18 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46100 |\n", + "| time_elapsed | 1082 |\n", + "| total_timesteps | 230500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.32 |\n", + "| explained_variance | -2.14 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46099 |\n", + "| policy_loss | -2.3 |\n", + "| std | 0.81 |\n", + "| value_loss | 0.0864 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 16.5 |\n", + "| ep_rew_mean | -1.79 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46200 |\n", + "| time_elapsed | 1085 |\n", + "| total_timesteps | 231000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.34 |\n", + "| explained_variance | -1.16 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46199 |\n", + "| policy_loss | 0.0412 |\n", + "| std | 0.813 |\n", + "| value_loss | 0.00237 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 18.5 |\n", + "| ep_rew_mean | -2.01 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46300 |\n", + "| time_elapsed | 1087 |\n", + "| total_timesteps | 231500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.34 |\n", + "| explained_variance | -4.59 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46299 |\n", + "| policy_loss | 0.152 |\n", + "| std | 0.813 |\n", + "| value_loss | 0.000828 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 16.9 |\n", + "| ep_rew_mean | -1.86 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46400 |\n", + "| time_elapsed | 1089 |\n", + "| total_timesteps | 232000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.33 |\n", + "| explained_variance | -3.32 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46399 |\n", + "| policy_loss | 0.712 |\n", + "| std | 0.811 |\n", + "| value_loss | 0.0174 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.8 |\n", + "| ep_rew_mean | -2.08 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46500 |\n", + "| time_elapsed | 1091 |\n", + "| total_timesteps | 232500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.33 |\n", + "| explained_variance | 0.971 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46499 |\n", + "| policy_loss | -0.512 |\n", + "| std | 0.811 |\n", + "| value_loss | 0.00613 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 21 |\n", + "| ep_rew_mean | -2.6 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46600 |\n", + "| time_elapsed | 1093 |\n", + "| total_timesteps | 233000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.36 |\n", + "| explained_variance | -0.171 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46599 |\n", + "| policy_loss | 0.657 |\n", + "| std | 0.816 |\n", + "| value_loss | 0.00852 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 18.4 |\n", + "| ep_rew_mean | -2.22 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46700 |\n", + "| time_elapsed | 1097 |\n", + "| total_timesteps | 233500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.35 |\n", + "| explained_variance | -12.4 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46699 |\n", + "| policy_loss | 0.196 |\n", + "| std | 0.814 |\n", + "| value_loss | 0.000866 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.6 |\n", + "| ep_rew_mean | -2 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46800 |\n", + "| time_elapsed | 1099 |\n", + "| total_timesteps | 234000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.36 |\n", + "| explained_variance | 0.312 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46799 |\n", + "| policy_loss | 21.9 |\n", + "| std | 0.815 |\n", + "| value_loss | 15.1 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.9 |\n", + "| ep_rew_mean | -0.988 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 46900 |\n", + "| time_elapsed | 1101 |\n", + "| total_timesteps | 234500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.36 |\n", + "| explained_variance | -6.13 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46899 |\n", + "| policy_loss | 39.7 |\n", + "| std | 0.815 |\n", + "| value_loss | 20.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.1 |\n", + "| ep_rew_mean | -0.929 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47000 |\n", + "| time_elapsed | 1103 |\n", + "| total_timesteps | 235000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.36 |\n", + "| explained_variance | -22 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 46999 |\n", + "| policy_loss | 0.883 |\n", + "| std | 0.814 |\n", + "| value_loss | 0.0223 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.8 |\n", + "| ep_rew_mean | -1.01 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47100 |\n", + "| time_elapsed | 1105 |\n", + "| total_timesteps | 235500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.36 |\n", + "| explained_variance | -0.929 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47099 |\n", + "| policy_loss | 16.5 |\n", + "| std | 0.815 |\n", + "| value_loss | 12.4 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.4 |\n", + "| ep_rew_mean | -0.942 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47200 |\n", + "| time_elapsed | 1108 |\n", + "| total_timesteps | 236000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.35 |\n", + "| explained_variance | -0.097 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47199 |\n", + "| policy_loss | -0.956 |\n", + "| std | 0.814 |\n", + "| value_loss | 0.019 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.3 |\n", + "| ep_rew_mean | -0.944 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47300 |\n", + "| time_elapsed | 1111 |\n", + "| total_timesteps | 236500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.36 |\n", + "| explained_variance | 0.821 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47299 |\n", + "| policy_loss | 1.25 |\n", + "| std | 0.814 |\n", + "| value_loss | 0.0215 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.3 |\n", + "| ep_rew_mean | -1.16 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47400 |\n", + "| time_elapsed | 1113 |\n", + "| total_timesteps | 237000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.33 |\n", + "| explained_variance | -6.92 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47399 |\n", + "| policy_loss | -0.515 |\n", + "| std | 0.81 |\n", + "| value_loss | 0.0102 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 14.8 |\n", + "| ep_rew_mean | -1.43 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47500 |\n", + "| time_elapsed | 1115 |\n", + "| total_timesteps | 237500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.28 |\n", + "| explained_variance | -0.875 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47499 |\n", + "| policy_loss | 1.17 |\n", + "| std | 0.805 |\n", + "| value_loss | 0.0417 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17 |\n", + "| ep_rew_mean | -1.66 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47600 |\n", + "| time_elapsed | 1118 |\n", + "| total_timesteps | 238000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.27 |\n", + "| explained_variance | -0.103 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47599 |\n", + "| policy_loss | 29.7 |\n", + "| std | 0.802 |\n", + "| value_loss | 19.9 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 17.1 |\n", + "| ep_rew_mean | -1.69 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47700 |\n", + "| time_elapsed | 1120 |\n", + "| total_timesteps | 238500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.25 |\n", + "| explained_variance | -1.41 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47699 |\n", + "| policy_loss | -0.118 |\n", + "| std | 0.8 |\n", + "| value_loss | 0.00588 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 15.9 |\n", + "| ep_rew_mean | -1.58 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47800 |\n", + "| time_elapsed | 1123 |\n", + "| total_timesteps | 239000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.25 |\n", + "| explained_variance | -348 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47799 |\n", + "| policy_loss | 17.4 |\n", + "| std | 0.8 |\n", + "| value_loss | 6.03 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.5 |\n", + "| ep_rew_mean | -1.1 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 47900 |\n", + "| time_elapsed | 1126 |\n", + "| total_timesteps | 239500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.22 |\n", + "| explained_variance | 0.191 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47899 |\n", + "| policy_loss | -3.29 |\n", + "| std | 0.796 |\n", + "| value_loss | 0.327 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.5 |\n", + "| ep_rew_mean | -0.971 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48000 |\n", + "| time_elapsed | 1128 |\n", + "| total_timesteps | 240000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.21 |\n", + "| explained_variance | -1.57 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 47999 |\n", + "| policy_loss | -3 |\n", + "| std | 0.796 |\n", + "| value_loss | 0.279 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.4 |\n", + "| ep_rew_mean | -1.07 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48100 |\n", + "| time_elapsed | 1130 |\n", + "| total_timesteps | 240500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.17 |\n", + "| explained_variance | -12.1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48099 |\n", + "| policy_loss | -1.27 |\n", + "| std | 0.791 |\n", + "| value_loss | 0.0316 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.5 |\n", + "| ep_rew_mean | -1.09 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48200 |\n", + "| time_elapsed | 1132 |\n", + "| total_timesteps | 241000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.16 |\n", + "| explained_variance | -3.29 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48199 |\n", + "| policy_loss | -2 |\n", + "| std | 0.79 |\n", + "| value_loss | 0.109 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.9 |\n", + "| ep_rew_mean | -1.02 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48300 |\n", + "| time_elapsed | 1135 |\n", + "| total_timesteps | 241500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.17 |\n", + "| explained_variance | 0.482 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48299 |\n", + "| policy_loss | 1.44 |\n", + "| std | 0.791 |\n", + "| value_loss | 0.0523 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.8 |\n", + "| ep_rew_mean | -0.988 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48400 |\n", + "| time_elapsed | 1138 |\n", + "| total_timesteps | 242000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.19 |\n", + "| explained_variance | 0.946 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48399 |\n", + "| policy_loss | 3.43 |\n", + "| std | 0.794 |\n", + "| value_loss | 0.741 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.9 |\n", + "| ep_rew_mean | -0.984 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48500 |\n", + "| time_elapsed | 1140 |\n", + "| total_timesteps | 242500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.17 |\n", + "| explained_variance | 0.686 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48499 |\n", + "| policy_loss | 2.49 |\n", + "| std | 0.791 |\n", + "| value_loss | 0.377 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.5 |\n", + "| ep_rew_mean | -1.03 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48600 |\n", + "| time_elapsed | 1142 |\n", + "| total_timesteps | 243000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.16 |\n", + "| explained_variance | 0.547 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48599 |\n", + "| policy_loss | -2.63 |\n", + "| std | 0.791 |\n", + "| value_loss | 0.0917 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.8 |\n", + "| ep_rew_mean | -1 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48700 |\n", + "| time_elapsed | 1144 |\n", + "| total_timesteps | 243500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.15 |\n", + "| explained_variance | -20.5 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48699 |\n", + "| policy_loss | -1.49 |\n", + "| std | 0.789 |\n", + "| value_loss | 0.0402 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10.4 |\n", + "| ep_rew_mean | -0.983 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48800 |\n", + "| time_elapsed | 1148 |\n", + "| total_timesteps | 244000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.12 |\n", + "| explained_variance | 0.921 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48799 |\n", + "| policy_loss | 2.53 |\n", + "| std | 0.786 |\n", + "| value_loss | 0.29 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.5 |\n", + "| ep_rew_mean | -1.1 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 48900 |\n", + "| time_elapsed | 1150 |\n", + "| total_timesteps | 244500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.13 |\n", + "| explained_variance | -1.97 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48899 |\n", + "| policy_loss | 44.2 |\n", + "| std | 0.787 |\n", + "| value_loss | 34.5 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.8 |\n", + "| ep_rew_mean | -1.08 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49000 |\n", + "| time_elapsed | 1152 |\n", + "| total_timesteps | 245000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.14 |\n", + "| explained_variance | -7.35 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 48999 |\n", + "| policy_loss | -1.5 |\n", + "| std | 0.789 |\n", + "| value_loss | 0.051 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 12.9 |\n", + "| ep_rew_mean | -1.22 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49100 |\n", + "| time_elapsed | 1154 |\n", + "| total_timesteps | 245500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.15 |\n", + "| explained_variance | 0.725 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49099 |\n", + "| policy_loss | -1.03 |\n", + "| std | 0.791 |\n", + "| value_loss | 0.0285 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.27 |\n", + "| ep_rew_mean | -0.881 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49200 |\n", + "| time_elapsed | 1157 |\n", + "| total_timesteps | 246000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.14 |\n", + "| explained_variance | -1.75 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49199 |\n", + "| policy_loss | -0.458 |\n", + "| std | 0.788 |\n", + "| value_loss | 0.0277 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 7.78 |\n", + "| ep_rew_mean | -0.719 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49300 |\n", + "| time_elapsed | 1159 |\n", + "| total_timesteps | 246500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.16 |\n", + "| explained_variance | -0.945 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49299 |\n", + "| policy_loss | -0.858 |\n", + "| std | 0.79 |\n", + "| value_loss | 0.0107 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.33 |\n", + "| ep_rew_mean | -0.716 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49400 |\n", + "| time_elapsed | 1162 |\n", + "| total_timesteps | 247000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.14 |\n", + "| explained_variance | -1.31 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49399 |\n", + "| policy_loss | 7.37 |\n", + "| std | 0.788 |\n", + "| value_loss | 0.954 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 8.69 |\n", + "| ep_rew_mean | -0.741 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49500 |\n", + "| time_elapsed | 1164 |\n", + "| total_timesteps | 247500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.1 |\n", + "| explained_variance | 0.273 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49499 |\n", + "| policy_loss | 11.5 |\n", + "| std | 0.783 |\n", + "| value_loss | 4.53 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.68 |\n", + "| ep_rew_mean | -0.882 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49600 |\n", + "| time_elapsed | 1167 |\n", + "| total_timesteps | 248000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.09 |\n", + "| explained_variance | -5.37 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49599 |\n", + "| policy_loss | -3.11 |\n", + "| std | 0.783 |\n", + "| value_loss | 0.178 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.3 |\n", + "| ep_rew_mean | -1.09 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49700 |\n", + "| time_elapsed | 1169 |\n", + "| total_timesteps | 248500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.09 |\n", + "| explained_variance | -10.9 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49699 |\n", + "| policy_loss | 1.5 |\n", + "| std | 0.783 |\n", + "| value_loss | 0.11 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 10 |\n", + "| ep_rew_mean | -0.96 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49800 |\n", + "| time_elapsed | 1171 |\n", + "| total_timesteps | 249000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.1 |\n", + "| explained_variance | -83.3 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49799 |\n", + "| policy_loss | 5.09 |\n", + "| std | 0.784 |\n", + "| value_loss | 1.63 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 9.64 |\n", + "| ep_rew_mean | -0.879 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 49900 |\n", + "| time_elapsed | 1174 |\n", + "| total_timesteps | 249500 |\n", + "| train/ | |\n", + "| entropy_loss | -8.1 |\n", + "| explained_variance | 1 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49899 |\n", + "| policy_loss | 0.433 |\n", + "| std | 0.784 |\n", + "| value_loss | 0.00432 |\n", + "------------------------------------\n", + "------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 11.8 |\n", + "| ep_rew_mean | -1.15 |\n", + "| time/ | |\n", + "| fps | 212 |\n", + "| iterations | 50000 |\n", + "| time_elapsed | 1176 |\n", + "| total_timesteps | 250000 |\n", + "| train/ | |\n", + "| entropy_loss | -8.07 |\n", + "| explained_variance | 0.846 |\n", + "| learning_rate | 0.0007 |\n", + "| n_updates | 49999 |\n", + "| policy_loss | 6.47 |\n", + "| std | 0.782 |\n", + "| value_loss | 2.7 |\n", + "------------------------------------\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "VBox(children=(Label(value='0.655 MB of 0.655 MB uploaded\\r'), FloatProgress(value=1.0, max=1.0)))" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "c14a53fd35174f3ba632a22e3c9dda47" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "<style>\n", + " table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n", + " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n", + " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n", + " </style>\n", + "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>rollout/ep_len_mean</td><td>███▇▇▅▄▃▃▂▅▇▆▄▃▆▅█▄▅▇▆▄▆▄▆▄▂▃▁▂▁▁▂▃▄▄▂▂▂</td></tr><tr><td>rollout/ep_rew_mean</td><td>▁▁▂▃▄▆▇▇▇▇▄▂▅▆▇▄▆▄▆▆▅▅▆▆▇▆▇█▇█████▇▆▇███</td></tr><tr><td>time/fps</td><td>▁▄▅▅▆▇▇▇███████████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇</td></tr><tr><td>train/entropy_loss</td><td>▂▁▁▁▁▁▂▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▇▇▆▇▇▇▇██</td></tr><tr><td>train/explained_variance</td><td>██████████████▇█▇▇█████▁████▇███████████</td></tr><tr><td>train/learning_rate</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>train/policy_loss</td><td>▂▂▇▂▂▆▂▂▅▂▁▂▂▂▁▂▂▂▂▂▂▂▂▂▁▂▂█▂▁▁▂▂▄▂▂▃▂▂▁</td></tr><tr><td>train/std</td><td>▇████▇▇▇▆▆▅▅▆▅▆▆▆▅▅▅▅▅▅▄▅▅▅▄▄▃▃▂▂▂▂▂▂▂▁▁</td></tr><tr><td>train/value_loss</td><td>▁▁█▁▁▃▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▂▁▁▂▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>global_step</td><td>250000</td></tr><tr><td>rollout/ep_len_mean</td><td>11.85</td></tr><tr><td>rollout/ep_rew_mean</td><td>-1.14974</td></tr><tr><td>time/fps</td><td>212.0</td></tr><tr><td>train/entropy_loss</td><td>-8.07425</td></tr><tr><td>train/explained_variance</td><td>0.84638</td></tr><tr><td>train/learning_rate</td><td>0.0007</td></tr><tr><td>train/policy_loss</td><td>6.47005</td></tr><tr><td>train/std</td><td>0.78183</td></tr><tr><td>train/value_loss</td><td>2.69757</td></tr></table><br/></div></div>" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + " View run <strong style=\"color:#cdcd00\">dashing-glitter-6</strong> at: <a href='https://wandb.ai/aiblackbelt/sb3-panda-reach/runs/ihcoeovn' target=\"_blank\">https://wandb.ai/aiblackbelt/sb3-panda-reach/runs/ihcoeovn</a><br/> View job at <a href='https://wandb.ai/aiblackbelt/sb3-panda-reach/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjE0NTc2ODkxNg==/version_details/v3' target=\"_blank\">https://wandb.ai/aiblackbelt/sb3-panda-reach/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjE0NTc2ODkxNg==/version_details/v3</a><br/>Synced 5 W&B file(s), 0 media file(s), 4 artifact file(s) and 3 other file(s)" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "<IPython.core.display.HTML object>" + ], + "text/html": [ + "Find logs at: <code>./wandb/run-20240305_210146-ihcoeovn/logs</code>" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "login(token=\"*********\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "chs528paf6QF", + "outputId": "0f7016c0-d3e9-4b4a-f358-b42b408c0448" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n", + "Token is valid (permission: write).\n", + "Your token has been saved to /root/.cache/huggingface/token\n", + "Login successful\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Save the trained model\n", + "model.save(\"ECL-TD-RL1-a2c_panda_reach.zip\")\n", + "\n", + "# Load the trained model\n", + "model = A2C.load(\"ECL-TD-RL1-a2c_panda_reach.zip\")\n", + "\n", + "push_to_hub(\n", + " repo_id=\"Karim-20/a2c_cartpole\",\n", + " filename=\"ECL-TD-RL1-a2c_panda_reach.zip\",\n", + " commit_message=\"Add PandaReachJointsDense-v2 environement, agent used to train is A2C\"\n", + ")\n" + ], + "metadata": { + "id": "Z02f1oIdRh28", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136, + "referenced_widgets": [ + "a47211d565fb45fe95b30b99885c4dbd", + "df1580d505af40828061a4c042546f76", + "4627448bbe6d40178e356623c281803f", + "92339b2faa4b47c69b5457aa9631edf7", + "a337aa65ec8b498f87cb74caa342a6a3", + "c1fbb5d26065440aa5eeee238d0d1d38", + "0447228f33a344cd91a2e18cc73cd63e", + "4657c6ea689f494a9254c69d2a08dd4e", + "ef2b6851a3784430949ecf77410e3386", + "178580de1f0b4840825b70ffebaf00c2", + "6aeac63ed6e840c59814923109a66f58" + ] + }, + "outputId": "694736b8-769a-42ba-dffe-41411433c2c8" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[38;5;4mℹ Pushing repo Karim-20/a2c_cartpole to the Hugging Face Hub\u001b[0m\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "ECL-TD-RL1-a2c_panda_reach.zip: 0%| | 0.00/110k [00:00<?, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "a47211d565fb45fe95b30b99885c4dbd" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[38;5;2m✔ Your model has been uploaded to the Hub, you can find it here:\n", + "https://huggingface.co/Karim-20/a2c_cartpole/tree/main/\u001b[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "CommitInfo(commit_url='https://huggingface.co/Karim-20/a2c_cartpole/commit/e5577037d98c688e89cbf3851fb89b09bcf9ca81', commit_message='Add PandaReachJointsDense-v2 environement, agent used to train is A2C', commit_description='', oid='e5577037d98c688e89cbf3851fb89b09bcf9ca81', pr_url=None, pr_revision=None, pr_num=None)" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 19 + } + ] + } + ] +} \ No newline at end of file