{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 1.3594189882278442, "min": 1.3064628839492798, "max": 1.6253725290298462, "count": 5322 }, "SoccerTwos.Policy.Entropy.sum": { "value": 28145.41015625, "min": 13305.63671875, "max": 39303.515625, "count": 5322 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 75.95384615384616, "min": 43.27433628318584, "max": 151.15151515151516, "count": 5322 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 19748.0, "min": 7776.0, "max": 22768.0, "count": 5322 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1812.5153012580754, "min": 1748.0349224219829, "max": 1905.2482582965547, "count": 5322 }, "SoccerTwos.Self-play.ELO.sum": { "value": 235626.9891635498, "min": 105465.00773154317, "max": 422866.32403202355, "count": 5322 }, "SoccerTwos.Step.mean": { "value": 74999948.0, "min": 21789946.0, "max": 74999948.0, "count": 5322 }, "SoccerTwos.Step.sum": { "value": 74999948.0, "min": 21789946.0, "max": 74999948.0, "count": 5322 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": -0.02129240892827511, "min": -0.1412869542837143, "max": 0.07948355376720428, "count": 5322 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": -2.746720790863037, "min": -25.149078369140625, "max": 13.410202026367188, "count": 5322 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": -0.021920129656791687, "min": -0.14307013154029846, "max": 0.08113993704319, "count": 5322 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": -2.8276968002319336, "min": -25.46648406982422, "max": 13.153609275817871, "count": 5322 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5322 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 5322 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.04170542539552201, "min": -0.5404349187063793, "max": 0.37791667009393376, "count": 5322 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 5.379999876022339, "min": -68.32240009307861, "max": 62.00280004739761, "count": 5322 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.04170542539552201, "min": -0.5404349187063793, "max": 0.37791667009393376, "count": 5322 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 5.379999876022339, "min": -68.32240009307861, "max": 62.00280004739761, "count": 5322 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5322 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 5322 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.01749416682869196, "min": 0.009693108641658909, "max": 0.02733100582845509, "count": 2582 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.01749416682869196, "min": 0.009693108641658909, "max": 0.02733100582845509, "count": 2582 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.09392507051428159, "min": 0.05155490450561047, "max": 0.11744340111811956, "count": 2582 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.09392507051428159, "min": 0.05155490450561047, "max": 0.11744340111811956, "count": 2582 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.09439446131388346, "min": 0.05229328287144502, "max": 0.11891336192687353, "count": 2582 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.09439446131388346, "min": 0.05229328287144502, "max": 0.11891336192687353, "count": 2582 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2582 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.0003, "min": 0.0003, "max": 0.0003, "count": 2582 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2582 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.20000000000000007, "min": 0.20000000000000007, "max": 0.20000000000000007, "count": 2582 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2582 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.005000000000000001, "min": 0.005000000000000001, "max": 0.005000000000000001, "count": 2582 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1677851997", "python_version": "3.9.16 (main, Jan 11 2023, 16:16:36) [MSC v.1916 64 bit (AMD64)]", "command_line_arguments": "D:\\ProgramData\\anaconda3\\envs\\rl\\Scripts\\mlagents-learn ./config/poca/SoccerTwos2.yaml --env=./training-envs-executables/SoccerTwos.exe --run-id=SoccerTwos75M --no-graphics --resume", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.13.1+cu117", "numpy_version": "1.21.2", "end_time_seconds": "1677984806" }, "total": 132802.3624939, "count": 1, "self": 0.503199400001904, "children": { "run_training.setup": { "total": 0.25194579999999966, "count": 1, "self": 0.25194579999999966 }, "TrainerController.start_learning": { "total": 132801.6073487, "count": 1, "self": 106.21489459142322, "children": { "TrainerController._reset_env": { "total": 14.94541020004875, "count": 356, "self": 14.94541020004875 }, "TrainerController.advance": { "total": 132680.1063593085, "count": 3651421, "self": 116.81544708908768, "children": { "env_step": { "total": 97373.60285030825, "count": 3651421, "self": 59819.65744699524, "children": { "SubprocessEnvManager._take_step": { "total": 37492.54208541678, "count": 3651421, "self": 728.4675793360511, "children": { "TorchPolicy.evaluate": { "total": 36764.07450608073, "count": 6689806, "self": 36764.07450608073 } } }, "workers": { "total": 61.403317896226326, "count": 3651421, "self": 0.0, "children": { "worker_root": { "total": 132605.69661230172, "count": 3651421, "is_parallel": true, "self": 83684.95475629717, "children": { "steps_from_proto": { "total": 0.7061807999941747, "count": 712, "is_parallel": true, "self": 0.1497219996936865, "children": { "_process_rank_one_or_two_observation": { "total": 0.5564588003004882, "count": 2848, "is_parallel": true, "self": 0.5564588003004882 } } }, "UnityEnvironment.step": { "total": 48920.03567520455, "count": 3651421, "is_parallel": true, "self": 2060.1273715352, "children": { "UnityEnvironment._generate_step_input": { "total": 1654.2546377861886, "count": 3651421, "is_parallel": true, "self": 1654.2546377861886 }, "communicator.exchange": { "total": 38662.078741287434, "count": 3651421, "is_parallel": true, "self": 38662.078741287434 }, "steps_from_proto": { "total": 6543.574924595727, "count": 7302842, "is_parallel": true, "self": 1391.2318005020497, "children": { "_process_rank_one_or_two_observation": { "total": 5152.343124093677, "count": 29211368, "is_parallel": true, "self": 5152.343124093677 } } } } } } } } } } }, "trainer_advance": { "total": 35189.68806191116, "count": 3651421, "self": 768.2235237175992, "children": { "process_trajectory": { "total": 17733.85331909333, "count": 3651421, "self": 17697.212138093342, "children": { "RLTrainer._checkpoint": { "total": 36.64118099998916, "count": 107, "self": 36.64118099998916 } } }, "_update_policy": { "total": 16687.611219100232, "count": 2582, "self": 9642.20433590026, "children": { "TorchPOCAOptimizer.update": { "total": 7045.406883199973, "count": 77460, "self": 7045.406883199973 } } } } } } }, "trainer_threads": { "total": 1.600012183189392e-06, "count": 1, "self": 1.600012183189392e-06 }, "TrainerController._save_models": { "total": 0.3406830000167247, "count": 1, "self": 0.006663300009677187, "children": { "RLTrainer._checkpoint": { "total": 0.3340197000070475, "count": 1, "self": 0.3340197000070475 } } } } } } }