Explore Results

  1. PPO 9766 Iterations Parallel
    {
      "stop": {
        "timesteps_total": 5000000,
        "episode_len_mean": 300,
        "training_iteration": 1000000,
        "episode_reward_mean": 50
      },
      "config": {
        "lr": 2e-05,
        "env": "knights_archers_zombies_v10",
        "seed": null,
        "gamma": 0.99,
        "input": "sampler",
        "model": {
          "dim": 84,
          "use_lstm": false,
          "grayscale": false,
          "zero_mean": true,
          "framestack": true,
          "_time_major": false,
          "max_seq_len": 20,
          "conv_filters": null,
          "custom_model": null,
          "free_log_std": false,
          "attention_dim": 64,
          "fcnet_hiddens": [
            256,
            256
          ],
          "use_attention": false,
          "lstm_cell_size": 256,
          "conv_activation": "relu",
          "no_final_linear": false,
          "vf_share_layers": false,
          "fcnet_activation": "tanh",
          "attention_head_dim": 32,
          "custom_action_dist": null,
          "encoder_latent_dim": null,
          "post_fcnet_hiddens": [],
          "always_check_shapes": false,
          "attention_num_heads": 1,
          "custom_model_config": {},
          "custom_preprocessor": null,
          "lstm_use_prev_action": false,
          "lstm_use_prev_reward": false,
          "conv_bias_initializer": null,
          "lstm_bias_initializer": null,
          "post_fcnet_activation": "relu",
          "fcnet_bias_initializer": null,
          "conv_kernel_initializer": null,
          "lstm_weights_initializer": null,
          "_disable_preprocessor_api": false,
          "attention_memory_training": 50,
          "fcnet_weights_initializer": null,
          "_disable_action_flattening": false,
          "_use_default_native_models": -1,
          "attention_memory_inference": 50,
          "lstm_use_prev_action_reward": -1,
          "post_fcnet_bias_initializer": null,
          "attention_init_gru_gate_bias": 2.0,
          "attention_use_n_prev_actions": 0,
          "attention_use_n_prev_rewards": 0,
          "conv_bias_initializer_config": null,
          "lstm_bias_initializer_config": null,
          "fcnet_bias_initializer_config": null,
          "conv_kernel_initializer_config": null,
          "post_fcnet_weights_initializer": null,
          "attention_num_transformer_units": 1,
          "attention_position_wise_mlp_dim": 32,
          "conv_transpose_bias_initializer": null,
          "lstm_weights_initializer_config": null,
          "fcnet_weights_initializer_config": null,
          "conv_transpose_kernel_initializer": null,
          "post_fcnet_bias_initializer_config": null,
          "post_fcnet_weights_initializer_config": null,
          "conv_transpose_bias_initializer_config": null,
          "conv_transpose_kernel_initializer_config": null
        },
        "lambda": 0.9,
        "output": null,
        "explore": true,
        "use_gae": true,
        "kl_coeff": 0.2,
        "num_gpus": 0.0,
        "policies": {
          "default_policy": [
            null,
            null,
            null,
            null
          ]
        },
        "_is_atari": null,
        "callbacks": "ray.rllib.algorithms.callbacks.DefaultCallbacks",
        "framework": "torch",
        "grad_clip": null,
        "kl_target": 0.01,
        "log_level": "ERROR",
        "optimizer": {},
        "_fake_gpus": false,
        "batch_mode": "truncate_episodes",
        "clip_param": 0.4,
        "env_config": {},
        "render_env": false,
        "use_critic": true,
        "worker_cls": -1,
        "env_task_fn": null,
        "lr_schedule": null,
        "use_kl_loss": true,
        "action_space": null,
        "clip_actions": true,
        "clip_rewards": null,
        "fake_sampler": false,
        "grad_clip_by": "global_norm",
        "input_config": {},
        "num_learners": 0,
        "num_sgd_iter": 10,
        "eager_tracing": true,
        "entropy_coeff": 0.1,
        "in_evaluation": false,
        "local_gpu_idx": 0,
        "log_sys_usage": true,
        "logger_config": null,
        "output_config": {},
        "vf_clip_param": 10.0,
        "vf_loss_coeff": 0.25,
        "_learner_class": null,
        "count_steps_by": "env_steps",
        "env_runner_cls": null,
        "logger_creator": null,
        "observation_fn": null,
        "_rl_module_spec": null,
        "action_mask_key": "action_mask",
        "num_env_runners": 11,
        "tf_session_args": {
          "gpu_options": {
            "allow_growth": true
          },
          "device_count": {
            "CPU": 1
          },
          "allow_soft_placement": true,
          "log_device_placement": false,
          "inter_op_parallelism_threads": 2,
          "intra_op_parallelism_threads": 2
        },
        "vf_share_layers": -1,
        "offline_sampling": false,
        "policy_map_cache": -1,
        "prelearner_class": null,
        "sample_collector": "ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector",
        "sample_timeout_s": 60.0,
        "simple_optimizer": -1,
        "train_batch_size": 512,
        "enable_connectors": true,
        "evaluation_config": null,
        "input_read_method": "read_parquet",
        "input_read_schema": {},
        "normalize_actions": true,
        "observation_space": null,
        "policies_to_train": null,
        "policy_mapping_fn": "ray.rllib.algorithms.algorithm_config.DEFAULT_POLICY_MAPPING_FN",
        "preprocessor_pref": "deepmind",
        "shuffle_sequences": true,
        "_learner_connector": null,
        "_model_config_dict": {},
        "eager_max_retraces": 20,
        "exploration_config": {
          "type": "StochasticSampling"
        },
        "map_batches_kwargs": {},
        "observation_filter": "NoFilter",
        "placement_strategy": "PACK",
        "postprocess_inputs": false,
        "remote_worker_envs": false,
        "sgd_minibatch_size": 64,
        "evaluation_duration": 10,
        "evaluation_interval": null,
        "iter_batches_kwargs": {},
        "learner_config_dict": {},
        "policy_map_capacity": 100,
        "shuffle_buffer_size": 0,
        "synchronize_filters": -1,
        "create_env_on_driver": false,
        "custom_eval_function": null,
        "disable_env_checking": -1,
        "num_cpus_per_learner": 1,
        "num_gpus_per_learner": 0,
        "output_max_file_size": 67108864,
        "torch_compile_worker": false,
        "_enable_rl_module_api": -1,
        "_per_module_overrides": {},
        "compress_observations": false,
        "local_tf_session_args": {
          "inter_op_parallelism_threads": 8,
          "intra_op_parallelism_threads": 8
        },
        "torch_compile_learner": false,
        "auto_wrap_old_gym_envs": -1,
        "entropy_coeff_schedule": null,
        "replay_sequence_length": null,
        "enable_async_evaluation": -1,
        "enable_tf1_exec_eagerly": false,
        "num_cpus_per_env_runner": 1,
        "num_envs_per_env_runner": 1,
        "num_gpus_per_env_runner": 0,
        "output_compress_columns": [
          "obs",
          "new_obs"
        ],
        "rollout_fragment_length": "auto",
        "use_worker_filter_stats": true,
        "_env_to_module_connector": null,
        "_module_to_env_connector": null,
        "episode_lookback_horizon": 1,
        "evaluation_duration_unit": "episodes",
        "input_read_method_kwargs": {},
        "min_time_s_per_iteration": null,
        "remote_env_batch_wait_ms": 0,
        "_disable_preprocessor_api": false,
        "export_native_model_files": false,
        "num_cpus_for_main_process": 1,
        "_disable_action_flattening": false,
        "evaluation_num_env_runners": 0,
        "ignore_env_runner_failures": false,
        "ope_split_batch_by_episode": true,
        "update_worker_filter_stats": true,
        "_disable_execution_plan_api": -1,
        "actions_in_input_normalized": false,
        "evaluation_sample_timeout_s": 120.0,
        "max_num_env_runner_restarts": 1000,
        "mini_batch_size_per_learner": null,
        "policy_states_are_swappable": false,
        "recreate_failed_env_runners": false,
        "sampler_perf_stats_ema_coef": null,
        "enable_rl_module_and_learner": false,
        "env_runner_restore_timeout_s": 1800,
        "train_batch_size_per_learner": null,
        "dataset_num_iters_per_learner": null,
        "off_policy_estimation_methods": {},
        "_run_training_always_in_thread": false,
        "prelearner_module_synch_period": 10,
        "custom_resources_per_env_runner": {},
        "evaluation_parallel_to_training": false,
        "keep_per_episode_custom_metrics": false,
        "restart_failed_sub_environments": false,
        "always_attach_evaluation_results": -1,
        "custom_async_evaluation_function": -1,
        "extra_python_environs_for_driver": {},
        "extra_python_environs_for_worker": {},
        "torch_compile_worker_dynamo_mode": null,
        "_dont_auto_sync_env_runner_states": false,
        "env_runner_health_probe_timeout_s": 30,
        "min_train_timesteps_per_iteration": 0,
        "torch_compile_learner_dynamo_mode": null,
        "checkpoint_trainable_policies_only": false,
        "enable_env_runner_and_connector_v2": false,
        "metrics_num_episodes_for_smoothing": 100,
        "min_sample_timesteps_per_iteration": 0,
        "delay_between_env_runner_restarts_s": 60.0,
        "torch_compile_worker_dynamo_backend": "onnxrt",
        "metrics_episode_collection_timeout_s": 60.0,
        "torch_compile_learner_dynamo_backend": "inductor",
        "_tf_policy_handles_more_than_one_loss": false,
        "algorithm_config_overrides_per_module": {},
        "torch_compile_learner_what_to_compile": "forward_train",
        "validate_env_runners_after_construction": true,
        "_disable_initialize_loss_from_dummy_batch": false,
        "max_requests_in_flight_per_sampler_worker": 2,
        "sync_filters_on_rollout_workers_timeout_s": 10.0,
        "_AlgorithmConfig__prior_exploration_config": null,
        "_evaluation_parallel_to_training_wo_thread": false,
        "add_default_connectors_to_learner_pipeline": true,
        "evaluation_force_reset_envs_before_iteration": true,
        "num_consecutive_env_runner_failures_tolerance": 100,
        "add_default_connectors_to_env_to_module_pipeline": true,
        "add_default_connectors_to_module_to_env_pipeline": true
      },
      "storage_path": "/tmp/ray_training_rtpnxl6x",
      "checkpoint_freq": 10,
      "checkpoint_at_end": true
    }
    {}
  2. PPO 9766 Iterations Parallel
    {
      "stop": {
        "timesteps_total": 5000000,
        "episode_len_mean": 300,
        "training_iteration": 1000000,
        "episode_reward_mean": 50
      },
      "config": {
        "lr": 2e-05,
        "env": "knights_archers_zombies_v10",
        "seed": null,
        "gamma": 0.99,
        "input": "sampler",
        "model": {
          "dim": 84,
          "use_lstm": false,
          "grayscale": false,
          "zero_mean": true,
          "framestack": true,
          "_time_major": false,
          "max_seq_len": 20,
          "conv_filters": null,
          "custom_model": null,
          "free_log_std": false,
          "attention_dim": 64,
          "fcnet_hiddens": [
            256,
            256
          ],
          "use_attention": false,
          "lstm_cell_size": 256,
          "conv_activation": "relu",
          "no_final_linear": false,
          "vf_share_layers": false,
          "fcnet_activation": "tanh",
          "attention_head_dim": 32,
          "custom_action_dist": null,
          "encoder_latent_dim": null,
          "post_fcnet_hiddens": [],
          "always_check_shapes": false,
          "attention_num_heads": 1,
          "custom_model_config": {},
          "custom_preprocessor": null,
          "lstm_use_prev_action": false,
          "lstm_use_prev_reward": false,
          "conv_bias_initializer": null,
          "lstm_bias_initializer": null,
          "post_fcnet_activation": "relu",
          "fcnet_bias_initializer": null,
          "conv_kernel_initializer": null,
          "lstm_weights_initializer": null,
          "_disable_preprocessor_api": false,
          "attention_memory_training": 50,
          "fcnet_weights_initializer": null,
          "_disable_action_flattening": false,
          "_use_default_native_models": -1,
          "attention_memory_inference": 50,
          "lstm_use_prev_action_reward": -1,
          "post_fcnet_bias_initializer": null,
          "attention_init_gru_gate_bias": 2.0,
          "attention_use_n_prev_actions": 0,
          "attention_use_n_prev_rewards": 0,
          "conv_bias_initializer_config": null,
          "lstm_bias_initializer_config": null,
          "fcnet_bias_initializer_config": null,
          "conv_kernel_initializer_config": null,
          "post_fcnet_weights_initializer": null,
          "attention_num_transformer_units": 1,
          "attention_position_wise_mlp_dim": 32,
          "conv_transpose_bias_initializer": null,
          "lstm_weights_initializer_config": null,
          "fcnet_weights_initializer_config": null,
          "conv_transpose_kernel_initializer": null,
          "post_fcnet_bias_initializer_config": null,
          "post_fcnet_weights_initializer_config": null,
          "conv_transpose_bias_initializer_config": null,
          "conv_transpose_kernel_initializer_config": null
        },
        "lambda": 0.9,
        "output": null,
        "explore": true,
        "use_gae": true,
        "kl_coeff": 0.2,
        "num_gpus": 0.0,
        "policies": {
          "default_policy": [
            null,
            null,
            null,
            null
          ]
        },
        "_is_atari": null,
        "callbacks": "ray.rllib.algorithms.callbacks.DefaultCallbacks",
        "framework": "torch",
        "grad_clip": null,
        "kl_target": 0.01,
        "log_level": "ERROR",
        "optimizer": {},
        "_fake_gpus": false,
        "batch_mode": "truncate_episodes",
        "clip_param": 0.4,
        "env_config": {},
        "render_env": false,
        "use_critic": true,
        "worker_cls": -1,
        "env_task_fn": null,
        "lr_schedule": null,
        "use_kl_loss": true,
        "action_space": null,
        "clip_actions": true,
        "clip_rewards": null,
        "fake_sampler": false,
        "grad_clip_by": "global_norm",
        "input_config": {},
        "num_learners": 0,
        "num_sgd_iter": 10,
        "eager_tracing": true,
        "entropy_coeff": 0.1,
        "in_evaluation": false,
        "local_gpu_idx": 0,
        "log_sys_usage": true,
        "logger_config": null,
        "output_config": {},
        "vf_clip_param": 10.0,
        "vf_loss_coeff": 0.25,
        "_learner_class": null,
        "count_steps_by": "env_steps",
        "env_runner_cls": null,
        "logger_creator": null,
        "observation_fn": null,
        "_rl_module_spec": null,
        "action_mask_key": "action_mask",
        "num_env_runners": 11,
        "tf_session_args": {
          "gpu_options": {
            "allow_growth": true
          },
          "device_count": {
            "CPU": 1
          },
          "allow_soft_placement": true,
          "log_device_placement": false,
          "inter_op_parallelism_threads": 2,
          "intra_op_parallelism_threads": 2
        },
        "vf_share_layers": -1,
        "offline_sampling": false,
        "policy_map_cache": -1,
        "prelearner_class": null,
        "sample_collector": "ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector",
        "sample_timeout_s": 60.0,
        "simple_optimizer": -1,
        "train_batch_size": 512,
        "enable_connectors": true,
        "evaluation_config": null,
        "input_read_method": "read_parquet",
        "input_read_schema": {},
        "normalize_actions": true,
        "observation_space": null,
        "policies_to_train": null,
        "policy_mapping_fn": "ray.rllib.algorithms.algorithm_config.DEFAULT_POLICY_MAPPING_FN",
        "preprocessor_pref": "deepmind",
        "shuffle_sequences": true,
        "_learner_connector": null,
        "_model_config_dict": {},
        "eager_max_retraces": 20,
        "exploration_config": {
          "type": "StochasticSampling"
        },
        "map_batches_kwargs": {},
        "observation_filter": "NoFilter",
        "placement_strategy": "PACK",
        "postprocess_inputs": false,
        "remote_worker_envs": false,
        "sgd_minibatch_size": 64,
        "evaluation_duration": 10,
        "evaluation_interval": null,
        "iter_batches_kwargs": {},
        "learner_config_dict": {},
        "policy_map_capacity": 100,
        "shuffle_buffer_size": 0,
        "synchronize_filters": -1,
        "create_env_on_driver": false,
        "custom_eval_function": null,
        "disable_env_checking": -1,
        "num_cpus_per_learner": 1,
        "num_gpus_per_learner": 0,
        "output_max_file_size": 67108864,
        "torch_compile_worker": false,
        "_enable_rl_module_api": -1,
        "_per_module_overrides": {},
        "compress_observations": false,
        "local_tf_session_args": {
          "inter_op_parallelism_threads": 8,
          "intra_op_parallelism_threads": 8
        },
        "torch_compile_learner": false,
        "auto_wrap_old_gym_envs": -1,
        "entropy_coeff_schedule": null,
        "replay_sequence_length": null,
        "enable_async_evaluation": -1,
        "enable_tf1_exec_eagerly": false,
        "num_cpus_per_env_runner": 1,
        "num_envs_per_env_runner": 1,
        "num_gpus_per_env_runner": 0,
        "output_compress_columns": [
          "obs",
          "new_obs"
        ],
        "rollout_fragment_length": "auto",
        "use_worker_filter_stats": true,
        "_env_to_module_connector": null,
        "_module_to_env_connector": null,
        "episode_lookback_horizon": 1,
        "evaluation_duration_unit": "episodes",
        "input_read_method_kwargs": {},
        "min_time_s_per_iteration": null,
        "remote_env_batch_wait_ms": 0,
        "_disable_preprocessor_api": false,
        "export_native_model_files": false,
        "num_cpus_for_main_process": 1,
        "_disable_action_flattening": false,
        "evaluation_num_env_runners": 0,
        "ignore_env_runner_failures": false,
        "ope_split_batch_by_episode": true,
        "update_worker_filter_stats": true,
        "_disable_execution_plan_api": -1,
        "actions_in_input_normalized": false,
        "evaluation_sample_timeout_s": 120.0,
        "max_num_env_runner_restarts": 1000,
        "mini_batch_size_per_learner": null,
        "policy_states_are_swappable": false,
        "recreate_failed_env_runners": false,
        "sampler_perf_stats_ema_coef": null,
        "enable_rl_module_and_learner": false,
        "env_runner_restore_timeout_s": 1800,
        "train_batch_size_per_learner": null,
        "dataset_num_iters_per_learner": null,
        "off_policy_estimation_methods": {},
        "_run_training_always_in_thread": false,
        "prelearner_module_synch_period": 10,
        "custom_resources_per_env_runner": {},
        "evaluation_parallel_to_training": false,
        "keep_per_episode_custom_metrics": false,
        "restart_failed_sub_environments": false,
        "always_attach_evaluation_results": -1,
        "custom_async_evaluation_function": -1,
        "extra_python_environs_for_driver": {},
        "extra_python_environs_for_worker": {},
        "torch_compile_worker_dynamo_mode": null,
        "_dont_auto_sync_env_runner_states": false,
        "env_runner_health_probe_timeout_s": 30,
        "min_train_timesteps_per_iteration": 0,
        "torch_compile_learner_dynamo_mode": null,
        "checkpoint_trainable_policies_only": false,
        "enable_env_runner_and_connector_v2": false,
        "metrics_num_episodes_for_smoothing": 100,
        "min_sample_timesteps_per_iteration": 0,
        "delay_between_env_runner_restarts_s": 60.0,
        "torch_compile_worker_dynamo_backend": "onnxrt",
        "metrics_episode_collection_timeout_s": 60.0,
        "torch_compile_learner_dynamo_backend": "inductor",
        "_tf_policy_handles_more_than_one_loss": false,
        "algorithm_config_overrides_per_module": {},
        "torch_compile_learner_what_to_compile": "forward_train",
        "validate_env_runners_after_construction": true,
        "_disable_initialize_loss_from_dummy_batch": false,
        "max_requests_in_flight_per_sampler_worker": 2,
        "sync_filters_on_rollout_workers_timeout_s": 10.0,
        "_AlgorithmConfig__prior_exploration_config": null,
        "_evaluation_parallel_to_training_wo_thread": false,
        "add_default_connectors_to_learner_pipeline": true,
        "evaluation_force_reset_envs_before_iteration": true,
        "num_consecutive_env_runner_failures_tolerance": 100,
        "add_default_connectors_to_env_to_module_pipeline": true,
        "add_default_connectors_to_module_to_env_pipeline": true
      },
      "storage_path": "/tmp/ray_training_rtpnxl6x",
      "checkpoint_freq": 10,
      "checkpoint_at_end": true
    }
    {}

We'd love your feedback