Explore Results
-
PPO 9766 Iterations Parallel
{ "stop": { "timesteps_total": 5000000, "episode_len_mean": 300, "training_iteration": 1000000, "episode_reward_mean": 50 }, "config": { "lr": 2e-05, "env": "knights_archers_zombies_v10", "seed": null, "gamma": 0.99, "input": "sampler", "model": { "dim": 84, "use_lstm": false, "grayscale": false, "zero_mean": true, "framestack": true, "_time_major": false, "max_seq_len": 20, "conv_filters": null, "custom_model": null, "free_log_std": false, "attention_dim": 64, "fcnet_hiddens": [ 256, 256 ], "use_attention": false, "lstm_cell_size": 256, "conv_activation": "relu", "no_final_linear": false, "vf_share_layers": false, "fcnet_activation": "tanh", "attention_head_dim": 32, "custom_action_dist": null, "encoder_latent_dim": null, "post_fcnet_hiddens": [], "always_check_shapes": false, "attention_num_heads": 1, "custom_model_config": {}, "custom_preprocessor": null, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "conv_bias_initializer": null, "lstm_bias_initializer": null, "post_fcnet_activation": "relu", "fcnet_bias_initializer": null, "conv_kernel_initializer": null, "lstm_weights_initializer": null, "_disable_preprocessor_api": false, "attention_memory_training": 50, "fcnet_weights_initializer": null, "_disable_action_flattening": false, "_use_default_native_models": -1, "attention_memory_inference": 50, "lstm_use_prev_action_reward": -1, "post_fcnet_bias_initializer": null, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "conv_bias_initializer_config": null, "lstm_bias_initializer_config": null, "fcnet_bias_initializer_config": null, "conv_kernel_initializer_config": null, "post_fcnet_weights_initializer": null, "attention_num_transformer_units": 1, "attention_position_wise_mlp_dim": 32, "conv_transpose_bias_initializer": null, "lstm_weights_initializer_config": null, "fcnet_weights_initializer_config": null, "conv_transpose_kernel_initializer": null, "post_fcnet_bias_initializer_config": null, "post_fcnet_weights_initializer_config": null, "conv_transpose_bias_initializer_config": null, "conv_transpose_kernel_initializer_config": null }, "lambda": 0.9, "output": null, "explore": true, "use_gae": true, "kl_coeff": 0.2, "num_gpus": 0.0, "policies": { "default_policy": [ null, null, null, null ] }, "_is_atari": null, "callbacks": "ray.rllib.algorithms.callbacks.DefaultCallbacks", "framework": "torch", "grad_clip": null, "kl_target": 0.01, "log_level": "ERROR", "optimizer": {}, "_fake_gpus": false, "batch_mode": "truncate_episodes", "clip_param": 0.4, "env_config": {}, "render_env": false, "use_critic": true, "worker_cls": -1, "env_task_fn": null, "lr_schedule": null, "use_kl_loss": true, "action_space": null, "clip_actions": true, "clip_rewards": null, "fake_sampler": false, "grad_clip_by": "global_norm", "input_config": {}, "num_learners": 0, "num_sgd_iter": 10, "eager_tracing": true, "entropy_coeff": 0.1, "in_evaluation": false, "local_gpu_idx": 0, "log_sys_usage": true, "logger_config": null, "output_config": {}, "vf_clip_param": 10.0, "vf_loss_coeff": 0.25, "_learner_class": null, "count_steps_by": "env_steps", "env_runner_cls": null, "logger_creator": null, "observation_fn": null, "_rl_module_spec": null, "action_mask_key": "action_mask", "num_env_runners": 11, "tf_session_args": { "gpu_options": { "allow_growth": true }, "device_count": { "CPU": 1 }, "allow_soft_placement": true, "log_device_placement": false, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2 }, "vf_share_layers": -1, "offline_sampling": false, "policy_map_cache": -1, "prelearner_class": null, "sample_collector": "ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector", "sample_timeout_s": 60.0, "simple_optimizer": -1, "train_batch_size": 512, "enable_connectors": true, "evaluation_config": null, "input_read_method": "read_parquet", "input_read_schema": {}, "normalize_actions": true, "observation_space": null, "policies_to_train": null, "policy_mapping_fn": "ray.rllib.algorithms.algorithm_config.DEFAULT_POLICY_MAPPING_FN", "preprocessor_pref": "deepmind", "shuffle_sequences": true, "_learner_connector": null, "_model_config_dict": {}, "eager_max_retraces": 20, "exploration_config": { "type": "StochasticSampling" }, "map_batches_kwargs": {}, "observation_filter": "NoFilter", "placement_strategy": "PACK", "postprocess_inputs": false, "remote_worker_envs": false, "sgd_minibatch_size": 64, "evaluation_duration": 10, "evaluation_interval": null, "iter_batches_kwargs": {}, "learner_config_dict": {}, "policy_map_capacity": 100, "shuffle_buffer_size": 0, "synchronize_filters": -1, "create_env_on_driver": false, "custom_eval_function": null, "disable_env_checking": -1, "num_cpus_per_learner": 1, "num_gpus_per_learner": 0, "output_max_file_size": 67108864, "torch_compile_worker": false, "_enable_rl_module_api": -1, "_per_module_overrides": {}, "compress_observations": false, "local_tf_session_args": { "inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8 }, "torch_compile_learner": false, "auto_wrap_old_gym_envs": -1, "entropy_coeff_schedule": null, "replay_sequence_length": null, "enable_async_evaluation": -1, "enable_tf1_exec_eagerly": false, "num_cpus_per_env_runner": 1, "num_envs_per_env_runner": 1, "num_gpus_per_env_runner": 0, "output_compress_columns": [ "obs", "new_obs" ], "rollout_fragment_length": "auto", "use_worker_filter_stats": true, "_env_to_module_connector": null, "_module_to_env_connector": null, "episode_lookback_horizon": 1, "evaluation_duration_unit": "episodes", "input_read_method_kwargs": {}, "min_time_s_per_iteration": null, "remote_env_batch_wait_ms": 0, "_disable_preprocessor_api": false, "export_native_model_files": false, "num_cpus_for_main_process": 1, "_disable_action_flattening": false, "evaluation_num_env_runners": 0, "ignore_env_runner_failures": false, "ope_split_batch_by_episode": true, "update_worker_filter_stats": true, "_disable_execution_plan_api": -1, "actions_in_input_normalized": false, "evaluation_sample_timeout_s": 120.0, "max_num_env_runner_restarts": 1000, "mini_batch_size_per_learner": null, "policy_states_are_swappable": false, "recreate_failed_env_runners": false, "sampler_perf_stats_ema_coef": null, "enable_rl_module_and_learner": false, "env_runner_restore_timeout_s": 1800, "train_batch_size_per_learner": null, "dataset_num_iters_per_learner": null, "off_policy_estimation_methods": {}, "_run_training_always_in_thread": false, "prelearner_module_synch_period": 10, "custom_resources_per_env_runner": {}, "evaluation_parallel_to_training": false, "keep_per_episode_custom_metrics": false, "restart_failed_sub_environments": false, "always_attach_evaluation_results": -1, "custom_async_evaluation_function": -1, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "torch_compile_worker_dynamo_mode": null, "_dont_auto_sync_env_runner_states": false, "env_runner_health_probe_timeout_s": 30, "min_train_timesteps_per_iteration": 0, "torch_compile_learner_dynamo_mode": null, "checkpoint_trainable_policies_only": false, "enable_env_runner_and_connector_v2": false, "metrics_num_episodes_for_smoothing": 100, "min_sample_timesteps_per_iteration": 0, "delay_between_env_runner_restarts_s": 60.0, "torch_compile_worker_dynamo_backend": "onnxrt", "metrics_episode_collection_timeout_s": 60.0, "torch_compile_learner_dynamo_backend": "inductor", "_tf_policy_handles_more_than_one_loss": false, "algorithm_config_overrides_per_module": {}, "torch_compile_learner_what_to_compile": "forward_train", "validate_env_runners_after_construction": true, "_disable_initialize_loss_from_dummy_batch": false, "max_requests_in_flight_per_sampler_worker": 2, "sync_filters_on_rollout_workers_timeout_s": 10.0, "_AlgorithmConfig__prior_exploration_config": null, "_evaluation_parallel_to_training_wo_thread": false, "add_default_connectors_to_learner_pipeline": true, "evaluation_force_reset_envs_before_iteration": true, "num_consecutive_env_runner_failures_tolerance": 100, "add_default_connectors_to_env_to_module_pipeline": true, "add_default_connectors_to_module_to_env_pipeline": true }, "storage_path": "/tmp/ray_training_rtpnxl6x", "checkpoint_freq": 10, "checkpoint_at_end": true } -
PPO 9766 Iterations Parallel
{ "stop": { "timesteps_total": 5000000, "episode_len_mean": 300, "training_iteration": 1000000, "episode_reward_mean": 50 }, "config": { "lr": 2e-05, "env": "knights_archers_zombies_v10", "seed": null, "gamma": 0.99, "input": "sampler", "model": { "dim": 84, "use_lstm": false, "grayscale": false, "zero_mean": true, "framestack": true, "_time_major": false, "max_seq_len": 20, "conv_filters": null, "custom_model": null, "free_log_std": false, "attention_dim": 64, "fcnet_hiddens": [ 256, 256 ], "use_attention": false, "lstm_cell_size": 256, "conv_activation": "relu", "no_final_linear": false, "vf_share_layers": false, "fcnet_activation": "tanh", "attention_head_dim": 32, "custom_action_dist": null, "encoder_latent_dim": null, "post_fcnet_hiddens": [], "always_check_shapes": false, "attention_num_heads": 1, "custom_model_config": {}, "custom_preprocessor": null, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "conv_bias_initializer": null, "lstm_bias_initializer": null, "post_fcnet_activation": "relu", "fcnet_bias_initializer": null, "conv_kernel_initializer": null, "lstm_weights_initializer": null, "_disable_preprocessor_api": false, "attention_memory_training": 50, "fcnet_weights_initializer": null, "_disable_action_flattening": false, "_use_default_native_models": -1, "attention_memory_inference": 50, "lstm_use_prev_action_reward": -1, "post_fcnet_bias_initializer": null, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "conv_bias_initializer_config": null, "lstm_bias_initializer_config": null, "fcnet_bias_initializer_config": null, "conv_kernel_initializer_config": null, "post_fcnet_weights_initializer": null, "attention_num_transformer_units": 1, "attention_position_wise_mlp_dim": 32, "conv_transpose_bias_initializer": null, "lstm_weights_initializer_config": null, "fcnet_weights_initializer_config": null, "conv_transpose_kernel_initializer": null, "post_fcnet_bias_initializer_config": null, "post_fcnet_weights_initializer_config": null, "conv_transpose_bias_initializer_config": null, "conv_transpose_kernel_initializer_config": null }, "lambda": 0.9, "output": null, "explore": true, "use_gae": true, "kl_coeff": 0.2, "num_gpus": 0.0, "policies": { "default_policy": [ null, null, null, null ] }, "_is_atari": null, "callbacks": "ray.rllib.algorithms.callbacks.DefaultCallbacks", "framework": "torch", "grad_clip": null, "kl_target": 0.01, "log_level": "ERROR", "optimizer": {}, "_fake_gpus": false, "batch_mode": "truncate_episodes", "clip_param": 0.4, "env_config": {}, "render_env": false, "use_critic": true, "worker_cls": -1, "env_task_fn": null, "lr_schedule": null, "use_kl_loss": true, "action_space": null, "clip_actions": true, "clip_rewards": null, "fake_sampler": false, "grad_clip_by": "global_norm", "input_config": {}, "num_learners": 0, "num_sgd_iter": 10, "eager_tracing": true, "entropy_coeff": 0.1, "in_evaluation": false, "local_gpu_idx": 0, "log_sys_usage": true, "logger_config": null, "output_config": {}, "vf_clip_param": 10.0, "vf_loss_coeff": 0.25, "_learner_class": null, "count_steps_by": "env_steps", "env_runner_cls": null, "logger_creator": null, "observation_fn": null, "_rl_module_spec": null, "action_mask_key": "action_mask", "num_env_runners": 11, "tf_session_args": { "gpu_options": { "allow_growth": true }, "device_count": { "CPU": 1 }, "allow_soft_placement": true, "log_device_placement": false, "inter_op_parallelism_threads": 2, "intra_op_parallelism_threads": 2 }, "vf_share_layers": -1, "offline_sampling": false, "policy_map_cache": -1, "prelearner_class": null, "sample_collector": "ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector", "sample_timeout_s": 60.0, "simple_optimizer": -1, "train_batch_size": 512, "enable_connectors": true, "evaluation_config": null, "input_read_method": "read_parquet", "input_read_schema": {}, "normalize_actions": true, "observation_space": null, "policies_to_train": null, "policy_mapping_fn": "ray.rllib.algorithms.algorithm_config.DEFAULT_POLICY_MAPPING_FN", "preprocessor_pref": "deepmind", "shuffle_sequences": true, "_learner_connector": null, "_model_config_dict": {}, "eager_max_retraces": 20, "exploration_config": { "type": "StochasticSampling" }, "map_batches_kwargs": {}, "observation_filter": "NoFilter", "placement_strategy": "PACK", "postprocess_inputs": false, "remote_worker_envs": false, "sgd_minibatch_size": 64, "evaluation_duration": 10, "evaluation_interval": null, "iter_batches_kwargs": {}, "learner_config_dict": {}, "policy_map_capacity": 100, "shuffle_buffer_size": 0, "synchronize_filters": -1, "create_env_on_driver": false, "custom_eval_function": null, "disable_env_checking": -1, "num_cpus_per_learner": 1, "num_gpus_per_learner": 0, "output_max_file_size": 67108864, "torch_compile_worker": false, "_enable_rl_module_api": -1, "_per_module_overrides": {}, "compress_observations": false, "local_tf_session_args": { "inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8 }, "torch_compile_learner": false, "auto_wrap_old_gym_envs": -1, "entropy_coeff_schedule": null, "replay_sequence_length": null, "enable_async_evaluation": -1, "enable_tf1_exec_eagerly": false, "num_cpus_per_env_runner": 1, "num_envs_per_env_runner": 1, "num_gpus_per_env_runner": 0, "output_compress_columns": [ "obs", "new_obs" ], "rollout_fragment_length": "auto", "use_worker_filter_stats": true, "_env_to_module_connector": null, "_module_to_env_connector": null, "episode_lookback_horizon": 1, "evaluation_duration_unit": "episodes", "input_read_method_kwargs": {}, "min_time_s_per_iteration": null, "remote_env_batch_wait_ms": 0, "_disable_preprocessor_api": false, "export_native_model_files": false, "num_cpus_for_main_process": 1, "_disable_action_flattening": false, "evaluation_num_env_runners": 0, "ignore_env_runner_failures": false, "ope_split_batch_by_episode": true, "update_worker_filter_stats": true, "_disable_execution_plan_api": -1, "actions_in_input_normalized": false, "evaluation_sample_timeout_s": 120.0, "max_num_env_runner_restarts": 1000, "mini_batch_size_per_learner": null, "policy_states_are_swappable": false, "recreate_failed_env_runners": false, "sampler_perf_stats_ema_coef": null, "enable_rl_module_and_learner": false, "env_runner_restore_timeout_s": 1800, "train_batch_size_per_learner": null, "dataset_num_iters_per_learner": null, "off_policy_estimation_methods": {}, "_run_training_always_in_thread": false, "prelearner_module_synch_period": 10, "custom_resources_per_env_runner": {}, "evaluation_parallel_to_training": false, "keep_per_episode_custom_metrics": false, "restart_failed_sub_environments": false, "always_attach_evaluation_results": -1, "custom_async_evaluation_function": -1, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "torch_compile_worker_dynamo_mode": null, "_dont_auto_sync_env_runner_states": false, "env_runner_health_probe_timeout_s": 30, "min_train_timesteps_per_iteration": 0, "torch_compile_learner_dynamo_mode": null, "checkpoint_trainable_policies_only": false, "enable_env_runner_and_connector_v2": false, "metrics_num_episodes_for_smoothing": 100, "min_sample_timesteps_per_iteration": 0, "delay_between_env_runner_restarts_s": 60.0, "torch_compile_worker_dynamo_backend": "onnxrt", "metrics_episode_collection_timeout_s": 60.0, "torch_compile_learner_dynamo_backend": "inductor", "_tf_policy_handles_more_than_one_loss": false, "algorithm_config_overrides_per_module": {}, "torch_compile_learner_what_to_compile": "forward_train", "validate_env_runners_after_construction": true, "_disable_initialize_loss_from_dummy_batch": false, "max_requests_in_flight_per_sampler_worker": 2, "sync_filters_on_rollout_workers_timeout_s": 10.0, "_AlgorithmConfig__prior_exploration_config": null, "_evaluation_parallel_to_training_wo_thread": false, "add_default_connectors_to_learner_pipeline": true, "evaluation_force_reset_envs_before_iteration": true, "num_consecutive_env_runner_failures_tolerance": 100, "add_default_connectors_to_env_to_module_pipeline": true, "add_default_connectors_to_module_to_env_pipeline": true }, "storage_path": "/tmp/ray_training_rtpnxl6x", "checkpoint_freq": 10, "checkpoint_at_end": true }