Visual (RGB) input leads to unwatchable, invalid rollout videos.
When I train a model with BC using RGB image observations as input, the generated rollout video is unwatchable.
Here is my bc.json.
{
"algo_name": "bc",
"experiment": {
"name": "test",
"validate": false,
"logging": {
"terminal_output_to_txt": true,
"log_tb": true,
"log_wandb": false,
"wandb_proj_name": "debug"
},
"save": {
"enabled": true,
"every_n_seconds": null,
"every_n_epochs": 50,
"epochs": [],
"on_best_validation": false,
"on_best_rollout_return": false,
"on_best_rollout_success_rate": true
},
"epoch_every_n_steps": 100,
"validation_epoch_every_n_steps": 10,
"env": null,
"additional_envs": null,
"render": false,
"render_video": true,
"keep_all_videos": false,
"video_skip": 1,
"rollout": {
"enabled": true,
"n": 50,
"horizon": 400,
"rate": 50,
"warmstart": 0,
"terminate_on_success": true
}
},
"train": {
"data": null,
"output_dir": "../bc_trained_models",
"num_data_workers": 0,
"hdf5_cache_mode": "all",
"hdf5_use_swmr": true,
"hdf5_load_next_obs": false,
"hdf5_normalize_obs": false,
"hdf5_filter_key": null,
"hdf5_validation_filter_key": null,
"seq_length": 1,
"pad_seq_length": true,
"frame_stack": 1,
"pad_frame_stack": true,
"dataset_keys": [
"actions",
"rewards",
"dones"
],
"goal_mode": null,
"cuda": true,
"batch_size": 100,
"num_epochs": 4000,
"seed": 1
},
"algo": {
"optim_params": {
"policy": {
"optimizer_type": "adam",
"learning_rate": {
"initial": 0.0001,
"decay_factor": 0.1,
"epoch_schedule": [],
"scheduler_type": "multistep"
},
"regularization": {
"L2": 0.0
}
}
},
"loss": {
"l2_weight": 1.0,
"l1_weight": 0.0,
"cos_weight": 0.0
},
"actor_layer_dims": [
1024,
1024
],
"gaussian": {
"enabled": false,
"fixed_std": false,
"init_std": 0.1,
"min_std": 0.01,
"std_activation": "softplus",
"low_noise_eval": true
},
"gmm": {
"enabled": false,
"num_modes": 5,
"min_std": 0.0001,
"std_activation": "softplus",
"low_noise_eval": true
},
"vae": {
"enabled": false,
"latent_dim": 14,
"latent_clip": null,
"kl_weight": 1.0,
"decoder": {
"is_conditioned": true,
"reconstruction_sum_across_elements": false
},
"prior": {
"learn": false,
"is_conditioned": false,
"use_gmm": false,
"gmm_num_modes": 10,
"gmm_learn_weights": false,
"use_categorical": false,
"categorical_dim": 10,
"categorical_gumbel_softmax_hard": false,
"categorical_init_temp": 1.0,
"categorical_temp_anneal_step": 0.001,
"categorical_min_temp": 0.3
},
"encoder_layer_dims": [
300,
400
],
"decoder_layer_dims": [
300,
400
],
"prior_layer_dims": [
300,
400
]
},
"rnn": {
"enabled": false,
"horizon": 10,
"hidden_dim": 400,
"rnn_type": "LSTM",
"num_layers": 2,
"open_loop": false,
"kwargs": {
"bidirectional": false
}
},
"transformer": {
"enabled": false,
"context_length": 10,
"embed_dim": 512,
"num_layers": 6,
"num_heads": 8,
"emb_dropout": 0.1,
"attn_dropout": 0.1,
"block_output_dropout": 0.1,
"sinusoidal_embedding": false,
"activation": "gelu",
"supervise_all_steps": false,
"nn_parameter_for_timesteps": true
}
},
"observation": {
"modalities": {
"obs": {
"low_dim": [
],
"rgb": [
"frontview_image"
],
"depth": [
"frontview_depth"
],
"scan": []
},
"goal": {
"low_dim": [],
"rgb": [],
"depth": [],
"scan": []
}
},
"encoder": {
"low_dim": {
"core_class": null,
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
},
"rgb": {
"core_class": "VisualCore",
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
},
"depth": {
"core_class": "VisualCore",
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
},
"scan": {
"core_class": "ScanCore",
"core_kwargs": {},
"obs_randomizer_class": null,
"obs_randomizer_kwargs": {}
}
}
},
"meta": {
"hp_base_config_file": null,
"hp_keys": [],
"hp_values": []
}
}
Visual (RGB) input leads to unwatchable, invalid rollout videos.
When I train a model with BC using RGB image observations as input, the generated rollout video is unwatchable.
Here is my bc.json:
{ "algo_name": "bc", "experiment": { "name": "test", "validate": false, "logging": { "terminal_output_to_txt": true, "log_tb": true, "log_wandb": false, "wandb_proj_name": "debug" }, "save": { "enabled": true, "every_n_seconds": null, "every_n_epochs": 50, "epochs": [], "on_best_validation": false, "on_best_rollout_return": false, "on_best_rollout_success_rate": true }, "epoch_every_n_steps": 100, "validation_epoch_every_n_steps": 10, "env": null, "additional_envs": null, "render": false, "render_video": true, "keep_all_videos": false, "video_skip": 1, "rollout": { "enabled": true, "n": 50, "horizon": 400, "rate": 50, "warmstart": 0, "terminate_on_success": true } }, "train": { "data": null, "output_dir": "../bc_trained_models", "num_data_workers": 0, "hdf5_cache_mode": "all", "hdf5_use_swmr": true, "hdf5_load_next_obs": false, "hdf5_normalize_obs": false, "hdf5_filter_key": null, "hdf5_validation_filter_key": null, "seq_length": 1, "pad_seq_length": true, "frame_stack": 1, "pad_frame_stack": true, "dataset_keys": [ "actions", "rewards", "dones" ], "goal_mode": null, "cuda": true, "batch_size": 100, "num_epochs": 4000, "seed": 1 }, "algo": { "optim_params": { "policy": { "optimizer_type": "adam", "learning_rate": { "initial": 0.0001, "decay_factor": 0.1, "epoch_schedule": [], "scheduler_type": "multistep" }, "regularization": { "L2": 0.0 } } }, "loss": { "l2_weight": 1.0, "l1_weight": 0.0, "cos_weight": 0.0 }, "actor_layer_dims": [ 1024, 1024 ], "gaussian": { "enabled": false, "fixed_std": false, "init_std": 0.1, "min_std": 0.01, "std_activation": "softplus", "low_noise_eval": true }, "gmm": { "enabled": false, "num_modes": 5, "min_std": 0.0001, "std_activation": "softplus", "low_noise_eval": true }, "vae": { "enabled": false, "latent_dim": 14, "latent_clip": null, "kl_weight": 1.0, "decoder": { "is_conditioned": true, "reconstruction_sum_across_elements": false }, "prior": { "learn": false, "is_conditioned": false, "use_gmm": false, "gmm_num_modes": 10, 
"gmm_learn_weights": false, "use_categorical": false, "categorical_dim": 10, "categorical_gumbel_softmax_hard": false, "categorical_init_temp": 1.0, "categorical_temp_anneal_step": 0.001, "categorical_min_temp": 0.3 }, "encoder_layer_dims": [ 300, 400 ], "decoder_layer_dims": [ 300, 400 ], "prior_layer_dims": [ 300, 400 ] }, "rnn": { "enabled": false, "horizon": 10, "hidden_dim": 400, "rnn_type": "LSTM", "num_layers": 2, "open_loop": false, "kwargs": { "bidirectional": false } }, "transformer": { "enabled": false, "context_length": 10, "embed_dim": 512, "num_layers": 6, "num_heads": 8, "emb_dropout": 0.1, "attn_dropout": 0.1, "block_output_dropout": 0.1, "sinusoidal_embedding": false, "activation": "gelu", "supervise_all_steps": false, "nn_parameter_for_timesteps": true } }, "observation": { "modalities": { "obs": { "low_dim": [ ], "rgb": [ "frontview_image" ], "depth": [ "frontview_depth" ], "scan": [] }, "goal": { "low_dim": [], "rgb": [], "depth": [], "scan": [] } }, "encoder": { "low_dim": { "core_class": null, "core_kwargs": {}, "obs_randomizer_class": null, "obs_randomizer_kwargs": {} }, "rgb": { "core_class": "VisualCore", "core_kwargs": {}, "obs_randomizer_class": null, "obs_randomizer_kwargs": {} }, "depth": { "core_class": "VisualCore", "core_kwargs": {}, "obs_randomizer_class": null, "obs_randomizer_kwargs": {} }, "scan": { "core_class": "ScanCore", "core_kwargs": {}, "obs_randomizer_class": null, "obs_randomizer_kwargs": {} } } }, "meta": { "hp_base_config_file": null, "hp_keys": [], "hp_values": [] } }
My train script is:
CUDA_VISIBLE_DEVICES=0 python robomimic/scripts/train.py --config robomimic/exps/templates/bc1_img.json --dataset ${my hdf5 data path}
Using `rgb` as input results in the following warning:
When I set `macro_block_size=1` or `macro_block_size=None` in `imageio.get_writer(video_paths[k], fps=20, macro_block_size=xxx)`, there is an error:
It is worth mentioning that when I use the `low_dim` observation form as input, the generated video is watchable and there is no such `WARNING:imageio_ffmpeg`.
Is this a bug? Or is it just me who has this issue?
Thanks!