| run_id: 0903_libero_spatial_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_16_pretrained_vlm | |
| run_root_dir: ./playground/Checkpoints | |
| seed: 42 | |
| trackers: | |
| - jsonl | |
| - wandb | |
| wandb_entity: michaelyu-1101-fudanuniversity | |
| wandb_project: Internvla | |
| is_debug: false | |
| framework: | |
| framework_py: DinoQFormerACT | |
| qwenvl: | |
| base_vlm: Qwen/Qwen2.5-VL-3B-Instruct | |
| attn_implementation: flash_attention_2 | |
| vl_hidden_dim: 2048 | |
| dino: | |
| dino_backbone: dinov2_vitl14 | |
| layer_qformer: | |
| qformer_end_layer: 37 | |
| qformer_start_layer: 36 | |
| num_query_tokens: 64 | |
| input_dim: 2048 | |
| ouptput_dim: 768 | |
| grad_scale: 0.5 | |
| action_model: | |
| action_model_type: DiT-B | |
| action_hidden_dim: 768 | |
| action_dim: 7 | |
| input_dim: 2048 | |
| ouptput_dim: 768 | |
| use_ema: false | |
| future_action_window_size: 7 | |
| past_action_window_size: 0 | |
| repeated_diffusion_steps: 8 | |
| reduce_in_full_precision: true | |
| datasets: | |
| vlm_data: | |
| dataformat: llava_json | |
| dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en | |
| eval_dataset: aokvqa_cauldron_llava_format | |
| data_flatten: false | |
| base_interval: 2 | |
| max_pixels: 50176 | |
| min_pixels: 784 | |
| fix_image_size: | |
| - 224 | |
| - 224 | |
| model_max_length: 1024 | |
| model_type: qwen2.5vl | |
| per_device_batch_size: 4 | |
| vla_data: | |
| dataset_py: lerobot_libero | |
| data_root_dir: playground/Datasets/LEROBOT_LIBERO_DATA | |
| data_mix: libero_spatial | |
| action_type: delta_qpos | |
| CoT_prompt: Your task is {instruction}. To identify the key objects for your task. | |
| Locate their bounding boxes in [x1,y1,x2,y2] format. | |
| CoT_answer: bbox | |
| default_image_resolution: | |
| - 3 | |
| - 224 | |
| - 224 | |
| per_device_batch_size: 16 | |
| load_all_data_for_training: true | |
| obs: | |
| - image_0 | |
| trainer: | |
| epochs: 100 | |
| max_train_steps: 100000 | |
| num_warmup_steps: 5000 | |
| save_interval: 10000 | |
| eval_interval: 1000 | |
| learning_rate: | |
| base: 2.5e-05 | |
| lr_scheduler_type: cosine_with_min_lr | |
| scheduler_specific_kwargs: | |
| min_lr: 1.0e-06 | |
| freeze_modules: '' | |
| loss_scale: | |
| vla: 1.0 | |
| vlm: 0.1 | |
| max_grad_norm: 1.0 | |
| warmup_ratio: 0.1 | |
| weight_decay: 0.0 | |
| logging_frequency: 10 | |
| gradient_clipping: 1.0 | |
| gradient_accumulation_steps: 1 | |
| optimizer: | |
| name: AdamW | |
| betas: | |
| - 0.9 | |
| - 0.95 | |
| eps: 1.0e-08 | |
| weight_decay: 1.0e-08 | |
| is_resume: false | |
| resume_epoch: null | |
| resume_step: null | |
| enable_gradient_checkpointing: true | |
| enable_mixed_precision_training: true | |
| output_dir: ./playground/Checkpoints/0903_libero_spatial_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_16_pretrained_vlm | |
