# Evaluation config: eval_name `video_classification_frozen`, tag `k400-16x8x3`.
#
# NOTE(review): this file had been collapsed onto a single line, which is not
# parseable YAML and also hid `write_tag` behind the inline comment on
# `checkpoint`. The nesting below is reconstructed: `frames_per_clip` occurs
# under both `data` and `pretrain`, so those keys must be separate section
# mappings. Confirm section boundaries against the consuming eval script.

# Job layout and run identification.
nodes: 8
tasks_per_node: 8
tag: k400-16x8x3
eval_name: video_classification_frozen
resume_checkpoint: false

# Dataset locations and clip sampling. The two CSV paths are placeholders
# that must be replaced before running.
data:
  dataset_train: /your_path_to_kinetics400_train_csv_file_index.csv
  dataset_val: /your_path_to_kinetics400_val_csv_file_index.csv
  dataset_type: VideoDataset
  num_classes: 400
  frames_per_clip: 16
  num_segments: 8
  num_views_per_segment: 3
  frame_step: 4

# Training hyper-parameters for the evaluation run.
# NOTE(review): presumably lr ramps start_lr -> lr over `warmup` and then
# decays to final_lr; verify against the scheduler in the eval code.
optimization:
  attend_across_segments: true
  num_epochs: 20
  resolution: 224
  batch_size: 4
  weight_decay: 0.01
  lr: 0.001
  start_lr: 0.001
  final_lr: 0.0
  warmup: 0.0
  use_bfloat16: true

# Pretrained model description and checkpoint location. `folder` is a
# placeholder that must be replaced with an absolute directory path.
pretrain:
  model_name: vit_huge
  checkpoint_key: target_encoder
  clip_duration: null
  frames_per_clip: 16
  tubelet_size: 2
  uniform_power: true
  use_silu: false
  tight_silu: false
  use_sdpa: true
  patch_size: 16
  folder: /your_absolute_file_path_to_directory_where_pretrained_models_are_contained/
  checkpoint: jepa-latest.pth.tar  # name of pretrained model file inside folder
  write_tag: jepa