# Source: vjepa-ckpts / vitl16_k400_16x8x3.yaml
# Uploaded by sayakpaul (HF staff) using huggingface_hub.
# Commit 236ee3e (verified); size as uploaded: 983 Bytes.
# Evaluation config for `video_classification_frozen` (see `eval_name`).
# The dataset placeholders below point at Kinetics-400 CSV index files, and
# the pretrained model is a `vit_large` checkpoint.
#
# NOTE(review): the section nesting was reconstructed from a flattened copy.
# Each section owns the run of keys that followed its header; this also
# removes the duplicate top-level `frames_per_clip` key. Confirm against the
# code that consumes this file.

# Presumably the launch topology (8 nodes x 8 tasks each) -- TODO confirm.
nodes: 8
tasks_per_node: 8
# The tag appears to mirror the data settings below:
# 16 frames per clip x 8 segments x 3 views per segment.
tag: k400-16x8x3
eval_name: video_classification_frozen
resume_checkpoint: false

data:
  # Placeholder paths: replace both with real CSV index files before running.
  dataset_train: /your_path_to_kinetics400_train_csv_file_index.csv
  dataset_val: /your_path_to_kinetics400_val_csv_file_index.csv
  dataset_type: VideoDataset
  num_classes: 400
  frames_per_clip: 16
  num_segments: 8
  num_views_per_segment: 3
  frame_step: 4

optimization:
  attend_across_segments: true
  num_epochs: 20
  resolution: 224
  batch_size: 4
  weight_decay: 0.01
  # lr and start_lr are equal and warmup is zero.
  lr: 0.001
  start_lr: 0.001
  final_lr: 0.0
  warmup: 0.0
  use_bfloat16: true

pretrain:
  model_name: vit_large
  checkpoint_key: target_encoder
  clip_duration: null
  # Same value as data.frames_per_clip; kept as a separate key per section.
  frames_per_clip: 16
  tubelet_size: 2
  uniform_power: true
  use_silu: false
  tight_silu: false
  use_sdpa: true
  patch_size: 16
  # Placeholder path: replace with the directory holding the pretrained models.
  folder: /your_absolute_file_path_to_directory_where_pretrained_models_are_contained/
  checkpoint: jepa-latest.pth.tar  # name of pretrained model file inside folder
  write_tag: jepa