Upload folder using huggingface_hub
Browse files
- vith16_k400_16x8x3.yaml +39 -0
- vitl16_k400_16x8x3.yaml +39 -0
vith16_k400_16x8x3.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
nodes: 8
|
2 |
+
tasks_per_node: 8
|
3 |
+
tag: k400-16x8x3
|
4 |
+
eval_name: video_classification_frozen
|
5 |
+
resume_checkpoint: false
|
6 |
+
data:
|
7 |
+
dataset_train: /your_path_to_kinetics400_train_csv_file_index.csv
|
8 |
+
dataset_val: /your_path_to_kinetics400_val_csv_file_index.csv
|
9 |
+
dataset_type: VideoDataset
|
10 |
+
num_classes: 400
|
11 |
+
frames_per_clip: 16
|
12 |
+
num_segments: 8
|
13 |
+
num_views_per_segment: 3
|
14 |
+
frame_step: 4
|
15 |
+
optimization:
|
16 |
+
attend_across_segments: true
|
17 |
+
num_epochs: 20
|
18 |
+
resolution: 224
|
19 |
+
batch_size: 4
|
20 |
+
weight_decay: 0.01
|
21 |
+
lr: 0.001
|
22 |
+
start_lr: 0.001
|
23 |
+
final_lr: 0.0
|
24 |
+
warmup: 0.
|
25 |
+
use_bfloat16: true
|
26 |
+
pretrain:
|
27 |
+
model_name: vit_huge
|
28 |
+
checkpoint_key: target_encoder
|
29 |
+
clip_duration: null
|
30 |
+
frames_per_clip: 16
|
31 |
+
tubelet_size: 2
|
32 |
+
uniform_power: true
|
33 |
+
use_silu: false
|
34 |
+
tight_silu: false
|
35 |
+
use_sdpa: true
|
36 |
+
patch_size: 16
|
37 |
+
folder: /your_absolute_file_path_to_directory_where_pretrained_models_are_contained/
|
38 |
+
checkpoint: jepa-latest.pth.tar # name of pretrained model file inside folder
|
39 |
+
write_tag: jepa
|
vitl16_k400_16x8x3.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
nodes: 8
|
2 |
+
tasks_per_node: 8
|
3 |
+
tag: k400-16x8x3
|
4 |
+
eval_name: video_classification_frozen
|
5 |
+
resume_checkpoint: false
|
6 |
+
data:
|
7 |
+
dataset_train: /your_path_to_kinetics400_train_csv_file_index.csv
|
8 |
+
dataset_val: /your_path_to_kinetics400_val_csv_file_index.csv
|
9 |
+
dataset_type: VideoDataset
|
10 |
+
num_classes: 400
|
11 |
+
frames_per_clip: 16
|
12 |
+
num_segments: 8
|
13 |
+
num_views_per_segment: 3
|
14 |
+
frame_step: 4
|
15 |
+
optimization:
|
16 |
+
attend_across_segments: true
|
17 |
+
num_epochs: 20
|
18 |
+
resolution: 224
|
19 |
+
batch_size: 4
|
20 |
+
weight_decay: 0.01
|
21 |
+
lr: 0.001
|
22 |
+
start_lr: 0.001
|
23 |
+
final_lr: 0.0
|
24 |
+
warmup: 0.
|
25 |
+
use_bfloat16: true
|
26 |
+
pretrain:
|
27 |
+
model_name: vit_large
|
28 |
+
checkpoint_key: target_encoder
|
29 |
+
clip_duration: null
|
30 |
+
frames_per_clip: 16
|
31 |
+
tubelet_size: 2
|
32 |
+
uniform_power: true
|
33 |
+
use_silu: false
|
34 |
+
tight_silu: false
|
35 |
+
use_sdpa: true
|
36 |
+
patch_size: 16
|
37 |
+
folder: /your_absolute_file_path_to_directory_where_pretrained_models_are_contained/
|
38 |
+
checkpoint: jepa-latest.pth.tar # name of pretrained model file inside folder
|
39 |
+
write_tag: jepa
|