{
  "architectures": [
    "VJEPA2ForVideoClassification"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "crop_size": 384,
  "drop_path_rate": 0.0,
  "frames_per_clip": 32,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1408,
  "id2label": {
    "0": "['Back', '15som', '05Twis', 'FREE']",
    "1": "['Back', '15som', '15Twis', 'FREE']",
    "2": "['Back', '15som', '25Twis', 'FREE']",
    "3": "['Back', '15som', 'NoTwis', 'PIKE']",
    "4": "['Back', '15som', 'NoTwis', 'TUCK']",
    "5": "['Back', '25som', '15Twis', 'PIKE']",
    "6": "['Back', '25som', '25Twis', 'PIKE']",
    "7": "['Back', '25som', 'NoTwis', 'PIKE']",
    "8": "['Back', '25som', 'NoTwis', 'TUCK']",
    "9": "['Back', '2som', '15Twis', 'FREE']",
    "10": "['Back', '2som', '25Twis', 'FREE']",
    "11": "['Back', '35som', 'NoTwis', 'PIKE']",
    "12": "['Back', '35som', 'NoTwis', 'TUCK']",
    "13": "['Back', '3som', 'NoTwis', 'PIKE']",
    "14": "['Back', '3som', 'NoTwis', 'TUCK']",
    "15": "['Back', 'Dive', 'NoTwis', 'PIKE']",
    "16": "['Back', 'Dive', 'NoTwis', 'TUCK']",
    "17": "['Forward', '15som', '1Twis', 'FREE']",
    "18": "['Forward', '15som', '2Twis', 'FREE']",
    "19": "['Forward', '15som', 'NoTwis', 'PIKE']",
    "20": "['Forward', '1som', 'NoTwis', 'PIKE']",
    "21": "['Forward', '25som', '1Twis', 'PIKE']",
    "22": "['Forward', '25som', '2Twis', 'PIKE']",
    "23": "['Forward', '25som', '3Twis', 'PIKE']",
    "24": "['Forward', '25som', 'NoTwis', 'PIKE']",
    "25": "['Forward', '25som', 'NoTwis', 'TUCK']",
    "26": "['Forward', '35som', 'NoTwis', 'PIKE']",
    "27": "['Forward', '35som', 'NoTwis', 'TUCK']",
    "28": "['Forward', '45som', 'NoTwis', 'TUCK']",
    "29": "['Forward', 'Dive', 'NoTwis', 'PIKE']",
    "30": "['Forward', 'Dive', 'NoTwis', 'STR']",
    "31": "['Inward', '15som', 'NoTwis', 'PIKE']",
    "32": "['Inward', '15som', 'NoTwis', 'TUCK']",
    "33": "['Inward', '25som', 'NoTwis', 'PIKE']",
    "34": "['Inward', '25som', 'NoTwis', 'TUCK']",
    "35": "['Inward', '35som', 'NoTwis', 'TUCK']",
    "36": "['Inward', 'Dive', 'NoTwis', 'PIKE']",
    "37": "['Reverse', '15som', '05Twis', 'FREE']",
    "38": "['Reverse', '15som', '15Twis', 'FREE']",
    "39": "['Reverse', '15som', '25Twis', 'FREE']",
    "40": "['Reverse', '15som', '35Twis', 'FREE']",
    "41": "['Reverse', '15som', 'NoTwis', 'PIKE']",
    "42": "['Reverse', '25som', '15Twis', 'PIKE']",
    "43": "['Reverse', '25som', 'NoTwis', 'PIKE']",
    "44": "['Reverse', '25som', 'NoTwis', 'TUCK']",
    "45": "['Reverse', '35som', 'NoTwis', 'TUCK']",
    "46": "['Reverse', 'Dive', 'NoTwis', 'PIKE']",
    "47": "['Reverse', 'Dive', 'NoTwis', 'TUCK']"
  },
  "image_size": 384,
  "in_chans": 3,
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_12": 12,
    "LABEL_13": 13,
    "LABEL_14": 14,
    "LABEL_15": 15,
    "LABEL_16": 16,
    "LABEL_17": 17,
    "LABEL_18": 18,
    "LABEL_19": 19,
    "LABEL_2": 2,
    "LABEL_20": 20,
    "LABEL_21": 21,
    "LABEL_22": 22,
    "LABEL_23": 23,
    "LABEL_24": 24,
    "LABEL_25": 25,
    "LABEL_26": 26,
    "LABEL_27": 27,
    "LABEL_28": 28,
    "LABEL_29": 29,
    "LABEL_3": 3,
    "LABEL_30": 30,
    "LABEL_31": 31,
    "LABEL_32": 32,
    "LABEL_33": 33,
    "LABEL_34": 34,
    "LABEL_35": 35,
    "LABEL_36": 36,
    "LABEL_37": 37,
    "LABEL_38": 38,
    "LABEL_39": 39,
    "LABEL_4": 4,
    "LABEL_40": 40,
    "LABEL_41": 41,
    "LABEL_42": 42,
    "LABEL_43": 43,
    "LABEL_44": 44,
    "LABEL_45": 45,
    "LABEL_46": 46,
    "LABEL_47": 47,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,
    "LABEL_8": 8,
    "LABEL_9": 9
  },
  "layer_norm_eps": 1e-06,
  "mlp_ratio": 4.363636363636363,
  "model_type": "vjepa2",
  "num_attention_heads": 22,
  "num_hidden_layers": 40,
  "num_pooler_layers": 3,
  "patch_size": 16,
  "pred_hidden_size": 384,
  "pred_mlp_ratio": 4.0,
  "pred_num_attention_heads": 12,
  "pred_num_hidden_layers": 12,
  "pred_num_mask_tokens": 10,
  "pred_zero_init_mask_tokens": true,
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.53.0.dev0",
  "tubelet_size": 2,
  "wide_SiLU": true
}