{
"architectures": [
"VJEPA2ForVideoClassification"
],
"attention_dropout": 0.0,
"attention_probs_dropout_prob": 0.0,
"crop_size": 256,
"drop_path_rate": 0.0,
"frames_per_clip": 32,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 1024,
"id2label": {
"0": "['Back', '15som', '05Twis', 'FREE']",
"1": "['Back', '15som', '15Twis', 'FREE']",
"2": "['Back', '15som', '25Twis', 'FREE']",
"3": "['Back', '15som', 'NoTwis', 'PIKE']",
"4": "['Back', '15som', 'NoTwis', 'TUCK']",
"5": "['Back', '25som', '15Twis', 'PIKE']",
"6": "['Back', '25som', '25Twis', 'PIKE']",
"7": "['Back', '25som', 'NoTwis', 'PIKE']",
"8": "['Back', '25som', 'NoTwis', 'TUCK']",
"9": "['Back', '2som', '15Twis', 'FREE']",
"10": "['Back', '2som', '25Twis', 'FREE']",
"11": "['Back', '35som', 'NoTwis', 'PIKE']",
"12": "['Back', '35som', 'NoTwis', 'TUCK']",
"13": "['Back', '3som', 'NoTwis', 'PIKE']",
"14": "['Back', '3som', 'NoTwis', 'TUCK']",
"15": "['Back', 'Dive', 'NoTwis', 'PIKE']",
"16": "['Back', 'Dive', 'NoTwis', 'TUCK']",
"17": "['Forward', '15som', '1Twis', 'FREE']",
"18": "['Forward', '15som', '2Twis', 'FREE']",
"19": "['Forward', '15som', 'NoTwis', 'PIKE']",
"20": "['Forward', '1som', 'NoTwis', 'PIKE']",
"21": "['Forward', '25som', '1Twis', 'PIKE']",
"22": "['Forward', '25som', '2Twis', 'PIKE']",
"23": "['Forward', '25som', '3Twis', 'PIKE']",
"24": "['Forward', '25som', 'NoTwis', 'PIKE']",
"25": "['Forward', '25som', 'NoTwis', 'TUCK']",
"26": "['Forward', '35som', 'NoTwis', 'PIKE']",
"27": "['Forward', '35som', 'NoTwis', 'TUCK']",
"28": "['Forward', '45som', 'NoTwis', 'TUCK']",
"29": "['Forward', 'Dive', 'NoTwis', 'PIKE']",
"30": "['Forward', 'Dive', 'NoTwis', 'STR']",
"31": "['Inward', '15som', 'NoTwis', 'PIKE']",
"32": "['Inward', '15som', 'NoTwis', 'TUCK']",
"33": "['Inward', '25som', 'NoTwis', 'PIKE']",
"34": "['Inward', '25som', 'NoTwis', 'TUCK']",
"35": "['Inward', '35som', 'NoTwis', 'TUCK']",
"36": "['Inward', 'Dive', 'NoTwis', 'PIKE']",
"37": "['Reverse', '15som', '05Twis', 'FREE']",
"38": "['Reverse', '15som', '15Twis', 'FREE']",
"39": "['Reverse', '15som', '25Twis', 'FREE']",
"40": "['Reverse', '15som', '35Twis', 'FREE']",
"41": "['Reverse', '15som', 'NoTwis', 'PIKE']",
"42": "['Reverse', '25som', '15Twis', 'PIKE']",
"43": "['Reverse', '25som', 'NoTwis', 'PIKE']",
"44": "['Reverse', '25som', 'NoTwis', 'TUCK']",
"45": "['Reverse', '35som', 'NoTwis', 'TUCK']",
"46": "['Reverse', 'Dive', 'NoTwis', 'PIKE']",
"47": "['Reverse', 'Dive', 'NoTwis', 'TUCK']"
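},
"image_size": 256,
"in_chans": 3,
"initializer_range": 0.02,
"label2id": {
"['Back', '15som', '05Twis', 'FREE']": 0,
"['Back', '15som', '15Twis', 'FREE']": 1,
"['Back', '15som', '25Twis', 'FREE']": 2,
"['Back', '15som', 'NoTwis', 'PIKE']": 3,
"['Back', '15som', 'NoTwis', 'TUCK']": 4,
"['Back', '25som', '15Twis', 'PIKE']": 5,
"['Back', '25som', '25Twis', 'PIKE']": 6,
"['Back', '25som', 'NoTwis', 'PIKE']": 7,
"['Back', '25som', 'NoTwis', 'TUCK']": 8,
"['Back', '2som', '15Twis', 'FREE']": 9,
"['Back', '2som', '25Twis', 'FREE']": 10,
"['Back', '35som', 'NoTwis', 'PIKE']": 11,
"['Back', '35som', 'NoTwis', 'TUCK']": 12,
"['Back', '3som', 'NoTwis', 'PIKE']": 13,
"['Back', '3som', 'NoTwis', 'TUCK']": 14,
"['Back', 'Dive', 'NoTwis', 'PIKE']": 15,
"['Back', 'Dive', 'NoTwis', 'TUCK']": 16,
"['Forward', '15som', '1Twis', 'FREE']": 17,
"['Forward', '15som', '2Twis', 'FREE']": 18,
"['Forward', '15som', 'NoTwis', 'PIKE']": 19,
"['Forward', '1som', 'NoTwis', 'PIKE']": 20,
"['Forward', '25som', '1Twis', 'PIKE']": 21,
"['Forward', '25som', '2Twis', 'PIKE']": 22,
"['Forward', '25som', '3Twis', 'PIKE']": 23,
"['Forward', '25som', 'NoTwis', 'PIKE']": 24,
"['Forward', '25som', 'NoTwis', 'TUCK']": 25,
"['Forward', '35som', 'NoTwis', 'PIKE']": 26,
"['Forward', '35som', 'NoTwis', 'TUCK']": 27,
"['Forward', '45som', 'NoTwis', 'TUCK']": 28,
"['Forward', 'Dive', 'NoTwis', 'PIKE']": 29,
"['Forward', 'Dive', 'NoTwis', 'STR']": 30,
"['Inward', '15som', 'NoTwis', 'PIKE']": 31,
"['Inward', '15som', 'NoTwis', 'TUCK']": 32,
"['Inward', '25som', 'NoTwis', 'PIKE']": 33,
"['Inward', '25som', 'NoTwis', 'TUCK']": 34,
"['Inward', '35som', 'NoTwis', 'TUCK']": 35,
"['Inward', 'Dive', 'NoTwis', 'PIKE']": 36,
"['Reverse', '15som', '05Twis', 'FREE']": 37,
"['Reverse', '15som', '15Twis', 'FREE']": 38,
"['Reverse', '15som', '25Twis', 'FREE']": 39,
"['Reverse', '15som', '35Twis', 'FREE']": 40,
"['Reverse', '15som', 'NoTwis', 'PIKE']": 41,
"['Reverse', '25som', '15Twis', 'PIKE']": 42,
"['Reverse', '25som', 'NoTwis', 'PIKE']": 43,
"['Reverse', '25som', 'NoTwis', 'TUCK']": 44,
"['Reverse', '35som', 'NoTwis', 'TUCK']": 45,
"['Reverse', 'Dive', 'NoTwis', 'PIKE']": 46,
"['Reverse', 'Dive', 'NoTwis', 'TUCK']": 47
},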
"layer_norm_eps": 1e-06,
"mlp_ratio": 4,
"model_type": "vjepa2",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"num_pooler_layers": 3,
"patch_size": 16,
"pred_hidden_size": 384,
"pred_mlp_ratio": 4.0,
"pred_num_attention_heads": 12,
"pred_num_hidden_layers": 12,
"pred_num_mask_tokens": 10,
"pred_zero_init_mask_tokens": true,
"qkv_bias": true,
"torch_dtype": "float32",
"transformers_version": "4.53.0.dev0",
"tubelet_size": 2,
"use_SiLU": false,
"wide_SiLU": true
}