{ "architectures": [ "VJEPA2ForVideoClassification" ], "attention_dropout": 0.0, "attention_probs_dropout_prob": 0.0, "crop_size": 384, "drop_path_rate": 0.0, "frames_per_clip": 32, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 1408, "id2label": { "0": "['Back', '15som', '05Twis', 'FREE']", "1": "['Back', '15som', '15Twis', 'FREE']", "2": "['Back', '15som', '25Twis', 'FREE']", "3": "['Back', '15som', 'NoTwis', 'PIKE']", "4": "['Back', '15som', 'NoTwis', 'TUCK']", "5": "['Back', '25som', '15Twis', 'PIKE']", "6": "['Back', '25som', '25Twis', 'PIKE']", "7": "['Back', '25som', 'NoTwis', 'PIKE']", "8": "['Back', '25som', 'NoTwis', 'TUCK']", "9": "['Back', '2som', '15Twis', 'FREE']", "10": "['Back', '2som', '25Twis', 'FREE']", "11": "['Back', '35som', 'NoTwis', 'PIKE']", "12": "['Back', '35som', 'NoTwis', 'TUCK']", "13": "['Back', '3som', 'NoTwis', 'PIKE']", "14": "['Back', '3som', 'NoTwis', 'TUCK']", "15": "['Back', 'Dive', 'NoTwis', 'PIKE']", "16": "['Back', 'Dive', 'NoTwis', 'TUCK']", "17": "['Forward', '15som', '1Twis', 'FREE']", "18": "['Forward', '15som', '2Twis', 'FREE']", "19": "['Forward', '15som', 'NoTwis', 'PIKE']", "20": "['Forward', '1som', 'NoTwis', 'PIKE']", "21": "['Forward', '25som', '1Twis', 'PIKE']", "22": "['Forward', '25som', '2Twis', 'PIKE']", "23": "['Forward', '25som', '3Twis', 'PIKE']", "24": "['Forward', '25som', 'NoTwis', 'PIKE']", "25": "['Forward', '25som', 'NoTwis', 'TUCK']", "26": "['Forward', '35som', 'NoTwis', 'PIKE']", "27": "['Forward', '35som', 'NoTwis', 'TUCK']", "28": "['Forward', '45som', 'NoTwis', 'TUCK']", "29": "['Forward', 'Dive', 'NoTwis', 'PIKE']", "30": "['Forward', 'Dive', 'NoTwis', 'STR']", "31": "['Inward', '15som', 'NoTwis', 'PIKE']", "32": "['Inward', '15som', 'NoTwis', 'TUCK']", "33": "['Inward', '25som', 'NoTwis', 'PIKE']", "34": "['Inward', '25som', 'NoTwis', 'TUCK']", "35": "['Inward', '35som', 'NoTwis', 'TUCK']", "36": "['Inward', 'Dive', 'NoTwis', 'PIKE']",
"37": "['Reverse', '15som', '05Twis', 'FREE']", "38": "['Reverse', '15som', '15Twis', 'FREE']", "39": "['Reverse', '15som', '25Twis', 'FREE']", "40": "['Reverse', '15som', '35Twis', 'FREE']", "41": "['Reverse', '15som', 'NoTwis', 'PIKE']", "42": "['Reverse', '25som', '15Twis', 'PIKE']", "43": "['Reverse', '25som', 'NoTwis', 'PIKE']", "44": "['Reverse', '25som', 'NoTwis', 'TUCK']", "45": "['Reverse', '35som', 'NoTwis', 'TUCK']", "46": "['Reverse', 'Dive', 'NoTwis', 'PIKE']", "47": "['Reverse', 'Dive', 'NoTwis', 'TUCK']" }, "image_size": 384, "in_chans": 3, "initializer_range": 0.02, "label2id": { "LABEL_0": 0, "LABEL_1": 1, "LABEL_10": 10, "LABEL_11": 11, "LABEL_12": 12, "LABEL_13": 13, "LABEL_14": 14, "LABEL_15": 15, "LABEL_16": 16, "LABEL_17": 17, "LABEL_18": 18, "LABEL_19": 19, "LABEL_2": 2, "LABEL_20": 20, "LABEL_21": 21, "LABEL_22": 22, "LABEL_23": 23, "LABEL_24": 24, "LABEL_25": 25, "LABEL_26": 26, "LABEL_27": 27, "LABEL_28": 28, "LABEL_29": 29, "LABEL_3": 3, "LABEL_30": 30, "LABEL_31": 31, "LABEL_32": 32, "LABEL_33": 33, "LABEL_34": 34, "LABEL_35": 35, "LABEL_36": 36, "LABEL_37": 37, "LABEL_38": 38, "LABEL_39": 39, "LABEL_4": 4, "LABEL_40": 40, "LABEL_41": 41, "LABEL_42": 42, "LABEL_43": 43, "LABEL_44": 44, "LABEL_45": 45, "LABEL_46": 46, "LABEL_47": 47, "LABEL_5": 5, "LABEL_6": 6, "LABEL_7": 7, "LABEL_8": 8, "LABEL_9": 9 }, "layer_norm_eps": 1e-06, "mlp_ratio": 4.363636363636363, "model_type": "vjepa2", "num_attention_heads": 22, "num_hidden_layers": 40, "num_pooler_layers": 3, "patch_size": 16, "pred_hidden_size": 384, "pred_mlp_ratio": 4.0, "pred_num_attention_heads": 12, "pred_num_hidden_layers": 12, "pred_num_mask_tokens": 10, "pred_zero_init_mask_tokens": true, "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.53.0.dev0", "tubelet_size": 2, "wide_SiLU": true }