FidelOdok
/

doa_model_TL4

Audio Classification

audio-spectrogram-transformer

Inference Endpoints

Model card | Files and versions | Training metrics | Community

doa_model_TL4 / config.json

FidelOdok's picture

Training in progress, epoch 1

b7a16d2 over 1 year ago

history blame contribute delete

3.8 kB

	{
		"_name_or_path": "MIT/ast-finetuned-audioset-10-10-0.4593",
		"architectures": [
			"ASTForAudioClassification"
		],
		"attention_probs_dropout_prob": 0.0,
		"frequency_stride": 10,
		"hidden_act": "gelu",
		"hidden_dropout_prob": 0.0,
		"hidden_size": 768,
		"id2label": {
			"0": "0",
			"1": "101",
			"10": "151",
			"11": "156",
			"12": "166",
			"13": "169",
			"14": "171",
			"15": "172",
			"16": "18",
			"17": "182",
			"18": "187",
			"19": "189",
			"2": "106",
			"20": "190",
			"21": "192",
			"22": "200",
			"23": "205",
			"24": "207",
			"25": "209",
			"26": "211",
			"27": "218",
			"28": "219",
			"29": "221",
			"3": "112",
			"30": "224",
			"31": "226",
			"32": "227",
			"33": "229",
			"34": "237",
			"35": "239",
			"36": "242",
			"37": "244",
			"38": "257",
			"39": "26",
			"4": "117",
			"40": "260",
			"41": "262",
			"42": "265",
			"43": "278",
			"44": "281",
			"45": "3",
			"46": "312",
			"47": "317",
			"48": "328",
			"49": "343",
			"5": "122",
			"50": "351",
			"51": "354",
			"52": "356",
			"53": "358",
			"54": "359",
			"55": "368",
			"56": "369",
			"57": "371",
			"58": "372",
			"59": "373",
			"6": "129",
			"60": "378",
			"61": "380",
			"62": "383",
			"63": "385",
			"64": "386",
			"65": "391",
			"66": "394",
			"67": "397",
			"68": "4",
			"69": "422",
			"7": "134",
			"70": "423",
			"71": "424",
			"72": "426",
			"73": "427",
			"74": "428",
			"75": "46",
			"76": "49",
			"77": "5",
			"78": "50",
			"79": "58",
			"8": "137",
			"80": "6",
			"81": "66",
			"82": "67",
			"83": "69",
			"84": "7",
			"85": "71",
			"86": "73",
			"87": "82",
			"88": "84",
			"89": "86",
			"9": "139",
			"90": "87",
			"91": "89",
			"92": "96"
		},
		"initializer_range": 0.02,
		"intermediate_size": 3072,
		"label2id": {
			"0": 0,
			"101": 1,
			"106": 2,
			"112": 3,
			"117": 4,
			"122": 5,
			"129": 6,
			"134": 7,
			"137": 8,
			"139": 9,
			"151": 10,
			"156": 11,
			"166": 12,
			"169": 13,
			"171": 14,
			"172": 15,
			"18": 16,
			"182": 17,
			"187": 18,
			"189": 19,
			"190": 20,
			"192": 21,
			"200": 22,
			"205": 23,
			"207": 24,
			"209": 25,
			"211": 26,
			"218": 27,
			"219": 28,
			"221": 29,
			"224": 30,
			"226": 31,
			"227": 32,
			"229": 33,
			"237": 34,
			"239": 35,
			"242": 36,
			"244": 37,
			"257": 38,
			"26": 39,
			"260": 40,
			"262": 41,
			"265": 42,
			"278": 43,
			"281": 44,
			"3": 45,
			"312": 46,
			"317": 47,
			"328": 48,
			"343": 49,
			"351": 50,
			"354": 51,
			"356": 52,
			"358": 53,
			"359": 54,
			"368": 55,
			"369": 56,
			"371": 57,
			"372": 58,
			"373": 59,
			"378": 60,
			"380": 61,
			"383": 62,
			"385": 63,
			"386": 64,
			"391": 65,
			"394": 66,
			"397": 67,
			"4": 68,
			"422": 69,
			"423": 70,
			"424": 71,
			"426": 72,
			"427": 73,
			"428": 74,
			"46": 75,
			"49": 76,
			"5": 77,
			"50": 78,
			"58": 79,
			"6": 80,
			"66": 81,
			"67": 82,
			"69": 83,
			"7": 84,
			"71": 85,
			"73": 86,
			"82": 87,
			"84": 88,
			"86": 89,
			"87": 90,
			"89": 91,
			"96": 92
		},
		"layer_norm_eps": 1e-12,
		"max_length": 1024,
		"model_type": "audio-spectrogram-transformer",
		"num_attention_heads": 12,
		"num_hidden_layers": 12,
		"num_mel_bins": 128,
		"patch_size": 16,
		"problem_type": "single_label_classification",
		"qkv_bias": true,
		"time_stride": 10,
		"torch_dtype": "float32",
		"transformers_version": "4.28.0"
	}