xingjianleng
/

mplug_visual-question-answering_coco_large_en

arxiv:2205.12005

Model card Files Files and versions Community

mplug_visual-question-answering_coco_large_en / config.yaml

xingjianleng's picture

upload

510b154 4 months ago

history blame contribute delete

813 Bytes

	# Configuration values for the mplug_visual-question-answering_coco_large_en model.
	# NOTE(review): every line in this view starts with a tab character. YAML does not
	# allow tabs for indentation - confirm whether the tabs exist in the real file or
	# are an artifact of how this text was extracted.

	# File name of a BERT configuration in JSON form.
	# Presumably resolved relative to this file - verify against the loader.
	bert_config: 'config_bert.json'

	# Image, batch and vision-backbone settings.
	# NOTE(review): image_res (504) is a multiple of clip_vision_patch_size (14) but
	# differs from clip_image_resolution (224) further down - confirm which value the
	# model actually uses for its input size.
	# vision_width (1024) matches clip_vision_width (1024) in the clip section.
	image_res: 504
	batch_size_train: 128
	vision_width: 1024
	distill: True
	clip_name: "ViT-L-14"
	batch_size_test: 64
	k_test: 128

	# Presumably training-time knobs (alpha looks like a loss/distillation weight,
	# warm_up a learning-rate warm-up switch) - TODO confirm against the training code.
	alpha: 0.4
	warm_up: True

	# Presumably the end-of-sequence token string used for answers - confirm.
	eos: '[SEP]'

	# Optimizer and learning-rate schedule, written as inline (flow-style) mappings.
	# NOTE(review): the exponent literals without a decimal point (3e-5, 5e-6, 1e-6,
	# 1e-5) are read as strings by YAML 1.1 loaders such as PyYAML and as floats by
	# YAML 1.2 loaders - confirm which loader consumes this file.
	# NOTE(review): the key below is spelled "schedular"; it is left as-is because
	# consumers presumably look it up by this exact name.
	optimizer: {opt: adamW, lr1: 3e-5, lr2: 5e-6, weight_decay: 0.02}
	schedular: {sched: cosine, lr: 3e-5, epochs: 8, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 4, cooldown_epochs: 0}

	# predictor
	# Presumably answer-generation settings: output length bounds, beam width,
	# optional OCR/object inputs, and the text encoder/decoder checkpoints - TODO confirm.
	# text_encoder and text_decoder name the same checkpoint ('bert-base-uncased').
	min_length: 1
	max_length: 10
	beam_size: 5
	add_ocr: False
	add_object: False
	text_encoder: 'bert-base-uncased'
	text_decoder: 'bert-base-uncased'

	# clip
	# Presumably the architecture dimensions of the CLIP model named by clip_name
	# above - verify against the model-building code.
	clip_embed_dim: 768
	clip_image_resolution: 224
	clip_vision_layers: 24
	clip_vision_width: 1024
	clip_vision_patch_size: 14
	clip_context_length: 77
	clip_vocab_size: 49408
	clip_transformer_width: 768
	clip_transformer_heads: 12
	clip_transformer_layers: 12