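# NOTE(review): the three lines below appear to be page-header residue from
# the hosting site rather than configuration data. They are kept as comments
# so that the file parses as YAML.
#   xingjianleng's picture
#   upload
#   510b154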
# General model / training settings.
# Key names are kept exactly as in the original (including the spelling
# `schedular`): they are presumably looked up by name by the consuming code,
# so renaming them here would break it — confirm before changing any key.
bert_config: 'config_bert.json'
# NOTE(review): 504 = 36 * 14; presumably chosen as a multiple of
# clip_vision_patch_size (14) — confirm.
image_res: 504
batch_size_train: 128
# Same value as clip_vision_width further down in this file.
vision_width: 1024
distill: true
clip_name: 'ViT-L-14'
batch_size_test: 64
k_test: 128
alpha: 0.4
warm_up: true
# Must stay quoted: a leading `[` would otherwise start a flow sequence.
eos: '[SEP]'

# Scientific-notation values are written with an explicit decimal point
# (3.0e-5 rather than 3e-5). YAML 1.1 resolvers such as PyYAML only recognise
# a float when the mantissa contains a `.`, and would otherwise load these
# values as strings; with the dot, every common parser yields the same float.
optimizer:
  opt: adamW
  lr1: 3.0e-5
  lr2: 5.0e-6
  weight_decay: 0.02

# `lr` here has the same value as optimizer.lr1.
schedular:
  sched: cosine
  lr: 3.0e-5
  epochs: 8
  min_lr: 1.0e-6
  decay_rate: 1
  warmup_lr: 1.0e-5
  warmup_epochs: 4
  cooldown_epochs: 0
# predictor
# NOTE(review): min_length / max_length / beam_size are presumably limits for
# text generation (beam search) — confirm against the consuming code.
min_length: 1
max_length: 10
beam_size: 5
# Booleans use the canonical lowercase form, which every YAML schema accepts.
add_ocr: false
add_object: false
# The encoder and decoder entries carry the same identifier string.
text_encoder: 'bert-base-uncased'
text_decoder: 'bert-base-uncased'
# clip
# NOTE(review): these values look like the architecture hyper-parameters of
# the CLIP variant named by `clip_name` ("ViT-L-14") earlier in this file —
# confirm they match the checkpoint that is actually loaded.
clip_embed_dim: 768
# Differs from `image_res` (504) set earlier in this file; verify which of
# the two the vision tower actually uses at run time.
clip_image_resolution: 224
clip_vision_layers: 24
# Same value as the top-level `vision_width` (1024).
clip_vision_width: 1024
clip_vision_patch_size: 14
# The remaining keys carry the `clip_transformer_` / context / vocab names;
# presumably they describe CLIP's text side — confirm.
clip_context_length: 77
clip_vocab_size: 49408
clip_transformer_width: 768
clip_transformer_heads: 12
clip_transformer_layers: 12