File size: 670 Bytes
401fa20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Registry of vision encoder settings, keyed by a short alias.
# Each entry carries:
#   name       - identifier string for the encoder variant
#   pretrained - checkpoint identifier (looks like a Hugging Face hub id;
#                TODO confirm how the loader resolves it)
#   d_model    - integer width (presumably the encoder's hidden size; verify
#                against the model-building code)
VisionEncoders = {
    "beit": {
        "name": "beit_base",
        "pretrained": "microsoft/beit-base-patch16-224-pt22k-ft22k",
        "d_model": 768,
    },
    "beit_large": {
        "name": "beit_large",
        "pretrained": "microsoft/beit-large-patch16-224-pt22k-ft22k",
        "d_model": 1024,
    },
}

# Registry of text encoder settings, keyed by a short alias.
# Each entry carries:
#   name         - identifier string for the encoder variant
#   pretrained   - checkpoint identifier (looks like a Hugging Face hub id;
#                  TODO confirm how the loader resolves it)
#   config       - path string to a JSON config file (presumably relative to
#                  the project root; verify against the caller)
#   d_model      - integer width (presumably the encoder's hidden size)
#   fusion_layer - integer layer index (presumably where cross-modal fusion
#                  begins; confirm against the model-building code)
TextEncoders = {
    "bert": {
        "name": "bert_base",
        "pretrained": "bert-base-uncased",
        "config": "configs/config_bert.json",
        "d_model": 768,
        "fusion_layer": 9,
    },
    "bert_large": {
        "name": "bert_large",
        "pretrained": "bert-large-uncased",
        "config": "configs/config_bert_large.json",
        "d_model": 1024,
        "fusion_layer": 19,
    },
}