ntviet committed on
Commit
bacfcbb
·
verified ·
1 Parent(s): fedbe20

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<mask>": 250053,
3
+ "az_AZ": 250027,
4
+ "bn_IN": 250028,
5
+ "fa_IR": 250029,
6
+ "gl_ES": 250051,
7
+ "he_IL": 250030,
8
+ "hr_HR": 250031,
9
+ "id_ID": 250032,
10
+ "ka_GE": 250033,
11
+ "km_KH": 250034,
12
+ "mk_MK": 250035,
13
+ "ml_IN": 250036,
14
+ "mn_MN": 250037,
15
+ "mr_IN": 250038,
16
+ "pl_PL": 250039,
17
+ "ps_AF": 250040,
18
+ "pt_XX": 250041,
19
+ "sl_SI": 250052,
20
+ "sv_SE": 250042,
21
+ "sw_KE": 250043,
22
+ "ta_IN": 250044,
23
+ "te_IN": 250045,
24
+ "th_TH": 250046,
25
+ "tl_XX": 250047,
26
+ "uk_UA": 250048,
27
+ "ur_PK": 250049,
28
+ "xh_ZA": 250050
29
+ }
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "_name_or_path": "facebook/mbart-large-50-many-to-many-mmt",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
6
  "add_bias_logits": false,
7
  "add_final_layer_norm": true,
8
  "architectures": [
9
- "MBartForConditionalGeneration"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 0,
@@ -52,7 +52,7 @@
52
  "static_position_embeddings": false,
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "torch_dtype": "float32",
55
- "transformers_version": "4.47.1",
56
  "use_cache": true,
57
  "vocab_size": 250054
58
  }
 
1
  {
2
+ "_name_or_path": "ntviet/mbart-hre-viet1.0",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
6
  "add_bias_logits": false,
7
  "add_final_layer_norm": true,
8
  "architectures": [
9
+ "TranslationModel"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 0,
 
52
  "static_position_embeddings": false,
53
  "tokenizer_class": "MBart50Tokenizer",
54
  "torch_dtype": "float32",
55
+ "transformers_version": "4.48.2",
56
  "use_cache": true,
57
  "vocab_size": 250054
58
  }
generation_config.json CHANGED
@@ -2,11 +2,8 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
5
- "early_stopping": true,
6
  "eos_token_id": 2,
7
  "forced_eos_token_id": 2,
8
- "max_length": 200,
9
- "num_beams": 5,
10
  "pad_token_id": 1,
11
- "transformers_version": "4.47.1"
12
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "decoder_start_token_id": 2,
 
5
  "eos_token_id": 2,
6
  "forced_eos_token_id": 2,
 
 
7
  "pad_token_id": 1,
8
+ "transformers_version": "4.48.2"
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a38c6ca205e7a78156a9984c157e3c1349db3e403de3b3411e0f67a27029d9d
3
- size 2444578688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cff27fa0e23cafddffb59c02d3661a16d19ef793714e8b01c8a6abfa6064bf2e
3
+ size 3469804224
special_tokens_map.json CHANGED
@@ -53,9 +53,27 @@
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
- "bos_token": "<s>",
57
- "cls_token": "<s>",
58
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "mask_token": {
60
  "content": "<mask>",
61
  "lstrip": true,
@@ -63,7 +81,25 @@
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
- "pad_token": "<pad>",
67
- "sep_token": "</s>",
68
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
 
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
+ "bos_token": {
57
+ "content": "<s>",
58
+ "lstrip": false,
59
+ "normalized": false,
60
+ "rstrip": false,
61
+ "single_word": false
62
+ },
63
+ "cls_token": {
64
+ "content": "<s>",
65
+ "lstrip": false,
66
+ "normalized": false,
67
+ "rstrip": false,
68
+ "single_word": false
69
+ },
70
+ "eos_token": {
71
+ "content": "</s>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false
76
+ },
77
  "mask_token": {
78
  "content": "<mask>",
79
  "lstrip": true,
 
81
  "rstrip": false,
82
  "single_word": false
83
  },
84
+ "pad_token": {
85
+ "content": "<pad>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false
90
+ },
91
+ "sep_token": {
92
+ "content": "</s>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false
97
+ },
98
+ "unk_token": {
99
+ "content": "<unk>",
100
+ "lstrip": false,
101
+ "normalized": false,
102
+ "rstrip": false,
103
+ "single_word": false
104
+ }
105
  }
tokenizer_config.json CHANGED
@@ -524,6 +524,6 @@
524
  "sp_model_kwargs": {},
525
  "src_lang": "vi_VN",
526
  "tgt_lang": "vi_VN",
527
- "tokenizer_class": "MBart50Tokenizer",
528
  "unk_token": "<unk>"
529
  }
 
524
  "sp_model_kwargs": {},
525
  "src_lang": "vi_VN",
526
  "tgt_lang": "vi_VN",
527
+ "tokenizer_class": "MBartTokenizer",
528
  "unk_token": "<unk>"
529
  }