doberst committed on
Commit
7f81323
·
verified ·
1 Parent(s): 6254f53

Upload 20 files

Browse files
ar128_cl4096_1_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6503cecc31f3d91e43af7a9d5c3ef1002486803ba8e34e8379102b0aebcdd45
3
+ size 2476
ar128_cl4096_2_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e254dc746fe3ebe48e2a8286f5315b158282d286e6d09181c0e43220dc81a61
3
+ size 2581
ar128_cl4096_3_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf49cd2f4b551a98f4fb283ae881c5dda50763867604b2e54c130d219737c759
3
+ size 2599
ar128_cl4096_4_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ce267cd86f959b49714e6d13b34a1e9a432f485c99cb7bffd079836bc44c1e
3
+ size 2542
ar1_cl4096_1_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a51a43eefc266fce6c94e8ac91edf8ff0b0180f0d2b23b0c7c40b10dc1d0b38
3
+ size 2453
ar1_cl4096_2_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b763fe2465a3307534df1dec3d062ee4ca65ac4b6c3364cfadd9433260f64f2
3
+ size 2558
ar1_cl4096_3_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b2c2fc605449bc436302c7b6cc348099e5b426fcbd66a35d44e9690356a79e5
3
+ size 2576
ar1_cl4096_4_of_4_qnn_ctx.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7c7c2b98e5deb6463268656be57e24c130ec21c4fc267e47723a39134c334a
3
+ size 2519
dequantizer.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9905a234f604f4f8496e517bedab0a342f40e6b6b79765bd7e22185aba3dcdfb
3
+ size 226
genai_config.json ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 4096,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": [],
9
+ "log_severity_level": 0
10
+ },
11
+ "filename": "model.onnx",
12
+ "head_size": 96,
13
+ "hidden_size": 3072,
14
+ "inputs": {
15
+ "input_ids": "input_ids",
16
+ "attention_mask": "attention_mask_before_processor",
17
+ "position_ids": "position_ids",
18
+ "past_key_names": "past_key_%d_in",
19
+ "past_value_names": "past_value_%d_in"
20
+ },
21
+ "outputs": {
22
+ "logits": "logits_dequantized",
23
+ "present_key_names": "past_key_%d_out",
24
+ "present_value_names": "past_value_%d_out"
25
+ },
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 32,
28
+ "num_key_value_heads": 32,
29
+ "sliding_window": {
30
+ "window_size": 128,
31
+ "pad_value": 128
32
+ },
33
+ "pipeline": [
34
+ {
35
+ "position_processor": {
36
+ "filename": "position-processor.onnx",
37
+ "inputs": [
38
+ "attention_mask_before_processor",
39
+ "position_ids"
40
+ ],
41
+ "outputs": [
42
+ "attention_mask_before_quantizer",
43
+ "position_ids_cos_before_quantizer",
44
+ "position_ids_sin_before_quantizer"
45
+ ],
46
+ "session_options": {
47
+ "log_id": "onnxruntime-genai.position_processor",
48
+ "provider_options": [
49
+ {}
50
+ ]
51
+ }
52
+ },
53
+ "quantizer": {
54
+ "filename": "quantizer.onnx",
55
+ "inputs": [
56
+ "attention_mask_before_quantizer",
57
+ "position_ids_cos_before_quantizer",
58
+ "position_ids_sin_before_quantizer"
59
+ ],
60
+ "outputs": [
61
+ "attention_mask",
62
+ "position_ids_cos",
63
+ "position_ids_sin"
64
+ ],
65
+ "session_options": {
66
+ "log_id": "onnxruntime-genai.quantizer",
67
+ "provider_options": [
68
+ {}
69
+ ]
70
+ }
71
+ },
72
+ "prompt-processor-1": {
73
+ "filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
74
+ "inputs": [
75
+ "input_ids",
76
+ "past_key_0_in",
77
+ "past_key_5_in",
78
+ "past_value_5_in",
79
+ "past_value_0_in",
80
+ "past_key_6_in",
81
+ "past_value_6_in",
82
+ "past_key_7_in",
83
+ "past_value_7_in",
84
+ "past_key_1_in",
85
+ "past_value_1_in",
86
+ "past_key_2_in",
87
+ "past_value_2_in",
88
+ "past_key_3_in",
89
+ "past_value_3_in",
90
+ "past_key_4_in",
91
+ "past_value_4_in",
92
+ "position_ids_cos",
93
+ "position_ids_sin",
94
+ "attention_mask"
95
+ ],
96
+ "outputs": [
97
+ "past_value_0_out",
98
+ "past_key_0_out",
99
+ "past_value_1_out",
100
+ "past_key_1_out",
101
+ "past_value_2_out",
102
+ "past_key_2_out",
103
+ "past_value_3_out",
104
+ "past_key_3_out",
105
+ "past_value_4_out",
106
+ "past_key_4_out",
107
+ "past_value_5_out",
108
+ "past_key_5_out",
109
+ "past_value_6_out",
110
+ "past_key_6_out",
111
+ "past_value_7_out",
112
+ "past_key_7_out",
113
+ "_model_layers_7_Add_1_Add_output_0"
114
+ ],
115
+ "session_options": {
116
+ "log_id": "onnxruntime-genai.pp1",
117
+ "provider_options": [
118
+ {
119
+ "qnn": {
120
+ "backend_path": "QnnHtp.dll",
121
+ "htp_performance_mode": "burst",
122
+ "enable_htp_shared_memory_allocator": "1",
123
+ "qnn_context_priority": "high"
124
+ }
125
+ }
126
+ ]
127
+ },
128
+ "run_on_token_gen": false
129
+ },
130
+ "prompt-processor-2": {
131
+ "filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
132
+ "inputs": [
133
+ "_model_layers_7_Add_1_Add_output_0",
134
+ "past_key_8_in",
135
+ "past_key_13_in",
136
+ "past_value_13_in",
137
+ "past_value_8_in",
138
+ "past_key_14_in",
139
+ "past_value_14_in",
140
+ "past_key_15_in",
141
+ "past_value_15_in",
142
+ "past_key_9_in",
143
+ "past_value_9_in",
144
+ "past_key_10_in",
145
+ "past_value_10_in",
146
+ "past_key_11_in",
147
+ "past_value_11_in",
148
+ "past_key_12_in",
149
+ "past_value_12_in",
150
+ "position_ids_cos",
151
+ "position_ids_sin",
152
+ "attention_mask"
153
+ ],
154
+ "outputs": [
155
+ "past_value_8_out",
156
+ "past_key_8_out",
157
+ "past_value_9_out",
158
+ "past_key_9_out",
159
+ "past_value_10_out",
160
+ "past_key_10_out",
161
+ "past_value_11_out",
162
+ "past_key_11_out",
163
+ "past_value_12_out",
164
+ "past_key_12_out",
165
+ "past_value_13_out",
166
+ "past_key_13_out",
167
+ "past_value_14_out",
168
+ "past_key_14_out",
169
+ "past_value_15_out",
170
+ "past_key_15_out",
171
+ "_model_layers_15_Add_1_Add_output_0"
172
+ ],
173
+ "session_options": {
174
+ "log_id": "onnxruntime-genai.pp2",
175
+ "provider_options": [
176
+ {
177
+ "qnn": {
178
+ "backend_path": "QnnHtp.dll",
179
+ "htp_performance_mode": "burst",
180
+ "enable_htp_shared_memory_allocator": "1",
181
+ "qnn_context_priority": "high"
182
+ }
183
+ }
184
+ ]
185
+ },
186
+ "run_on_token_gen": false
187
+ },
188
+ "prompt-processor-3": {
189
+ "filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
190
+ "inputs": [
191
+ "_model_layers_15_Add_1_Add_output_0",
192
+ "past_key_16_in",
193
+ "past_key_21_in",
194
+ "past_value_21_in",
195
+ "past_value_16_in",
196
+ "past_key_22_in",
197
+ "past_value_22_in",
198
+ "past_key_23_in",
199
+ "past_value_23_in",
200
+ "past_key_17_in",
201
+ "past_value_17_in",
202
+ "past_key_18_in",
203
+ "past_value_18_in",
204
+ "past_key_19_in",
205
+ "past_value_19_in",
206
+ "past_key_20_in",
207
+ "past_value_20_in",
208
+ "position_ids_cos",
209
+ "position_ids_sin",
210
+ "attention_mask"
211
+ ],
212
+ "outputs": [
213
+ "past_value_16_out",
214
+ "past_key_16_out",
215
+ "past_value_17_out",
216
+ "past_key_17_out",
217
+ "past_value_18_out",
218
+ "past_key_18_out",
219
+ "past_value_19_out",
220
+ "past_key_19_out",
221
+ "past_value_20_out",
222
+ "past_key_20_out",
223
+ "past_value_21_out",
224
+ "past_key_21_out",
225
+ "past_value_22_out",
226
+ "past_key_22_out",
227
+ "past_value_23_out",
228
+ "past_key_23_out",
229
+ "_model_layers_23_Add_1_Add_output_0"
230
+ ],
231
+ "session_options": {
232
+ "log_id": "onnxruntime-genai.pp3",
233
+ "provider_options": [
234
+ {
235
+ "qnn": {
236
+ "backend_path": "QnnHtp.dll",
237
+ "htp_performance_mode": "burst",
238
+ "enable_htp_shared_memory_allocator": "1",
239
+ "qnn_context_priority": "high"
240
+ }
241
+ }
242
+ ]
243
+ },
244
+ "run_on_token_gen": false
245
+ },
246
+ "prompt-processor-4": {
247
+ "filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
248
+ "inputs": [
249
+ "_model_layers_23_Add_1_Add_output_0",
250
+ "past_key_24_in",
251
+ "past_key_29_in",
252
+ "past_value_29_in",
253
+ "past_value_24_in",
254
+ "past_key_30_in",
255
+ "past_value_30_in",
256
+ "past_key_31_in",
257
+ "past_value_31_in",
258
+ "past_key_25_in",
259
+ "past_value_25_in",
260
+ "past_key_26_in",
261
+ "past_value_26_in",
262
+ "past_key_27_in",
263
+ "past_value_27_in",
264
+ "past_key_28_in",
265
+ "past_value_28_in",
266
+ "position_ids_cos",
267
+ "position_ids_sin",
268
+ "attention_mask"
269
+ ],
270
+ "outputs": [
271
+ "past_value_24_out",
272
+ "past_key_24_out",
273
+ "past_value_25_out",
274
+ "past_key_25_out",
275
+ "past_value_26_out",
276
+ "past_key_26_out",
277
+ "past_value_27_out",
278
+ "past_key_27_out",
279
+ "past_value_28_out",
280
+ "past_key_28_out",
281
+ "past_value_29_out",
282
+ "past_key_29_out",
283
+ "past_value_30_out",
284
+ "past_key_30_out",
285
+ "past_value_31_out",
286
+ "past_key_31_out",
287
+ "logits"
288
+ ],
289
+ "session_options": {
290
+ "log_id": "onnxruntime-genai.pp4",
291
+ "provider_options": [
292
+ {
293
+ "qnn": {
294
+ "backend_path": "QnnHtp.dll",
295
+ "htp_performance_mode": "burst",
296
+ "enable_htp_shared_memory_allocator": "1",
297
+ "qnn_context_priority": "high"
298
+ }
299
+ }
300
+ ]
301
+ },
302
+ "run_on_token_gen": false
303
+ },
304
+ "token-generator-1": {
305
+ "filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
306
+ "inputs": [
307
+ "input_ids",
308
+ "past_key_0_in",
309
+ "past_key_5_in",
310
+ "past_value_5_in",
311
+ "past_value_0_in",
312
+ "past_key_6_in",
313
+ "past_value_6_in",
314
+ "past_key_7_in",
315
+ "past_value_7_in",
316
+ "past_key_1_in",
317
+ "past_value_1_in",
318
+ "past_key_2_in",
319
+ "past_value_2_in",
320
+ "past_key_3_in",
321
+ "past_value_3_in",
322
+ "past_key_4_in",
323
+ "past_value_4_in",
324
+ "position_ids_cos",
325
+ "position_ids_sin",
326
+ "attention_mask"
327
+ ],
328
+ "outputs": [
329
+ "past_value_0_out",
330
+ "past_key_0_out",
331
+ "past_value_1_out",
332
+ "past_key_1_out",
333
+ "past_value_2_out",
334
+ "past_key_2_out",
335
+ "past_value_3_out",
336
+ "past_key_3_out",
337
+ "past_value_4_out",
338
+ "past_key_4_out",
339
+ "past_value_5_out",
340
+ "past_key_5_out",
341
+ "past_value_6_out",
342
+ "past_key_6_out",
343
+ "past_value_7_out",
344
+ "past_key_7_out",
345
+ "_model_layers_7_Add_1_Add_output_0"
346
+ ],
347
+ "session_options": {
348
+ "log_id": "onnxruntime-genai.tg1",
349
+ "provider_options": [
350
+ {
351
+ "qnn": {
352
+ "backend_path": "QnnHtp.dll",
353
+ "htp_performance_mode": "burst",
354
+ "enable_htp_shared_memory_allocator": "1",
355
+ "qnn_context_priority": "high"
356
+ }
357
+ }
358
+ ]
359
+ },
360
+ "run_on_prompt": false
361
+ },
362
+ "token-generator-2": {
363
+ "filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
364
+ "inputs": [
365
+ "_model_layers_7_Add_1_Add_output_0",
366
+ "past_key_8_in",
367
+ "past_key_13_in",
368
+ "past_value_13_in",
369
+ "past_value_8_in",
370
+ "past_key_14_in",
371
+ "past_value_14_in",
372
+ "past_key_15_in",
373
+ "past_value_15_in",
374
+ "past_key_9_in",
375
+ "past_value_9_in",
376
+ "past_key_10_in",
377
+ "past_value_10_in",
378
+ "past_key_11_in",
379
+ "past_value_11_in",
380
+ "past_key_12_in",
381
+ "past_value_12_in",
382
+ "position_ids_cos",
383
+ "position_ids_sin",
384
+ "attention_mask"
385
+ ],
386
+ "outputs": [
387
+ "past_value_8_out",
388
+ "past_key_8_out",
389
+ "past_value_9_out",
390
+ "past_key_9_out",
391
+ "past_value_10_out",
392
+ "past_key_10_out",
393
+ "past_value_11_out",
394
+ "past_key_11_out",
395
+ "past_value_12_out",
396
+ "past_key_12_out",
397
+ "past_value_13_out",
398
+ "past_key_13_out",
399
+ "past_value_14_out",
400
+ "past_key_14_out",
401
+ "past_value_15_out",
402
+ "past_key_15_out",
403
+ "_model_layers_15_Add_1_Add_output_0"
404
+ ],
405
+ "session_options": {
406
+ "log_id": "onnxruntime-genai.tg2",
407
+ "provider_options": [
408
+ {
409
+ "qnn": {
410
+ "backend_path": "QnnHtp.dll",
411
+ "htp_performance_mode": "burst",
412
+ "enable_htp_shared_memory_allocator": "1",
413
+ "qnn_context_priority": "high"
414
+ }
415
+ }
416
+ ]
417
+ },
418
+ "run_on_prompt": false
419
+ },
420
+ "token-generator-3": {
421
+ "filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
422
+ "inputs": [
423
+ "_model_layers_15_Add_1_Add_output_0",
424
+ "past_key_16_in",
425
+ "past_key_21_in",
426
+ "past_value_21_in",
427
+ "past_value_16_in",
428
+ "past_key_22_in",
429
+ "past_value_22_in",
430
+ "past_key_23_in",
431
+ "past_value_23_in",
432
+ "past_key_17_in",
433
+ "past_value_17_in",
434
+ "past_key_18_in",
435
+ "past_value_18_in",
436
+ "past_key_19_in",
437
+ "past_value_19_in",
438
+ "past_key_20_in",
439
+ "past_value_20_in",
440
+ "position_ids_cos",
441
+ "position_ids_sin",
442
+ "attention_mask"
443
+ ],
444
+ "outputs": [
445
+ "past_value_16_out",
446
+ "past_key_16_out",
447
+ "past_value_17_out",
448
+ "past_key_17_out",
449
+ "past_value_18_out",
450
+ "past_key_18_out",
451
+ "past_value_19_out",
452
+ "past_key_19_out",
453
+ "past_value_20_out",
454
+ "past_key_20_out",
455
+ "past_value_21_out",
456
+ "past_key_21_out",
457
+ "past_value_22_out",
458
+ "past_key_22_out",
459
+ "past_value_23_out",
460
+ "past_key_23_out",
461
+ "_model_layers_23_Add_1_Add_output_0"
462
+ ],
463
+ "session_options": {
464
+ "log_id": "onnxruntime-genai.tg3",
465
+ "provider_options": [
466
+ {
467
+ "qnn": {
468
+ "backend_path": "QnnHtp.dll",
469
+ "htp_performance_mode": "burst",
470
+ "enable_htp_shared_memory_allocator": "1",
471
+ "qnn_context_priority": "high"
472
+ }
473
+ }
474
+ ]
475
+ },
476
+ "run_on_prompt": false
477
+ },
478
+ "token-generator-4": {
479
+ "filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
480
+ "inputs": [
481
+ "_model_layers_23_Add_1_Add_output_0",
482
+ "past_key_24_in",
483
+ "past_key_29_in",
484
+ "past_value_29_in",
485
+ "past_value_24_in",
486
+ "past_key_30_in",
487
+ "past_value_30_in",
488
+ "past_key_31_in",
489
+ "past_value_31_in",
490
+ "past_key_25_in",
491
+ "past_value_25_in",
492
+ "past_key_26_in",
493
+ "past_value_26_in",
494
+ "past_key_27_in",
495
+ "past_value_27_in",
496
+ "past_key_28_in",
497
+ "past_value_28_in",
498
+ "position_ids_cos",
499
+ "position_ids_sin",
500
+ "attention_mask"
501
+ ],
502
+ "outputs": [
503
+ "past_value_24_out",
504
+ "past_key_24_out",
505
+ "past_value_25_out",
506
+ "past_key_25_out",
507
+ "past_value_26_out",
508
+ "past_key_26_out",
509
+ "past_value_27_out",
510
+ "past_key_27_out",
511
+ "past_value_28_out",
512
+ "past_key_28_out",
513
+ "past_value_29_out",
514
+ "past_key_29_out",
515
+ "past_value_30_out",
516
+ "past_key_30_out",
517
+ "past_value_31_out",
518
+ "past_key_31_out",
519
+ "logits"
520
+ ],
521
+ "session_options": {
522
+ "log_id": "onnxruntime-genai.tg4",
523
+ "provider_options": [
524
+ {
525
+ "qnn": {
526
+ "backend_path": "QnnHtp.dll",
527
+ "htp_performance_mode": "burst",
528
+ "enable_htp_shared_memory_allocator": "1",
529
+ "qnn_context_priority": "high"
530
+ }
531
+ }
532
+ ]
533
+ },
534
+ "run_on_prompt": false
535
+ },
536
+ "dequantizer": {
537
+ "filename": "dequantizer.onnx",
538
+ "inputs": [
539
+ "logits"
540
+ ],
541
+ "outputs": [
542
+ "logits_dequantized"
543
+ ],
544
+ "session_options": {
545
+ "log_id": "onnxruntime-genai.dequantizer",
546
+ "provider_options": [
547
+ {}
548
+ ]
549
+ }
550
+ }
551
+ }
552
+ ]
553
+ },
554
+ "eos_token_id": [
555
+ 32007,
556
+ 32001,
557
+ 32000,
558
+ 2
559
+ ],
560
+ "pad_token_id": 32000,
561
+ "type": "decoder-pipeline",
562
+ "vocab_size": 32064
563
+ },
564
+ "search": {
565
+ "diversity_penalty": 0.0,
566
+ "do_sample": true,
567
+ "early_stopping": true,
568
+ "length_penalty": 1.0,
569
+ "max_length": 2048,
570
+ "min_length": 0,
571
+ "no_repeat_ngram_size": 0,
572
+ "num_beams": 1,
573
+ "num_return_sequences": 1,
574
+ "past_present_share_buffer": true,
575
+ "repetition_penalty": 1.0,
576
+ "temperature": 0.6,
577
+ "top_k": 1,
578
+ "top_p": 1.0
579
+ }
580
+ }
genai_config_removed_sliding_window.json ADDED
@@ -0,0 +1,576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 4096,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": [],
9
+ "log_severity_level": 0
10
+ },
11
+ "filename": "model.onnx",
12
+ "head_size": 96,
13
+ "hidden_size": 3072,
14
+ "inputs": {
15
+ "input_ids": "input_ids",
16
+ "attention_mask": "attention_mask_before_processor",
17
+ "position_ids": "position_ids",
18
+ "past_key_names": "past_key_%d_in",
19
+ "past_value_names": "past_value_%d_in"
20
+ },
21
+ "outputs": {
22
+ "logits": "logits_dequantized",
23
+ "present_key_names": "past_key_%d_out",
24
+ "present_value_names": "past_value_%d_out"
25
+ },
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 32,
28
+ "num_key_value_heads": 32,
29
+ "pipeline": [
30
+ {
31
+ "position_processor": {
32
+ "filename": "position-processor.onnx",
33
+ "inputs": [
34
+ "attention_mask_before_processor",
35
+ "position_ids"
36
+ ],
37
+ "outputs": [
38
+ "attention_mask_before_quantizer",
39
+ "position_ids_cos_before_quantizer",
40
+ "position_ids_sin_before_quantizer"
41
+ ],
42
+ "session_options": {
43
+ "log_id": "onnxruntime-genai.position_processor",
44
+ "provider_options": [
45
+ {}
46
+ ]
47
+ }
48
+ },
49
+ "quantizer": {
50
+ "filename": "quantizer.onnx",
51
+ "inputs": [
52
+ "attention_mask_before_quantizer",
53
+ "position_ids_cos_before_quantizer",
54
+ "position_ids_sin_before_quantizer"
55
+ ],
56
+ "outputs": [
57
+ "attention_mask",
58
+ "position_ids_cos",
59
+ "position_ids_sin"
60
+ ],
61
+ "session_options": {
62
+ "log_id": "onnxruntime-genai.quantizer",
63
+ "provider_options": [
64
+ {}
65
+ ]
66
+ }
67
+ },
68
+ "prompt-processor-1": {
69
+ "filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
70
+ "inputs": [
71
+ "input_ids",
72
+ "past_key_0_in",
73
+ "past_key_5_in",
74
+ "past_value_5_in",
75
+ "past_value_0_in",
76
+ "past_key_6_in",
77
+ "past_value_6_in",
78
+ "past_key_7_in",
79
+ "past_value_7_in",
80
+ "past_key_1_in",
81
+ "past_value_1_in",
82
+ "past_key_2_in",
83
+ "past_value_2_in",
84
+ "past_key_3_in",
85
+ "past_value_3_in",
86
+ "past_key_4_in",
87
+ "past_value_4_in",
88
+ "position_ids_cos",
89
+ "position_ids_sin",
90
+ "attention_mask"
91
+ ],
92
+ "outputs": [
93
+ "past_value_0_out",
94
+ "past_key_0_out",
95
+ "past_value_1_out",
96
+ "past_key_1_out",
97
+ "past_value_2_out",
98
+ "past_key_2_out",
99
+ "past_value_3_out",
100
+ "past_key_3_out",
101
+ "past_value_4_out",
102
+ "past_key_4_out",
103
+ "past_value_5_out",
104
+ "past_key_5_out",
105
+ "past_value_6_out",
106
+ "past_key_6_out",
107
+ "past_value_7_out",
108
+ "past_key_7_out",
109
+ "_model_layers_7_Add_1_Add_output_0"
110
+ ],
111
+ "session_options": {
112
+ "log_id": "onnxruntime-genai.pp1",
113
+ "provider_options": [
114
+ {
115
+ "qnn": {
116
+ "backend_path": "QnnHtp.dll",
117
+ "htp_performance_mode": "burst",
118
+ "enable_htp_shared_memory_allocator": "1",
119
+ "qnn_context_priority": "high"
120
+ }
121
+ }
122
+ ]
123
+ },
124
+ "run_on_token_gen": false
125
+ },
126
+ "prompt-processor-2": {
127
+ "filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
128
+ "inputs": [
129
+ "_model_layers_7_Add_1_Add_output_0",
130
+ "past_key_8_in",
131
+ "past_key_13_in",
132
+ "past_value_13_in",
133
+ "past_value_8_in",
134
+ "past_key_14_in",
135
+ "past_value_14_in",
136
+ "past_key_15_in",
137
+ "past_value_15_in",
138
+ "past_key_9_in",
139
+ "past_value_9_in",
140
+ "past_key_10_in",
141
+ "past_value_10_in",
142
+ "past_key_11_in",
143
+ "past_value_11_in",
144
+ "past_key_12_in",
145
+ "past_value_12_in",
146
+ "position_ids_cos",
147
+ "position_ids_sin",
148
+ "attention_mask"
149
+ ],
150
+ "outputs": [
151
+ "past_value_8_out",
152
+ "past_key_8_out",
153
+ "past_value_9_out",
154
+ "past_key_9_out",
155
+ "past_value_10_out",
156
+ "past_key_10_out",
157
+ "past_value_11_out",
158
+ "past_key_11_out",
159
+ "past_value_12_out",
160
+ "past_key_12_out",
161
+ "past_value_13_out",
162
+ "past_key_13_out",
163
+ "past_value_14_out",
164
+ "past_key_14_out",
165
+ "past_value_15_out",
166
+ "past_key_15_out",
167
+ "_model_layers_15_Add_1_Add_output_0"
168
+ ],
169
+ "session_options": {
170
+ "log_id": "onnxruntime-genai.pp2",
171
+ "provider_options": [
172
+ {
173
+ "qnn": {
174
+ "backend_path": "QnnHtp.dll",
175
+ "htp_performance_mode": "burst",
176
+ "enable_htp_shared_memory_allocator": "1",
177
+ "qnn_context_priority": "high"
178
+ }
179
+ }
180
+ ]
181
+ },
182
+ "run_on_token_gen": false
183
+ },
184
+ "prompt-processor-3": {
185
+ "filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
186
+ "inputs": [
187
+ "_model_layers_15_Add_1_Add_output_0",
188
+ "past_key_16_in",
189
+ "past_key_21_in",
190
+ "past_value_21_in",
191
+ "past_value_16_in",
192
+ "past_key_22_in",
193
+ "past_value_22_in",
194
+ "past_key_23_in",
195
+ "past_value_23_in",
196
+ "past_key_17_in",
197
+ "past_value_17_in",
198
+ "past_key_18_in",
199
+ "past_value_18_in",
200
+ "past_key_19_in",
201
+ "past_value_19_in",
202
+ "past_key_20_in",
203
+ "past_value_20_in",
204
+ "position_ids_cos",
205
+ "position_ids_sin",
206
+ "attention_mask"
207
+ ],
208
+ "outputs": [
209
+ "past_value_16_out",
210
+ "past_key_16_out",
211
+ "past_value_17_out",
212
+ "past_key_17_out",
213
+ "past_value_18_out",
214
+ "past_key_18_out",
215
+ "past_value_19_out",
216
+ "past_key_19_out",
217
+ "past_value_20_out",
218
+ "past_key_20_out",
219
+ "past_value_21_out",
220
+ "past_key_21_out",
221
+ "past_value_22_out",
222
+ "past_key_22_out",
223
+ "past_value_23_out",
224
+ "past_key_23_out",
225
+ "_model_layers_23_Add_1_Add_output_0"
226
+ ],
227
+ "session_options": {
228
+ "log_id": "onnxruntime-genai.pp3",
229
+ "provider_options": [
230
+ {
231
+ "qnn": {
232
+ "backend_path": "QnnHtp.dll",
233
+ "htp_performance_mode": "burst",
234
+ "enable_htp_shared_memory_allocator": "1",
235
+ "qnn_context_priority": "high"
236
+ }
237
+ }
238
+ ]
239
+ },
240
+ "run_on_token_gen": false
241
+ },
242
+ "prompt-processor-4": {
243
+ "filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
244
+ "inputs": [
245
+ "_model_layers_23_Add_1_Add_output_0",
246
+ "past_key_24_in",
247
+ "past_key_29_in",
248
+ "past_value_29_in",
249
+ "past_value_24_in",
250
+ "past_key_30_in",
251
+ "past_value_30_in",
252
+ "past_key_31_in",
253
+ "past_value_31_in",
254
+ "past_key_25_in",
255
+ "past_value_25_in",
256
+ "past_key_26_in",
257
+ "past_value_26_in",
258
+ "past_key_27_in",
259
+ "past_value_27_in",
260
+ "past_key_28_in",
261
+ "past_value_28_in",
262
+ "position_ids_cos",
263
+ "position_ids_sin",
264
+ "attention_mask"
265
+ ],
266
+ "outputs": [
267
+ "past_value_24_out",
268
+ "past_key_24_out",
269
+ "past_value_25_out",
270
+ "past_key_25_out",
271
+ "past_value_26_out",
272
+ "past_key_26_out",
273
+ "past_value_27_out",
274
+ "past_key_27_out",
275
+ "past_value_28_out",
276
+ "past_key_28_out",
277
+ "past_value_29_out",
278
+ "past_key_29_out",
279
+ "past_value_30_out",
280
+ "past_key_30_out",
281
+ "past_value_31_out",
282
+ "past_key_31_out",
283
+ "logits"
284
+ ],
285
+ "session_options": {
286
+ "log_id": "onnxruntime-genai.pp4",
287
+ "provider_options": [
288
+ {
289
+ "qnn": {
290
+ "backend_path": "QnnHtp.dll",
291
+ "htp_performance_mode": "burst",
292
+ "enable_htp_shared_memory_allocator": "1",
293
+ "qnn_context_priority": "high"
294
+ }
295
+ }
296
+ ]
297
+ },
298
+ "run_on_token_gen": false
299
+ },
300
+ "token-generator-1": {
301
+ "filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
302
+ "inputs": [
303
+ "input_ids",
304
+ "past_key_0_in",
305
+ "past_key_5_in",
306
+ "past_value_5_in",
307
+ "past_value_0_in",
308
+ "past_key_6_in",
309
+ "past_value_6_in",
310
+ "past_key_7_in",
311
+ "past_value_7_in",
312
+ "past_key_1_in",
313
+ "past_value_1_in",
314
+ "past_key_2_in",
315
+ "past_value_2_in",
316
+ "past_key_3_in",
317
+ "past_value_3_in",
318
+ "past_key_4_in",
319
+ "past_value_4_in",
320
+ "position_ids_cos",
321
+ "position_ids_sin",
322
+ "attention_mask"
323
+ ],
324
+ "outputs": [
325
+ "past_value_0_out",
326
+ "past_key_0_out",
327
+ "past_value_1_out",
328
+ "past_key_1_out",
329
+ "past_value_2_out",
330
+ "past_key_2_out",
331
+ "past_value_3_out",
332
+ "past_key_3_out",
333
+ "past_value_4_out",
334
+ "past_key_4_out",
335
+ "past_value_5_out",
336
+ "past_key_5_out",
337
+ "past_value_6_out",
338
+ "past_key_6_out",
339
+ "past_value_7_out",
340
+ "past_key_7_out",
341
+ "_model_layers_7_Add_1_Add_output_0"
342
+ ],
343
+ "session_options": {
344
+ "log_id": "onnxruntime-genai.tg1",
345
+ "provider_options": [
346
+ {
347
+ "qnn": {
348
+ "backend_path": "QnnHtp.dll",
349
+ "htp_performance_mode": "burst",
350
+ "enable_htp_shared_memory_allocator": "1",
351
+ "qnn_context_priority": "high"
352
+ }
353
+ }
354
+ ]
355
+ },
356
+ "run_on_prompt": false
357
+ },
358
+ "token-generator-2": {
359
+ "filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
360
+ "inputs": [
361
+ "_model_layers_7_Add_1_Add_output_0",
362
+ "past_key_8_in",
363
+ "past_key_13_in",
364
+ "past_value_13_in",
365
+ "past_value_8_in",
366
+ "past_key_14_in",
367
+ "past_value_14_in",
368
+ "past_key_15_in",
369
+ "past_value_15_in",
370
+ "past_key_9_in",
371
+ "past_value_9_in",
372
+ "past_key_10_in",
373
+ "past_value_10_in",
374
+ "past_key_11_in",
375
+ "past_value_11_in",
376
+ "past_key_12_in",
377
+ "past_value_12_in",
378
+ "position_ids_cos",
379
+ "position_ids_sin",
380
+ "attention_mask"
381
+ ],
382
+ "outputs": [
383
+ "past_value_8_out",
384
+ "past_key_8_out",
385
+ "past_value_9_out",
386
+ "past_key_9_out",
387
+ "past_value_10_out",
388
+ "past_key_10_out",
389
+ "past_value_11_out",
390
+ "past_key_11_out",
391
+ "past_value_12_out",
392
+ "past_key_12_out",
393
+ "past_value_13_out",
394
+ "past_key_13_out",
395
+ "past_value_14_out",
396
+ "past_key_14_out",
397
+ "past_value_15_out",
398
+ "past_key_15_out",
399
+ "_model_layers_15_Add_1_Add_output_0"
400
+ ],
401
+ "session_options": {
402
+ "log_id": "onnxruntime-genai.tg2",
403
+ "provider_options": [
404
+ {
405
+ "qnn": {
406
+ "backend_path": "QnnHtp.dll",
407
+ "htp_performance_mode": "burst",
408
+ "enable_htp_shared_memory_allocator": "1",
409
+ "qnn_context_priority": "high"
410
+ }
411
+ }
412
+ ]
413
+ },
414
+ "run_on_prompt": false
415
+ },
416
+ "token-generator-3": {
417
+ "filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
418
+ "inputs": [
419
+ "_model_layers_15_Add_1_Add_output_0",
420
+ "past_key_16_in",
421
+ "past_key_21_in",
422
+ "past_value_21_in",
423
+ "past_value_16_in",
424
+ "past_key_22_in",
425
+ "past_value_22_in",
426
+ "past_key_23_in",
427
+ "past_value_23_in",
428
+ "past_key_17_in",
429
+ "past_value_17_in",
430
+ "past_key_18_in",
431
+ "past_value_18_in",
432
+ "past_key_19_in",
433
+ "past_value_19_in",
434
+ "past_key_20_in",
435
+ "past_value_20_in",
436
+ "position_ids_cos",
437
+ "position_ids_sin",
438
+ "attention_mask"
439
+ ],
440
+ "outputs": [
441
+ "past_value_16_out",
442
+ "past_key_16_out",
443
+ "past_value_17_out",
444
+ "past_key_17_out",
445
+ "past_value_18_out",
446
+ "past_key_18_out",
447
+ "past_value_19_out",
448
+ "past_key_19_out",
449
+ "past_value_20_out",
450
+ "past_key_20_out",
451
+ "past_value_21_out",
452
+ "past_key_21_out",
453
+ "past_value_22_out",
454
+ "past_key_22_out",
455
+ "past_value_23_out",
456
+ "past_key_23_out",
457
+ "_model_layers_23_Add_1_Add_output_0"
458
+ ],
459
+ "session_options": {
460
+ "log_id": "onnxruntime-genai.tg3",
461
+ "provider_options": [
462
+ {
463
+ "qnn": {
464
+ "backend_path": "QnnHtp.dll",
465
+ "htp_performance_mode": "burst",
466
+ "enable_htp_shared_memory_allocator": "1",
467
+ "qnn_context_priority": "high"
468
+ }
469
+ }
470
+ ]
471
+ },
472
+ "run_on_prompt": false
473
+ },
474
+ "token-generator-4": {
475
+ "filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
476
+ "inputs": [
477
+ "_model_layers_23_Add_1_Add_output_0",
478
+ "past_key_24_in",
479
+ "past_key_29_in",
480
+ "past_value_29_in",
481
+ "past_value_24_in",
482
+ "past_key_30_in",
483
+ "past_value_30_in",
484
+ "past_key_31_in",
485
+ "past_value_31_in",
486
+ "past_key_25_in",
487
+ "past_value_25_in",
488
+ "past_key_26_in",
489
+ "past_value_26_in",
490
+ "past_key_27_in",
491
+ "past_value_27_in",
492
+ "past_key_28_in",
493
+ "past_value_28_in",
494
+ "position_ids_cos",
495
+ "position_ids_sin",
496
+ "attention_mask"
497
+ ],
498
+ "outputs": [
499
+ "past_value_24_out",
500
+ "past_key_24_out",
501
+ "past_value_25_out",
502
+ "past_key_25_out",
503
+ "past_value_26_out",
504
+ "past_key_26_out",
505
+ "past_value_27_out",
506
+ "past_key_27_out",
507
+ "past_value_28_out",
508
+ "past_key_28_out",
509
+ "past_value_29_out",
510
+ "past_key_29_out",
511
+ "past_value_30_out",
512
+ "past_key_30_out",
513
+ "past_value_31_out",
514
+ "past_key_31_out",
515
+ "logits"
516
+ ],
517
+ "session_options": {
518
+ "log_id": "onnxruntime-genai.tg4",
519
+ "provider_options": [
520
+ {
521
+ "qnn": {
522
+ "backend_path": "QnnHtp.dll",
523
+ "htp_performance_mode": "burst",
524
+ "enable_htp_shared_memory_allocator": "1",
525
+ "qnn_context_priority": "high"
526
+ }
527
+ }
528
+ ]
529
+ },
530
+ "run_on_prompt": false
531
+ },
532
+ "dequantizer": {
533
+ "filename": "dequantizer.onnx",
534
+ "inputs": [
535
+ "logits"
536
+ ],
537
+ "outputs": [
538
+ "logits_dequantized"
539
+ ],
540
+ "session_options": {
541
+ "log_id": "onnxruntime-genai.dequantizer",
542
+ "provider_options": [
543
+ {}
544
+ ]
545
+ }
546
+ }
547
+ }
548
+ ]
549
+ },
550
+ "eos_token_id": [
551
+ 32007,
552
+ 32001,
553
+ 32000,
554
+ 2
555
+ ],
556
+ "pad_token_id": 32000,
557
+ "type": "decoder-pipeline",
558
+ "vocab_size": 32064
559
+ },
560
+ "search": {
561
+ "diversity_penalty": 0.0,
562
+ "do_sample": true,
563
+ "early_stopping": true,
564
+ "length_penalty": 1.0,
565
+ "max_length": 2048,
566
+ "min_length": 0,
567
+ "no_repeat_ngram_size": 0,
568
+ "num_beams": 1,
569
+ "num_return_sequences": 1,
570
+ "past_present_share_buffer": true,
571
+ "repetition_penalty": 1.0,
572
+ "temperature": 0.6,
573
+ "top_k": 1,
574
+ "top_p": 1.0
575
+ }
576
+ }
position-processor.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c339bfd62cdc452be4e5f31efe9681fe7e9635163bb7ca8a56080fdad651202f
3
+ size 15327
quantizer.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af353f75fd7f683304f66fc2e95e89ba12faed2ced42e2ce7a5ba8602b546bf
3
+ size 1147
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": true,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "32000": {
31
+ "content": "<|endoftext|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<|assistant|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": true,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32002": {
47
+ "content": "<|placeholder1|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": true,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "32003": {
55
+ "content": "<|placeholder2|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": true,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "32004": {
63
+ "content": "<|placeholder3|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": true,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "32005": {
71
+ "content": "<|placeholder4|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": true,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "32006": {
79
+ "content": "<|system|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": true,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "32007": {
87
+ "content": "<|end|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": true,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "32008": {
95
+ "content": "<|placeholder5|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": true,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "32009": {
103
+ "content": "<|placeholder6|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": true,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "32010": {
111
+ "content": "<|user|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": true,
115
+ "single_word": false,
116
+ "special": true
117
+ }
118
+ },
119
+ "bos_token": "<s>",
120
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
121
+ "clean_up_tokenization_spaces": false,
122
+ "eos_token": "<|endoftext|>",
123
+ "legacy": false,
124
+ "model_max_length": 131072,
125
+ "pad_token": "<|endoftext|>",
126
+ "padding_side": "left",
127
+ "sp_model_kwargs": {},
128
+ "tokenizer_class": "LlamaTokenizer",
129
+ "unk_token": "<unk>",
130
+ "use_default_system_prompt": false
131
+ }
weight_sharing_model_1_of_4.serialized.json ADDED
The diff for this file is too large to render. See raw diff
 
weight_sharing_model_2_of_4.serialized.json ADDED
The diff for this file is too large to render. See raw diff
 
weight_sharing_model_3_of_4.serialized.json ADDED
The diff for this file is too large to render. See raw diff
 
weight_sharing_model_4_of_4.serialized.json ADDED
The diff for this file is too large to render. See raw diff