doberst committed on
Commit
3d37237
·
verified ·
1 Parent(s): e791356

Update genai_config.json

Browse files
Files changed (1) hide show
  1. genai_config.json +579 -579
genai_config.json CHANGED
@@ -1,580 +1,580 @@
1
- {
2
- "model": {
3
- "bos_token_id": 1,
4
- "context_length": 4096,
5
- "decoder": {
6
- "session_options": {
7
- "log_id": "onnxruntime-genai",
8
- "provider_options": [],
9
- "log_severity_level": 0
10
- },
11
- "filename": "model.onnx",
12
- "head_size": 96,
13
- "hidden_size": 3072,
14
- "inputs": {
15
- "input_ids": "input_ids",
16
- "attention_mask": "attention_mask_before_processor",
17
- "position_ids": "position_ids",
18
- "past_key_names": "past_key_%d_in",
19
- "past_value_names": "past_value_%d_in"
20
- },
21
- "outputs": {
22
- "logits": "logits_dequantized",
23
- "present_key_names": "past_key_%d_out",
24
- "present_value_names": "past_value_%d_out"
25
- },
26
- "num_attention_heads": 32,
27
- "num_hidden_layers": 32,
28
- "num_key_value_heads": 32,
29
- "sliding_window": {
30
- "window_size": 128,
31
- "pad_value": 128
32
- },
33
- "pipeline": [
34
- {
35
- "position_processor": {
36
- "filename": "position-processor.onnx",
37
- "inputs": [
38
- "attention_mask_before_processor",
39
- "position_ids"
40
- ],
41
- "outputs": [
42
- "attention_mask_before_quantizer",
43
- "position_ids_cos_before_quantizer",
44
- "position_ids_sin_before_quantizer"
45
- ],
46
- "session_options": {
47
- "log_id": "onnxruntime-genai.position_processor",
48
- "provider_options": [
49
- {}
50
- ]
51
- }
52
- },
53
- "quantizer": {
54
- "filename": "quantizer.onnx",
55
- "inputs": [
56
- "attention_mask_before_quantizer",
57
- "position_ids_cos_before_quantizer",
58
- "position_ids_sin_before_quantizer"
59
- ],
60
- "outputs": [
61
- "attention_mask",
62
- "position_ids_cos",
63
- "position_ids_sin"
64
- ],
65
- "session_options": {
66
- "log_id": "onnxruntime-genai.quantizer",
67
- "provider_options": [
68
- {}
69
- ]
70
- }
71
- },
72
- "prompt-processor-1": {
73
- "filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
74
- "inputs": [
75
- "input_ids",
76
- "past_key_0_in",
77
- "past_key_5_in",
78
- "past_value_5_in",
79
- "past_value_0_in",
80
- "past_key_6_in",
81
- "past_value_6_in",
82
- "past_key_7_in",
83
- "past_value_7_in",
84
- "past_key_1_in",
85
- "past_value_1_in",
86
- "past_key_2_in",
87
- "past_value_2_in",
88
- "past_key_3_in",
89
- "past_value_3_in",
90
- "past_key_4_in",
91
- "past_value_4_in",
92
- "position_ids_cos",
93
- "position_ids_sin",
94
- "attention_mask"
95
- ],
96
- "outputs": [
97
- "past_value_0_out",
98
- "past_key_0_out",
99
- "past_value_1_out",
100
- "past_key_1_out",
101
- "past_value_2_out",
102
- "past_key_2_out",
103
- "past_value_3_out",
104
- "past_key_3_out",
105
- "past_value_4_out",
106
- "past_key_4_out",
107
- "past_value_5_out",
108
- "past_key_5_out",
109
- "past_value_6_out",
110
- "past_key_6_out",
111
- "past_value_7_out",
112
- "past_key_7_out",
113
- "_model_layers_7_Add_1_Add_output_0"
114
- ],
115
- "session_options": {
116
- "log_id": "onnxruntime-genai.pp1",
117
- "provider_options": [
118
- {
119
- "qnn": {
120
- "backend_path": "QnnHtp.dll",
121
- "htp_performance_mode": "burst",
122
- "enable_htp_shared_memory_allocator": "1",
123
- "qnn_context_priority": "high"
124
- }
125
- }
126
- ]
127
- },
128
- "run_on_token_gen": false
129
- },
130
- "prompt-processor-2": {
131
- "filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
132
- "inputs": [
133
- "_model_layers_7_Add_1_Add_output_0",
134
- "past_key_8_in",
135
- "past_key_13_in",
136
- "past_value_13_in",
137
- "past_value_8_in",
138
- "past_key_14_in",
139
- "past_value_14_in",
140
- "past_key_15_in",
141
- "past_value_15_in",
142
- "past_key_9_in",
143
- "past_value_9_in",
144
- "past_key_10_in",
145
- "past_value_10_in",
146
- "past_key_11_in",
147
- "past_value_11_in",
148
- "past_key_12_in",
149
- "past_value_12_in",
150
- "position_ids_cos",
151
- "position_ids_sin",
152
- "attention_mask"
153
- ],
154
- "outputs": [
155
- "past_value_8_out",
156
- "past_key_8_out",
157
- "past_value_9_out",
158
- "past_key_9_out",
159
- "past_value_10_out",
160
- "past_key_10_out",
161
- "past_value_11_out",
162
- "past_key_11_out",
163
- "past_value_12_out",
164
- "past_key_12_out",
165
- "past_value_13_out",
166
- "past_key_13_out",
167
- "past_value_14_out",
168
- "past_key_14_out",
169
- "past_value_15_out",
170
- "past_key_15_out",
171
- "_model_layers_15_Add_1_Add_output_0"
172
- ],
173
- "session_options": {
174
- "log_id": "onnxruntime-genai.pp2",
175
- "provider_options": [
176
- {
177
- "qnn": {
178
- "backend_path": "QnnHtp.dll",
179
- "htp_performance_mode": "burst",
180
- "enable_htp_shared_memory_allocator": "1",
181
- "qnn_context_priority": "high"
182
- }
183
- }
184
- ]
185
- },
186
- "run_on_token_gen": false
187
- },
188
- "prompt-processor-3": {
189
- "filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
190
- "inputs": [
191
- "_model_layers_15_Add_1_Add_output_0",
192
- "past_key_16_in",
193
- "past_key_21_in",
194
- "past_value_21_in",
195
- "past_value_16_in",
196
- "past_key_22_in",
197
- "past_value_22_in",
198
- "past_key_23_in",
199
- "past_value_23_in",
200
- "past_key_17_in",
201
- "past_value_17_in",
202
- "past_key_18_in",
203
- "past_value_18_in",
204
- "past_key_19_in",
205
- "past_value_19_in",
206
- "past_key_20_in",
207
- "past_value_20_in",
208
- "position_ids_cos",
209
- "position_ids_sin",
210
- "attention_mask"
211
- ],
212
- "outputs": [
213
- "past_value_16_out",
214
- "past_key_16_out",
215
- "past_value_17_out",
216
- "past_key_17_out",
217
- "past_value_18_out",
218
- "past_key_18_out",
219
- "past_value_19_out",
220
- "past_key_19_out",
221
- "past_value_20_out",
222
- "past_key_20_out",
223
- "past_value_21_out",
224
- "past_key_21_out",
225
- "past_value_22_out",
226
- "past_key_22_out",
227
- "past_value_23_out",
228
- "past_key_23_out",
229
- "_model_layers_23_Add_1_Add_output_0"
230
- ],
231
- "session_options": {
232
- "log_id": "onnxruntime-genai.pp3",
233
- "provider_options": [
234
- {
235
- "qnn": {
236
- "backend_path": "QnnHtp.dll",
237
- "htp_performance_mode": "burst",
238
- "enable_htp_shared_memory_allocator": "1",
239
- "qnn_context_priority": "high"
240
- }
241
- }
242
- ]
243
- },
244
- "run_on_token_gen": false
245
- },
246
- "prompt-processor-4": {
247
- "filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
248
- "inputs": [
249
- "_model_layers_23_Add_1_Add_output_0",
250
- "past_key_24_in",
251
- "past_key_29_in",
252
- "past_value_29_in",
253
- "past_value_24_in",
254
- "past_key_30_in",
255
- "past_value_30_in",
256
- "past_key_31_in",
257
- "past_value_31_in",
258
- "past_key_25_in",
259
- "past_value_25_in",
260
- "past_key_26_in",
261
- "past_value_26_in",
262
- "past_key_27_in",
263
- "past_value_27_in",
264
- "past_key_28_in",
265
- "past_value_28_in",
266
- "position_ids_cos",
267
- "position_ids_sin",
268
- "attention_mask"
269
- ],
270
- "outputs": [
271
- "past_value_24_out",
272
- "past_key_24_out",
273
- "past_value_25_out",
274
- "past_key_25_out",
275
- "past_value_26_out",
276
- "past_key_26_out",
277
- "past_value_27_out",
278
- "past_key_27_out",
279
- "past_value_28_out",
280
- "past_key_28_out",
281
- "past_value_29_out",
282
- "past_key_29_out",
283
- "past_value_30_out",
284
- "past_key_30_out",
285
- "past_value_31_out",
286
- "past_key_31_out",
287
- "logits"
288
- ],
289
- "session_options": {
290
- "log_id": "onnxruntime-genai.pp4",
291
- "provider_options": [
292
- {
293
- "qnn": {
294
- "backend_path": "QnnHtp.dll",
295
- "htp_performance_mode": "burst",
296
- "enable_htp_shared_memory_allocator": "1",
297
- "qnn_context_priority": "high"
298
- }
299
- }
300
- ]
301
- },
302
- "run_on_token_gen": false
303
- },
304
- "token-generator-1": {
305
- "filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
306
- "inputs": [
307
- "input_ids",
308
- "past_key_0_in",
309
- "past_key_5_in",
310
- "past_value_5_in",
311
- "past_value_0_in",
312
- "past_key_6_in",
313
- "past_value_6_in",
314
- "past_key_7_in",
315
- "past_value_7_in",
316
- "past_key_1_in",
317
- "past_value_1_in",
318
- "past_key_2_in",
319
- "past_value_2_in",
320
- "past_key_3_in",
321
- "past_value_3_in",
322
- "past_key_4_in",
323
- "past_value_4_in",
324
- "position_ids_cos",
325
- "position_ids_sin",
326
- "attention_mask"
327
- ],
328
- "outputs": [
329
- "past_value_0_out",
330
- "past_key_0_out",
331
- "past_value_1_out",
332
- "past_key_1_out",
333
- "past_value_2_out",
334
- "past_key_2_out",
335
- "past_value_3_out",
336
- "past_key_3_out",
337
- "past_value_4_out",
338
- "past_key_4_out",
339
- "past_value_5_out",
340
- "past_key_5_out",
341
- "past_value_6_out",
342
- "past_key_6_out",
343
- "past_value_7_out",
344
- "past_key_7_out",
345
- "_model_layers_7_Add_1_Add_output_0"
346
- ],
347
- "session_options": {
348
- "log_id": "onnxruntime-genai.tg1",
349
- "provider_options": [
350
- {
351
- "qnn": {
352
- "backend_path": "QnnHtp.dll",
353
- "htp_performance_mode": "burst",
354
- "enable_htp_shared_memory_allocator": "1",
355
- "qnn_context_priority": "high"
356
- }
357
- }
358
- ]
359
- },
360
- "run_on_prompt": false
361
- },
362
- "token-generator-2": {
363
- "filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
364
- "inputs": [
365
- "_model_layers_7_Add_1_Add_output_0",
366
- "past_key_8_in",
367
- "past_key_13_in",
368
- "past_value_13_in",
369
- "past_value_8_in",
370
- "past_key_14_in",
371
- "past_value_14_in",
372
- "past_key_15_in",
373
- "past_value_15_in",
374
- "past_key_9_in",
375
- "past_value_9_in",
376
- "past_key_10_in",
377
- "past_value_10_in",
378
- "past_key_11_in",
379
- "past_value_11_in",
380
- "past_key_12_in",
381
- "past_value_12_in",
382
- "position_ids_cos",
383
- "position_ids_sin",
384
- "attention_mask"
385
- ],
386
- "outputs": [
387
- "past_value_8_out",
388
- "past_key_8_out",
389
- "past_value_9_out",
390
- "past_key_9_out",
391
- "past_value_10_out",
392
- "past_key_10_out",
393
- "past_value_11_out",
394
- "past_key_11_out",
395
- "past_value_12_out",
396
- "past_key_12_out",
397
- "past_value_13_out",
398
- "past_key_13_out",
399
- "past_value_14_out",
400
- "past_key_14_out",
401
- "past_value_15_out",
402
- "past_key_15_out",
403
- "_model_layers_15_Add_1_Add_output_0"
404
- ],
405
- "session_options": {
406
- "log_id": "onnxruntime-genai.tg2",
407
- "provider_options": [
408
- {
409
- "qnn": {
410
- "backend_path": "QnnHtp.dll",
411
- "htp_performance_mode": "burst",
412
- "enable_htp_shared_memory_allocator": "1",
413
- "qnn_context_priority": "high"
414
- }
415
- }
416
- ]
417
- },
418
- "run_on_prompt": false
419
- },
420
- "token-generator-3": {
421
- "filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
422
- "inputs": [
423
- "_model_layers_15_Add_1_Add_output_0",
424
- "past_key_16_in",
425
- "past_key_21_in",
426
- "past_value_21_in",
427
- "past_value_16_in",
428
- "past_key_22_in",
429
- "past_value_22_in",
430
- "past_key_23_in",
431
- "past_value_23_in",
432
- "past_key_17_in",
433
- "past_value_17_in",
434
- "past_key_18_in",
435
- "past_value_18_in",
436
- "past_key_19_in",
437
- "past_value_19_in",
438
- "past_key_20_in",
439
- "past_value_20_in",
440
- "position_ids_cos",
441
- "position_ids_sin",
442
- "attention_mask"
443
- ],
444
- "outputs": [
445
- "past_value_16_out",
446
- "past_key_16_out",
447
- "past_value_17_out",
448
- "past_key_17_out",
449
- "past_value_18_out",
450
- "past_key_18_out",
451
- "past_value_19_out",
452
- "past_key_19_out",
453
- "past_value_20_out",
454
- "past_key_20_out",
455
- "past_value_21_out",
456
- "past_key_21_out",
457
- "past_value_22_out",
458
- "past_key_22_out",
459
- "past_value_23_out",
460
- "past_key_23_out",
461
- "_model_layers_23_Add_1_Add_output_0"
462
- ],
463
- "session_options": {
464
- "log_id": "onnxruntime-genai.tg3",
465
- "provider_options": [
466
- {
467
- "qnn": {
468
- "backend_path": "QnnHtp.dll",
469
- "htp_performance_mode": "burst",
470
- "enable_htp_shared_memory_allocator": "1",
471
- "qnn_context_priority": "high"
472
- }
473
- }
474
- ]
475
- },
476
- "run_on_prompt": false
477
- },
478
- "token-generator-4": {
479
- "filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
480
- "inputs": [
481
- "_model_layers_23_Add_1_Add_output_0",
482
- "past_key_24_in",
483
- "past_key_29_in",
484
- "past_value_29_in",
485
- "past_value_24_in",
486
- "past_key_30_in",
487
- "past_value_30_in",
488
- "past_key_31_in",
489
- "past_value_31_in",
490
- "past_key_25_in",
491
- "past_value_25_in",
492
- "past_key_26_in",
493
- "past_value_26_in",
494
- "past_key_27_in",
495
- "past_value_27_in",
496
- "past_key_28_in",
497
- "past_value_28_in",
498
- "position_ids_cos",
499
- "position_ids_sin",
500
- "attention_mask"
501
- ],
502
- "outputs": [
503
- "past_value_24_out",
504
- "past_key_24_out",
505
- "past_value_25_out",
506
- "past_key_25_out",
507
- "past_value_26_out",
508
- "past_key_26_out",
509
- "past_value_27_out",
510
- "past_key_27_out",
511
- "past_value_28_out",
512
- "past_key_28_out",
513
- "past_value_29_out",
514
- "past_key_29_out",
515
- "past_value_30_out",
516
- "past_key_30_out",
517
- "past_value_31_out",
518
- "past_key_31_out",
519
- "logits"
520
- ],
521
- "session_options": {
522
- "log_id": "onnxruntime-genai.tg4",
523
- "provider_options": [
524
- {
525
- "qnn": {
526
- "backend_path": "QnnHtp.dll",
527
- "htp_performance_mode": "burst",
528
- "enable_htp_shared_memory_allocator": "1",
529
- "qnn_context_priority": "high"
530
- }
531
- }
532
- ]
533
- },
534
- "run_on_prompt": false
535
- },
536
- "dequantizer": {
537
- "filename": "dequantizer.onnx",
538
- "inputs": [
539
- "logits"
540
- ],
541
- "outputs": [
542
- "logits_dequantized"
543
- ],
544
- "session_options": {
545
- "log_id": "onnxruntime-genai.dequantizer",
546
- "provider_options": [
547
- {}
548
- ]
549
- }
550
- }
551
- }
552
- ]
553
- },
554
- "eos_token_id": [
555
- 32007,
556
- 32001,
557
- 32000,
558
- 2
559
- ],
560
- "pad_token_id": 32000,
561
- "type": "decoder-pipeline",
562
- "vocab_size": 32064
563
- },
564
- "search": {
565
- "diversity_penalty": 0.0,
566
- "do_sample": true,
567
- "early_stopping": true,
568
- "length_penalty": 1.0,
569
- "max_length": 2048,
570
- "min_length": 0,
571
- "no_repeat_ngram_size": 0,
572
- "num_beams": 1,
573
- "num_return_sequences": 1,
574
- "past_present_share_buffer": true,
575
- "repetition_penalty": 1.0,
576
- "temperature": 0.6,
577
- "top_k": 1,
578
- "top_p": 1.0
579
- }
580
  }
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 4096,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": [],
9
+ "log_severity_level": 0
10
+ },
11
+ "filename": "model.onnx",
12
+ "head_size": 96,
13
+ "hidden_size": 3072,
14
+ "inputs": {
15
+ "input_ids": "input_ids",
16
+ "attention_mask": "attention_mask_before_processor",
17
+ "position_ids": "position_ids",
18
+ "past_key_names": "past_key_%d_in",
19
+ "past_value_names": "past_value_%d_in"
20
+ },
21
+ "outputs": {
22
+ "logits": "logits_dequantized",
23
+ "present_key_names": "past_key_%d_out",
24
+ "present_value_names": "past_value_%d_out"
25
+ },
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 32,
28
+ "num_key_value_heads": 32,
29
+ "sliding_window": {
30
+ "window_size": 128,
31
+ "pad_value": 128
32
+ },
33
+ "pipeline": [
34
+ {
35
+ "position_processor": {
36
+ "filename": "position-processor.onnx",
37
+ "inputs": [
38
+ "attention_mask_before_processor",
39
+ "position_ids"
40
+ ],
41
+ "outputs": [
42
+ "attention_mask_before_quantizer",
43
+ "position_ids_cos_before_quantizer",
44
+ "position_ids_sin_before_quantizer"
45
+ ],
46
+ "session_options": {
47
+ "log_id": "onnxruntime-genai.position_processor",
48
+ "provider_options": [
49
+ {}
50
+ ]
51
+ }
52
+ },
53
+ "quantizer": {
54
+ "filename": "quantizer.onnx",
55
+ "inputs": [
56
+ "attention_mask_before_quantizer",
57
+ "position_ids_cos_before_quantizer",
58
+ "position_ids_sin_before_quantizer"
59
+ ],
60
+ "outputs": [
61
+ "attention_mask",
62
+ "position_ids_cos",
63
+ "position_ids_sin"
64
+ ],
65
+ "session_options": {
66
+ "log_id": "onnxruntime-genai.quantizer",
67
+ "provider_options": [
68
+ {}
69
+ ]
70
+ }
71
+ },
72
+ "prompt-processor-1": {
73
+ "filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
74
+ "inputs": [
75
+ "input_ids",
76
+ "past_key_0_in",
77
+ "past_key_5_in",
78
+ "past_value_5_in",
79
+ "past_value_0_in",
80
+ "past_key_6_in",
81
+ "past_value_6_in",
82
+ "past_key_7_in",
83
+ "past_value_7_in",
84
+ "past_key_1_in",
85
+ "past_value_1_in",
86
+ "past_key_2_in",
87
+ "past_value_2_in",
88
+ "past_key_3_in",
89
+ "past_value_3_in",
90
+ "past_key_4_in",
91
+ "past_value_4_in",
92
+ "position_ids_cos",
93
+ "position_ids_sin",
94
+ "attention_mask"
95
+ ],
96
+ "outputs": [
97
+ "past_value_0_out",
98
+ "past_key_0_out",
99
+ "past_value_1_out",
100
+ "past_key_1_out",
101
+ "past_value_2_out",
102
+ "past_key_2_out",
103
+ "past_value_3_out",
104
+ "past_key_3_out",
105
+ "past_value_4_out",
106
+ "past_key_4_out",
107
+ "past_value_5_out",
108
+ "past_key_5_out",
109
+ "past_value_6_out",
110
+ "past_key_6_out",
111
+ "past_value_7_out",
112
+ "past_key_7_out",
113
+ "_model_layers_7_Add_1_Add_output_0"
114
+ ],
115
+ "session_options": {
116
+ "log_id": "onnxruntime-genai.pp1",
117
+ "provider_options": [
118
+ {
119
+ "qnn": {
120
+ "backend_path": "QnnHtp.dll",
121
+ "htp_performance_mode": "burst",
122
+ "enable_htp_shared_memory_allocator": "1",
123
+ "qnn_context_priority": "high"
124
+ }
125
+ }
126
+ ]
127
+ },
128
+ "run_on_token_gen": false
129
+ },
130
+ "prompt-processor-2": {
131
+ "filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
132
+ "inputs": [
133
+ "_model_layers_7_Add_1_Add_output_0",
134
+ "past_key_8_in",
135
+ "past_key_13_in",
136
+ "past_value_13_in",
137
+ "past_value_8_in",
138
+ "past_key_14_in",
139
+ "past_value_14_in",
140
+ "past_key_15_in",
141
+ "past_value_15_in",
142
+ "past_key_9_in",
143
+ "past_value_9_in",
144
+ "past_key_10_in",
145
+ "past_value_10_in",
146
+ "past_key_11_in",
147
+ "past_value_11_in",
148
+ "past_key_12_in",
149
+ "past_value_12_in",
150
+ "position_ids_cos",
151
+ "position_ids_sin",
152
+ "attention_mask"
153
+ ],
154
+ "outputs": [
155
+ "past_value_8_out",
156
+ "past_key_8_out",
157
+ "past_value_9_out",
158
+ "past_key_9_out",
159
+ "past_value_10_out",
160
+ "past_key_10_out",
161
+ "past_value_11_out",
162
+ "past_key_11_out",
163
+ "past_value_12_out",
164
+ "past_key_12_out",
165
+ "past_value_13_out",
166
+ "past_key_13_out",
167
+ "past_value_14_out",
168
+ "past_key_14_out",
169
+ "past_value_15_out",
170
+ "past_key_15_out",
171
+ "_model_layers_15_Add_1_Add_output_0"
172
+ ],
173
+ "session_options": {
174
+ "log_id": "onnxruntime-genai.pp2",
175
+ "provider_options": [
176
+ {
177
+ "qnn": {
178
+ "backend_path": "QnnHtp.dll",
179
+ "htp_performance_mode": "burst",
180
+ "enable_htp_shared_memory_allocator": "1",
181
+ "qnn_context_priority": "high"
182
+ }
183
+ }
184
+ ]
185
+ },
186
+ "run_on_token_gen": false
187
+ },
188
+ "prompt-processor-3": {
189
+ "filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
190
+ "inputs": [
191
+ "_model_layers_15_Add_1_Add_output_0",
192
+ "past_key_16_in",
193
+ "past_key_21_in",
194
+ "past_value_21_in",
195
+ "past_value_16_in",
196
+ "past_key_22_in",
197
+ "past_value_22_in",
198
+ "past_key_23_in",
199
+ "past_value_23_in",
200
+ "past_key_17_in",
201
+ "past_value_17_in",
202
+ "past_key_18_in",
203
+ "past_value_18_in",
204
+ "past_key_19_in",
205
+ "past_value_19_in",
206
+ "past_key_20_in",
207
+ "past_value_20_in",
208
+ "position_ids_cos",
209
+ "position_ids_sin",
210
+ "attention_mask"
211
+ ],
212
+ "outputs": [
213
+ "past_value_16_out",
214
+ "past_key_16_out",
215
+ "past_value_17_out",
216
+ "past_key_17_out",
217
+ "past_value_18_out",
218
+ "past_key_18_out",
219
+ "past_value_19_out",
220
+ "past_key_19_out",
221
+ "past_value_20_out",
222
+ "past_key_20_out",
223
+ "past_value_21_out",
224
+ "past_key_21_out",
225
+ "past_value_22_out",
226
+ "past_key_22_out",
227
+ "past_value_23_out",
228
+ "past_key_23_out",
229
+ "_model_layers_23_Add_1_Add_output_0"
230
+ ],
231
+ "session_options": {
232
+ "log_id": "onnxruntime-genai.pp3",
233
+ "provider_options": [
234
+ {
235
+ "qnn": {
236
+ "backend_path": "QnnHtp.dll",
237
+ "htp_performance_mode": "burst",
238
+ "enable_htp_shared_memory_allocator": "1",
239
+ "qnn_context_priority": "high"
240
+ }
241
+ }
242
+ ]
243
+ },
244
+ "run_on_token_gen": false
245
+ },
246
+ "prompt-processor-4": {
247
+ "filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
248
+ "inputs": [
249
+ "_model_layers_23_Add_1_Add_output_0",
250
+ "past_key_24_in",
251
+ "past_key_29_in",
252
+ "past_value_29_in",
253
+ "past_value_24_in",
254
+ "past_key_30_in",
255
+ "past_value_30_in",
256
+ "past_key_31_in",
257
+ "past_value_31_in",
258
+ "past_key_25_in",
259
+ "past_value_25_in",
260
+ "past_key_26_in",
261
+ "past_value_26_in",
262
+ "past_key_27_in",
263
+ "past_value_27_in",
264
+ "past_key_28_in",
265
+ "past_value_28_in",
266
+ "position_ids_cos",
267
+ "position_ids_sin",
268
+ "attention_mask"
269
+ ],
270
+ "outputs": [
271
+ "past_value_24_out",
272
+ "past_key_24_out",
273
+ "past_value_25_out",
274
+ "past_key_25_out",
275
+ "past_value_26_out",
276
+ "past_key_26_out",
277
+ "past_value_27_out",
278
+ "past_key_27_out",
279
+ "past_value_28_out",
280
+ "past_key_28_out",
281
+ "past_value_29_out",
282
+ "past_key_29_out",
283
+ "past_value_30_out",
284
+ "past_key_30_out",
285
+ "past_value_31_out",
286
+ "past_key_31_out",
287
+ "logits"
288
+ ],
289
+ "session_options": {
290
+ "log_id": "onnxruntime-genai.pp4",
291
+ "provider_options": [
292
+ {
293
+ "qnn": {
294
+ "backend_path": "QnnHtp.dll",
295
+ "htp_performance_mode": "burst",
296
+ "enable_htp_shared_memory_allocator": "1",
297
+ "qnn_context_priority": "high"
298
+ }
299
+ }
300
+ ]
301
+ },
302
+ "run_on_token_gen": false
303
+ },
304
+ "token-generator-1": {
305
+ "filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
306
+ "inputs": [
307
+ "input_ids",
308
+ "past_key_0_in",
309
+ "past_key_5_in",
310
+ "past_value_5_in",
311
+ "past_value_0_in",
312
+ "past_key_6_in",
313
+ "past_value_6_in",
314
+ "past_key_7_in",
315
+ "past_value_7_in",
316
+ "past_key_1_in",
317
+ "past_value_1_in",
318
+ "past_key_2_in",
319
+ "past_value_2_in",
320
+ "past_key_3_in",
321
+ "past_value_3_in",
322
+ "past_key_4_in",
323
+ "past_value_4_in",
324
+ "position_ids_cos",
325
+ "position_ids_sin",
326
+ "attention_mask"
327
+ ],
328
+ "outputs": [
329
+ "past_value_0_out",
330
+ "past_key_0_out",
331
+ "past_value_1_out",
332
+ "past_key_1_out",
333
+ "past_value_2_out",
334
+ "past_key_2_out",
335
+ "past_value_3_out",
336
+ "past_key_3_out",
337
+ "past_value_4_out",
338
+ "past_key_4_out",
339
+ "past_value_5_out",
340
+ "past_key_5_out",
341
+ "past_value_6_out",
342
+ "past_key_6_out",
343
+ "past_value_7_out",
344
+ "past_key_7_out",
345
+ "_model_layers_7_Add_1_Add_output_0"
346
+ ],
347
+ "session_options": {
348
+ "log_id": "onnxruntime-genai.tg1",
349
+ "provider_options": [
350
+ {
351
+ "qnn": {
352
+ "backend_path": "QnnHtp.dll",
353
+ "htp_performance_mode": "burst",
354
+ "enable_htp_shared_memory_allocator": "1",
355
+ "qnn_context_priority": "high"
356
+ }
357
+ }
358
+ ]
359
+ },
360
+ "run_on_prompt": false
361
+ },
362
+ "token-generator-2": {
363
+ "filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
364
+ "inputs": [
365
+ "_model_layers_7_Add_1_Add_output_0",
366
+ "past_key_8_in",
367
+ "past_key_13_in",
368
+ "past_value_13_in",
369
+ "past_value_8_in",
370
+ "past_key_14_in",
371
+ "past_value_14_in",
372
+ "past_key_15_in",
373
+ "past_value_15_in",
374
+ "past_key_9_in",
375
+ "past_value_9_in",
376
+ "past_key_10_in",
377
+ "past_value_10_in",
378
+ "past_key_11_in",
379
+ "past_value_11_in",
380
+ "past_key_12_in",
381
+ "past_value_12_in",
382
+ "position_ids_cos",
383
+ "position_ids_sin",
384
+ "attention_mask"
385
+ ],
386
+ "outputs": [
387
+ "past_value_8_out",
388
+ "past_key_8_out",
389
+ "past_value_9_out",
390
+ "past_key_9_out",
391
+ "past_value_10_out",
392
+ "past_key_10_out",
393
+ "past_value_11_out",
394
+ "past_key_11_out",
395
+ "past_value_12_out",
396
+ "past_key_12_out",
397
+ "past_value_13_out",
398
+ "past_key_13_out",
399
+ "past_value_14_out",
400
+ "past_key_14_out",
401
+ "past_value_15_out",
402
+ "past_key_15_out",
403
+ "_model_layers_15_Add_1_Add_output_0"
404
+ ],
405
+ "session_options": {
406
+ "log_id": "onnxruntime-genai.tg2",
407
+ "provider_options": [
408
+ {
409
+ "qnn": {
410
+ "backend_path": "QnnHtp.dll",
411
+ "htp_performance_mode": "burst",
412
+ "enable_htp_shared_memory_allocator": "1",
413
+ "qnn_context_priority": "high"
414
+ }
415
+ }
416
+ ]
417
+ },
418
+ "run_on_prompt": false
419
+ },
420
+ "token-generator-3": {
421
+ "filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
422
+ "inputs": [
423
+ "_model_layers_15_Add_1_Add_output_0",
424
+ "past_key_16_in",
425
+ "past_key_21_in",
426
+ "past_value_21_in",
427
+ "past_value_16_in",
428
+ "past_key_22_in",
429
+ "past_value_22_in",
430
+ "past_key_23_in",
431
+ "past_value_23_in",
432
+ "past_key_17_in",
433
+ "past_value_17_in",
434
+ "past_key_18_in",
435
+ "past_value_18_in",
436
+ "past_key_19_in",
437
+ "past_value_19_in",
438
+ "past_key_20_in",
439
+ "past_value_20_in",
440
+ "position_ids_cos",
441
+ "position_ids_sin",
442
+ "attention_mask"
443
+ ],
444
+ "outputs": [
445
+ "past_value_16_out",
446
+ "past_key_16_out",
447
+ "past_value_17_out",
448
+ "past_key_17_out",
449
+ "past_value_18_out",
450
+ "past_key_18_out",
451
+ "past_value_19_out",
452
+ "past_key_19_out",
453
+ "past_value_20_out",
454
+ "past_key_20_out",
455
+ "past_value_21_out",
456
+ "past_key_21_out",
457
+ "past_value_22_out",
458
+ "past_key_22_out",
459
+ "past_value_23_out",
460
+ "past_key_23_out",
461
+ "_model_layers_23_Add_1_Add_output_0"
462
+ ],
463
+ "session_options": {
464
+ "log_id": "onnxruntime-genai.tg3",
465
+ "provider_options": [
466
+ {
467
+ "qnn": {
468
+ "backend_path": "QnnHtp.dll",
469
+ "htp_performance_mode": "burst",
470
+ "enable_htp_shared_memory_allocator": "1",
471
+ "qnn_context_priority": "high"
472
+ }
473
+ }
474
+ ]
475
+ },
476
+ "run_on_prompt": false
477
+ },
478
+ "token-generator-4": {
479
+ "filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
480
+ "inputs": [
481
+ "_model_layers_23_Add_1_Add_output_0",
482
+ "past_key_24_in",
483
+ "past_key_29_in",
484
+ "past_value_29_in",
485
+ "past_value_24_in",
486
+ "past_key_30_in",
487
+ "past_value_30_in",
488
+ "past_key_31_in",
489
+ "past_value_31_in",
490
+ "past_key_25_in",
491
+ "past_value_25_in",
492
+ "past_key_26_in",
493
+ "past_value_26_in",
494
+ "past_key_27_in",
495
+ "past_value_27_in",
496
+ "past_key_28_in",
497
+ "past_value_28_in",
498
+ "position_ids_cos",
499
+ "position_ids_sin",
500
+ "attention_mask"
501
+ ],
502
+ "outputs": [
503
+ "past_value_24_out",
504
+ "past_key_24_out",
505
+ "past_value_25_out",
506
+ "past_key_25_out",
507
+ "past_value_26_out",
508
+ "past_key_26_out",
509
+ "past_value_27_out",
510
+ "past_key_27_out",
511
+ "past_value_28_out",
512
+ "past_key_28_out",
513
+ "past_value_29_out",
514
+ "past_key_29_out",
515
+ "past_value_30_out",
516
+ "past_key_30_out",
517
+ "past_value_31_out",
518
+ "past_key_31_out",
519
+ "logits"
520
+ ],
521
+ "session_options": {
522
+ "log_id": "onnxruntime-genai.tg4",
523
+ "provider_options": [
524
+ {
525
+ "qnn": {
526
+ "backend_path": "QnnHtp.dll",
527
+ "htp_performance_mode": "burst",
528
+ "enable_htp_shared_memory_allocator": "1",
529
+ "qnn_context_priority": "high"
530
+ }
531
+ }
532
+ ]
533
+ },
534
+ "run_on_prompt": false
535
+ },
536
+ "dequantizer": {
537
+ "filename": "dequantizer.onnx",
538
+ "inputs": [
539
+ "logits"
540
+ ],
541
+ "outputs": [
542
+ "logits_dequantized"
543
+ ],
544
+ "session_options": {
545
+ "log_id": "onnxruntime-genai.dequantizer",
546
+ "provider_options": [
547
+ {}
548
+ ]
549
+ }
550
+ }
551
+ }
552
+ ]
553
+ },
554
+ "eos_token_id": [
555
+ 32007,
556
+ 32001,
557
+ 32000,
558
+ 2
559
+ ],
560
+ "pad_token_id": 32000,
561
+ "type": "decoder-pipeline",
562
+ "vocab_size": 32064
563
+ },
564
+ "search": {
565
+ "diversity_penalty": 0.0,
566
+ "do_sample": false,
567
+ "early_stopping": true,
568
+ "length_penalty": 1.0,
569
+ "max_length": 2048,
570
+ "min_length": 0,
571
+ "no_repeat_ngram_size": 0,
572
+ "num_beams": 1,
573
+ "num_return_sequences": 1,
574
+ "past_present_share_buffer": true,
575
+ "repetition_penalty": 1.0,
576
+ "temperature": 0.0,
577
+ "top_k": 1,
578
+ "top_p": 1.0
579
+ }
580
  }