{
  "model": {
    "bos_token_id": 1,
    "context_length": 4096,
    "decoder": {
      "session_options": {
        "log_id": "onnxruntime-genai",
        "provider_options": [],
        "log_severity_level": 0
      },
      "filename": "model.onnx",
      "head_size": 96,
      "hidden_size": 3072,
      "inputs": {
        "input_ids": "input_ids",
        "attention_mask": "attention_mask_before_processor",
        "position_ids": "position_ids",
        "past_key_names": "past_key_%d_in",
        "past_value_names": "past_value_%d_in"
      },
      "outputs": {
        "logits": "logits_dequantized",
        "present_key_names": "past_key_%d_out",
        "present_value_names": "past_value_%d_out"
      },
      "num_attention_heads": 32,
      "num_hidden_layers": 32,
      "num_key_value_heads": 32,
      "pipeline": [
        {
          "position_processor": {
            "filename": "position-processor.onnx",
            "inputs": [
              "attention_mask_before_processor",
              "position_ids"
            ],
            "outputs": [
              "attention_mask_before_quantizer",
              "position_ids_cos_before_quantizer",
              "position_ids_sin_before_quantizer"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.position_processor",
              "provider_options": [
                {}
              ]
            }
          },
          "quantizer": {
            "filename": "quantizer.onnx",
            "inputs": [
              "attention_mask_before_quantizer",
              "position_ids_cos_before_quantizer",
              "position_ids_sin_before_quantizer"
            ],
            "outputs": [
              "attention_mask",
              "position_ids_cos",
              "position_ids_sin"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.quantizer",
              "provider_options": [
                {}
              ]
            }
          },
          "prompt-processor-1": {
            "filename": "ar128_cl4096_1_of_4_qnn_ctx.onnx",
            "inputs": [
              "input_ids",
              "past_key_0_in",
              "past_key_5_in",
              "past_value_5_in",
              "past_value_0_in",
              "past_key_6_in",
              "past_value_6_in",
              "past_key_7_in",
              "past_value_7_in",
              "past_key_1_in",
              "past_value_1_in",
              "past_key_2_in",
              "past_value_2_in",
              "past_key_3_in",
              "past_value_3_in",
              "past_key_4_in",
              "past_value_4_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_0_out",
              "past_key_0_out",
              "past_value_1_out",
              "past_key_1_out",
              "past_value_2_out",
              "past_key_2_out",
              "past_value_3_out",
              "past_key_3_out",
              "past_value_4_out",
              "past_key_4_out",
              "past_value_5_out",
              "past_key_5_out",
              "past_value_6_out",
              "past_key_6_out",
              "past_value_7_out",
              "past_key_7_out",
              "_model_layers_7_Add_1_Add_output_0"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.pp1",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_token_gen": false
          },
          "prompt-processor-2": {
            "filename": "ar128_cl4096_2_of_4_qnn_ctx.onnx",
            "inputs": [
              "_model_layers_7_Add_1_Add_output_0",
              "past_key_8_in",
              "past_key_13_in",
              "past_value_13_in",
              "past_value_8_in",
              "past_key_14_in",
              "past_value_14_in",
              "past_key_15_in",
              "past_value_15_in",
              "past_key_9_in",
              "past_value_9_in",
              "past_key_10_in",
              "past_value_10_in",
              "past_key_11_in",
              "past_value_11_in",
              "past_key_12_in",
              "past_value_12_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_8_out",
              "past_key_8_out",
              "past_value_9_out",
              "past_key_9_out",
              "past_value_10_out",
              "past_key_10_out",
              "past_value_11_out",
              "past_key_11_out",
              "past_value_12_out",
              "past_key_12_out",
              "past_value_13_out",
              "past_key_13_out",
              "past_value_14_out",
              "past_key_14_out",
              "past_value_15_out",
              "past_key_15_out",
              "_model_layers_15_Add_1_Add_output_0"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.pp2",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_token_gen": false
          },
          "prompt-processor-3": {
            "filename": "ar128_cl4096_3_of_4_qnn_ctx.onnx",
            "inputs": [
              "_model_layers_15_Add_1_Add_output_0",
              "past_key_16_in",
              "past_key_21_in",
              "past_value_21_in",
              "past_value_16_in",
              "past_key_22_in",
              "past_value_22_in",
              "past_key_23_in",
              "past_value_23_in",
              "past_key_17_in",
              "past_value_17_in",
              "past_key_18_in",
              "past_value_18_in",
              "past_key_19_in",
              "past_value_19_in",
              "past_key_20_in",
              "past_value_20_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_16_out",
              "past_key_16_out",
              "past_value_17_out",
              "past_key_17_out",
              "past_value_18_out",
              "past_key_18_out",
              "past_value_19_out",
              "past_key_19_out",
              "past_value_20_out",
              "past_key_20_out",
              "past_value_21_out",
              "past_key_21_out",
              "past_value_22_out",
              "past_key_22_out",
              "past_value_23_out",
              "past_key_23_out",
              "_model_layers_23_Add_1_Add_output_0"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.pp3",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_token_gen": false
          },
          "prompt-processor-4": {
            "filename": "ar128_cl4096_4_of_4_qnn_ctx.onnx",
            "inputs": [
              "_model_layers_23_Add_1_Add_output_0",
              "past_key_24_in",
              "past_key_29_in",
              "past_value_29_in",
              "past_value_24_in",
              "past_key_30_in",
              "past_value_30_in",
              "past_key_31_in",
              "past_value_31_in",
              "past_key_25_in",
              "past_value_25_in",
              "past_key_26_in",
              "past_value_26_in",
              "past_key_27_in",
              "past_value_27_in",
              "past_key_28_in",
              "past_value_28_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_24_out",
              "past_key_24_out",
              "past_value_25_out",
              "past_key_25_out",
              "past_value_26_out",
              "past_key_26_out",
              "past_value_27_out",
              "past_key_27_out",
              "past_value_28_out",
              "past_key_28_out",
              "past_value_29_out",
              "past_key_29_out",
              "past_value_30_out",
              "past_key_30_out",
              "past_value_31_out",
              "past_key_31_out",
              "logits"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.pp4",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_token_gen": false
          },
          "token-generator-1": {
            "filename": "ar1_cl4096_1_of_4_qnn_ctx.onnx",
            "inputs": [
              "input_ids",
              "past_key_0_in",
              "past_key_5_in",
              "past_value_5_in",
              "past_value_0_in",
              "past_key_6_in",
              "past_value_6_in",
              "past_key_7_in",
              "past_value_7_in",
              "past_key_1_in",
              "past_value_1_in",
              "past_key_2_in",
              "past_value_2_in",
              "past_key_3_in",
              "past_value_3_in",
              "past_key_4_in",
              "past_value_4_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_0_out",
              "past_key_0_out",
              "past_value_1_out",
              "past_key_1_out",
              "past_value_2_out",
              "past_key_2_out",
              "past_value_3_out",
              "past_key_3_out",
              "past_value_4_out",
              "past_key_4_out",
              "past_value_5_out",
              "past_key_5_out",
              "past_value_6_out",
              "past_key_6_out",
              "past_value_7_out",
              "past_key_7_out",
              "_model_layers_7_Add_1_Add_output_0"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.tg1",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_prompt": false
          },
          "token-generator-2": {
            "filename": "ar1_cl4096_2_of_4_qnn_ctx.onnx",
            "inputs": [
              "_model_layers_7_Add_1_Add_output_0",
              "past_key_8_in",
              "past_key_13_in",
              "past_value_13_in",
              "past_value_8_in",
              "past_key_14_in",
              "past_value_14_in",
              "past_key_15_in",
              "past_value_15_in",
              "past_key_9_in",
              "past_value_9_in",
              "past_key_10_in",
              "past_value_10_in",
              "past_key_11_in",
              "past_value_11_in",
              "past_key_12_in",
              "past_value_12_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_8_out",
              "past_key_8_out",
              "past_value_9_out",
              "past_key_9_out",
              "past_value_10_out",
              "past_key_10_out",
              "past_value_11_out",
              "past_key_11_out",
              "past_value_12_out",
              "past_key_12_out",
              "past_value_13_out",
              "past_key_13_out",
              "past_value_14_out",
              "past_key_14_out",
              "past_value_15_out",
              "past_key_15_out",
              "_model_layers_15_Add_1_Add_output_0"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.tg2",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_prompt": false
          },
          "token-generator-3": {
            "filename": "ar1_cl4096_3_of_4_qnn_ctx.onnx",
            "inputs": [
              "_model_layers_15_Add_1_Add_output_0",
              "past_key_16_in",
              "past_key_21_in",
              "past_value_21_in",
              "past_value_16_in",
              "past_key_22_in",
              "past_value_22_in",
              "past_key_23_in",
              "past_value_23_in",
              "past_key_17_in",
              "past_value_17_in",
              "past_key_18_in",
              "past_value_18_in",
              "past_key_19_in",
              "past_value_19_in",
              "past_key_20_in",
              "past_value_20_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_16_out",
              "past_key_16_out",
              "past_value_17_out",
              "past_key_17_out",
              "past_value_18_out",
              "past_key_18_out",
              "past_value_19_out",
              "past_key_19_out",
              "past_value_20_out",
              "past_key_20_out",
              "past_value_21_out",
              "past_key_21_out",
              "past_value_22_out",
              "past_key_22_out",
              "past_value_23_out",
              "past_key_23_out",
              "_model_layers_23_Add_1_Add_output_0"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.tg3",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_prompt": false
          },
          "token-generator-4": {
            "filename": "ar1_cl4096_4_of_4_qnn_ctx.onnx",
            "inputs": [
              "_model_layers_23_Add_1_Add_output_0",
              "past_key_24_in",
              "past_key_29_in",
              "past_value_29_in",
              "past_value_24_in",
              "past_key_30_in",
              "past_value_30_in",
              "past_key_31_in",
              "past_value_31_in",
              "past_key_25_in",
              "past_value_25_in",
              "past_key_26_in",
              "past_value_26_in",
              "past_key_27_in",
              "past_value_27_in",
              "past_key_28_in",
              "past_value_28_in",
              "position_ids_cos",
              "position_ids_sin",
              "attention_mask"
            ],
            "outputs": [
              "past_value_24_out",
              "past_key_24_out",
              "past_value_25_out",
              "past_key_25_out",
              "past_value_26_out",
              "past_key_26_out",
              "past_value_27_out",
              "past_key_27_out",
              "past_value_28_out",
              "past_key_28_out",
              "past_value_29_out",
              "past_key_29_out",
              "past_value_30_out",
              "past_key_30_out",
              "past_value_31_out",
              "past_key_31_out",
              "logits"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.tg4",
              "provider_options": [
                {
                  "qnn": {
                    "backend_path": "QnnHtp.dll",
                    "htp_performance_mode": "burst",
                    "enable_htp_shared_memory_allocator": "1",
                    "qnn_context_priority": "high"
                  }
                }
              ]
            },
            "run_on_prompt": false
          },
          "dequantizer": {
            "filename": "dequantizer.onnx",
            "inputs": [
              "logits"
            ],
            "outputs": [
              "logits_dequantized"
            ],
            "session_options": {
              "log_id": "onnxruntime-genai.dequantizer",
              "provider_options": [
                {}
              ]
            }
          }
        }
      ]
    },
    "eos_token_id": [
      32007,
      32001,
      32000,
      2
    ],
    "pad_token_id": 32000,
    "type": "decoder-pipeline",
    "vocab_size": 32064
  },
  "search": {
    "diversity_penalty": 0.0,
    "do_sample": true,
    "early_stopping": true,
    "length_penalty": 1.0,
    "max_length": 2048,
    "min_length": 0,
    "no_repeat_ngram_size": 0,
    "num_beams": 1,
    "num_return_sequences": 1,
    "past_present_share_buffer": true,
    "repetition_penalty": 1.0,
    "temperature": 0.6,
    "top_k": 1,
    "top_p": 1.0
  }
}