phi-3.5-onnx-qnn / weight_sharing_model_4_of_4.serialized.json
doberst's picture
Upload 20 files
7f81323 verified
{
"version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3",
"info": {
"backendId": 6,
"buildId": "v2.28.0.241029232508_102474",
"coreApiVersion": "2.21.0",
"backendApiVersion": "5.28.0",
"socVersion": "",
"contextBlobVersion": "3.2.0",
"contextBlobSize": 566494808,
"numContextTensors": 0,
"contextTensors": [],
"numGraphs": 2,
"graphs": [
{
"version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3",
"info": {
"graphName": "ar128_cl4096_4_of_4",
"numGraphInputs": 20,
"graphInputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1,
"name": "past_key_24_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16835589706897736,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 6,
"name": "past_key_29_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17043833434581757,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 9,
"name": "past_value_29_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09825660288333893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16,
"name": "past_value_24_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07847104221582413,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 125,
"name": "past_key_30_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.2074936181306839,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 128,
"name": "past_value_30_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.10411321371793747,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 267,
"name": "past_key_31_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.18935787677764893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 270,
"name": "past_value_31_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.19053252041339875,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 441,
"name": "past_key_25_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1846073865890503,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 444,
"name": "past_value_25_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09766243398189545,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 583,
"name": "past_key_26_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15839417278766633,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 586,
"name": "past_value_26_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09959074854850769,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 731,
"name": "past_key_27_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15847137570381165,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 734,
"name": "past_value_27_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.08399864286184311,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 875,
"name": "past_key_28_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1828504055738449,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 878,
"name": "past_value_28_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09687352180480957,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1041,
"name": "_model_layers_23_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
128,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.191017746925354,
"offset": -41581
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1946,
"name": "position_ids_cos",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1948,
"name": "position_ids_sin",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2653,
"name": "attention_mask",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
4096
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0007629510946571827,
"offset": -65535
}
}
}
}
],
"numGraphOutputs": 17,
"graphOutputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1913,
"name": "past_value_24_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07847104221582413,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2556,
"name": "past_key_24_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16835589706897736,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 3676,
"name": "past_value_25_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09766243398189545,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 4317,
"name": "past_key_25_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1846073865890503,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 5436,
"name": "past_value_26_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09959074854850769,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 6077,
"name": "past_key_26_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15839417278766633,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 7196,
"name": "past_value_27_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.08399864286184311,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 7837,
"name": "past_key_27_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15847137570381165,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 8956,
"name": "past_value_28_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09687352180480957,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 9597,
"name": "past_key_28_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1828504055738449,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 10716,
"name": "past_value_29_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09825660288333893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 11357,
"name": "past_key_29_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17043833434581757,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 12476,
"name": "past_value_30_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.10411321371793747,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 13117,
"name": "past_key_30_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.2074936181306839,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 14236,
"name": "past_value_31_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.19053252041339875,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 14877,
"name": "past_key_31_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.18935787677764893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15140,
"name": "logits",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
128,
32064
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.001873409142717719,
"offset": -26111
}
}
}
}
],
"numUpdateableTensors": 0,
"updateableTensors": [],
"graphBlobInfoSize": 40,
"graphBlobInfo": [
{
"version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1",
"info": {
"spillFillBufferSize": 0,
"optimizationLevel": 3,
"vtcmSize": 8,
"htpDlbc": 0,
"numHvxThreads": 0
}
}
]
}
},
{
"version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3",
"info": {
"graphName": "ar1_cl4096_4_of_4",
"numGraphInputs": 20,
"graphInputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15141,
"name": "past_key_24_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16835589706897736,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15146,
"name": "past_key_29_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17043833434581757,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15149,
"name": "past_value_29_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09825660288333893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15156,
"name": "past_value_24_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07847104221582413,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15265,
"name": "past_key_30_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.2074936181306839,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15268,
"name": "past_value_30_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.10411321371793747,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15407,
"name": "past_key_31_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.18935787677764893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15410,
"name": "past_value_31_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.19053252041339875,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15581,
"name": "past_key_25_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1846073865890503,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15584,
"name": "past_value_25_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09766243398189545,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15723,
"name": "past_key_26_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15839417278766633,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15726,
"name": "past_value_26_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09959074854850769,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15871,
"name": "past_key_27_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15847137570381165,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15874,
"name": "past_value_27_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.08399864286184311,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16015,
"name": "past_key_28_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1828504055738449,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16018,
"name": "past_value_28_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09687352180480957,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16181,
"name": "_model_layers_23_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
1,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.191017746925354,
"offset": -41581
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17086,
"name": "position_ids_cos",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17088,
"name": "position_ids_sin",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17793,
"name": "attention_mask",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
4096
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0007629510946571827,
"offset": -65535
}
}
}
}
],
"numGraphOutputs": 17,
"graphOutputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17053,
"name": "past_value_24_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07847104221582413,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17696,
"name": "past_key_24_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16835589706897736,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 18816,
"name": "past_value_25_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09766243398189545,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 19457,
"name": "past_key_25_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1846073865890503,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 20576,
"name": "past_value_26_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09959074854850769,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 21217,
"name": "past_key_26_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15839417278766633,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 22336,
"name": "past_value_27_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.08399864286184311,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 22977,
"name": "past_key_27_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15847137570381165,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 24096,
"name": "past_value_28_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09687352180480957,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 24737,
"name": "past_key_28_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1828504055738449,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 25856,
"name": "past_value_29_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.09825660288333893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 26497,
"name": "past_key_29_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17043833434581757,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 27616,
"name": "past_value_30_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.10411321371793747,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 28257,
"name": "past_key_30_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.2074936181306839,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 29376,
"name": "past_value_31_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.19053252041339875,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 30017,
"name": "past_key_31_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.18935787677764893,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 30280,
"name": "logits",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
1,
32064
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.001873409142717719,
"offset": -26111
}
}
}
}
],
"numUpdateableTensors": 0,
"updateableTensors": [],
"graphBlobInfoSize": 40,
"graphBlobInfo": [
{
"version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1",
"info": {
"spillFillBufferSize": 0,
"optimizationLevel": 3,
"vtcmSize": 8,
"htpDlbc": 0,
"numHvxThreads": 0
}
}
]
}
}
],
"contextMetadataSize": 8,
"contextMetadata": {
"version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1",
"info": {
"dsp arch": 73
}
},
"soc model": 43
}
}