phi-3.5-onnx-qnn / weight_sharing_model_3_of_4.serialized.json
doberst's picture
Upload 20 files
7f81323 verified
{
"version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3",
"info": {
"backendId": 6,
"buildId": "v2.28.0.241029232508_102474",
"coreApiVersion": "2.21.0",
"backendApiVersion": "5.28.0",
"socVersion": "",
"contextBlobVersion": "3.2.0",
"contextBlobSize": 467141712,
"numContextTensors": 0,
"contextTensors": [],
"numGraphs": 2,
"graphs": [
{
"version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3",
"info": {
"graphName": "ar128_cl4096_3_of_4",
"numGraphInputs": 20,
"graphInputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1,
"name": "past_key_16_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15907233953475953,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 6,
"name": "past_key_21_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14844174683094026,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 9,
"name": "past_value_21_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07443267107009888,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16,
"name": "past_value_16_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07209276407957077,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 125,
"name": "past_key_22_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1573055237531662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 128,
"name": "past_value_22_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06790729612112045,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 267,
"name": "past_key_23_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1642313301563263,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 270,
"name": "past_value_23_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0965195819735527,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 441,
"name": "past_key_17_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17103533446788789,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 444,
"name": "past_value_17_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06805716454982758,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 583,
"name": "past_key_18_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14796161651611329,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 586,
"name": "past_value_18_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06999378651380539,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 731,
"name": "past_key_19_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15470433235168458,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 734,
"name": "past_value_19_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06925888359546662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 875,
"name": "past_key_20_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
3968
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16168494522571565,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 878,
"name": "past_value_20_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
3968,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.062198661267757419,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1041,
"name": "_model_layers_15_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
128,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1906648725271225,
"offset": -41533
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1946,
"name": "position_ids_cos",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1948,
"name": "position_ids_sin",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2653,
"name": "attention_mask",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
128,
4096
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0007629510946571827,
"offset": -65535
}
}
}
}
],
"numGraphOutputs": 17,
"graphOutputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 1913,
"name": "past_value_16_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07209276407957077,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 2556,
"name": "past_key_16_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15907233953475953,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 3676,
"name": "past_value_17_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06805716454982758,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 4317,
"name": "past_key_17_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17103533446788789,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 5436,
"name": "past_value_18_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06999378651380539,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 6077,
"name": "past_key_18_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14796161651611329,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 7196,
"name": "past_value_19_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06925888359546662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 7837,
"name": "past_key_19_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15470433235168458,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 8956,
"name": "past_value_20_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.062198661267757419,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 9597,
"name": "past_key_20_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16168494522571565,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 10716,
"name": "past_value_21_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07443267107009888,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 11357,
"name": "past_key_21_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14844174683094026,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 12476,
"name": "past_value_22_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06790729612112045,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 13117,
"name": "past_key_22_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1573055237531662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 14236,
"name": "past_value_23_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
128,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0965195819735527,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 14877,
"name": "past_key_23_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
128
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1642313301563263,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15124,
"name": "_model_layers_23_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
128,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.191017746925354,
"offset": -41581
}
}
}
}
],
"numUpdateableTensors": 0,
"updateableTensors": [],
"graphBlobInfoSize": 40,
"graphBlobInfo": [
{
"version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1",
"info": {
"spillFillBufferSize": 0,
"optimizationLevel": 3,
"vtcmSize": 8,
"htpDlbc": 0,
"numHvxThreads": 0
}
}
]
}
},
{
"version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3",
"info": {
"graphName": "ar1_cl4096_3_of_4",
"numGraphInputs": 20,
"graphInputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15125,
"name": "past_key_16_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15907233953475953,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15130,
"name": "past_key_21_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14844174683094026,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15133,
"name": "past_value_21_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07443267107009888,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15140,
"name": "past_value_16_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07209276407957077,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15249,
"name": "past_key_22_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1573055237531662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15252,
"name": "past_value_22_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06790729612112045,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15391,
"name": "past_key_23_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1642313301563263,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15394,
"name": "past_value_23_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0965195819735527,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15565,
"name": "past_key_17_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17103533446788789,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15568,
"name": "past_value_17_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06805716454982758,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15707,
"name": "past_key_18_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14796161651611329,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15710,
"name": "past_value_18_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06999378651380539,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15855,
"name": "past_key_19_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15470433235168458,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15858,
"name": "past_value_19_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06925888359546662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 15999,
"name": "past_key_20_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
4095
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16168494522571565,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16002,
"name": "past_value_20_in",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
4095,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.062198661267757419,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 16165,
"name": "_model_layers_15_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
1,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1906648725271225,
"offset": -41533
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17070,
"name": "position_ids_cos",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17072,
"name": "position_ids_sin",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
48
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.00003632373773143627,
"offset": -32768
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17777,
"name": "attention_mask",
"type": "QNN_TENSOR_TYPE_APP_WRITE",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 4,
"dimensions": [
1,
1,
1,
4096
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0007629510946571827,
"offset": -65535
}
}
}
}
],
"numGraphOutputs": 17,
"graphOutputs": [
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17037,
"name": "past_value_16_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07209276407957077,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 17680,
"name": "past_key_16_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15907233953475953,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 18800,
"name": "past_value_17_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06805716454982758,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 19441,
"name": "past_key_17_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.17103533446788789,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 20560,
"name": "past_value_18_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06999378651380539,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 21201,
"name": "past_key_18_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14796161651611329,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 22320,
"name": "past_value_19_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06925888359546662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 22961,
"name": "past_key_19_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.15470433235168458,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 24080,
"name": "past_value_20_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.062198661267757419,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 24721,
"name": "past_key_20_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.16168494522571565,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 25840,
"name": "past_value_21_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.07443267107009888,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 26481,
"name": "past_key_21_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.14844174683094026,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 27600,
"name": "past_value_22_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.06790729612112045,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 28241,
"name": "past_key_22_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1573055237531662,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 29360,
"name": "past_value_23_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
1,
96
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.0965195819735527,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 30001,
"name": "past_key_23_out",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_8",
"rank": 4,
"dimensions": [
32,
1,
96,
1
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.1642313301563263,
"offset": -128
}
}
}
},
{
"version": "QNN_TENSOR_VERSION_1",
"info": {
"id": 30248,
"name": "_model_layers_23_Add_1_Add_output_0",
"type": "QNN_TENSOR_TYPE_APP_READ",
"dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER",
"dataType": "QNN_DATATYPE_UFIXED_POINT_16",
"rank": 3,
"dimensions": [
1,
1,
3072
],
"memType": "QNN_TENSORMEMTYPE_MEMHANDLE",
"quantizeParams": {
"definition": "QNN_DEFINITION_DEFINED",
"quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET",
"scaleOffset": {
"scale": 0.191017746925354,
"offset": -41581
}
}
}
}
],
"numUpdateableTensors": 0,
"updateableTensors": [],
"graphBlobInfoSize": 40,
"graphBlobInfo": [
{
"version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1",
"info": {
"spillFillBufferSize": 0,
"optimizationLevel": 3,
"vtcmSize": 8,
"htpDlbc": 0,
"numHvxThreads": 0
}
}
]
}
}
],
"contextMetadataSize": 8,
"contextMetadata": {
"version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1",
"info": {
"dsp arch": 73
}
},
"soc model": 43
}
}