{ "version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3", "info": { "backendId": 6, "buildId": "v2.28.0.241029232508_102474", "coreApiVersion": "2.21.0", "backendApiVersion": "5.28.0", "socVersion": "", "contextBlobVersion": "3.2.0", "contextBlobSize": 467145664, "numContextTensors": 0, "contextTensors": [], "numGraphs": 2, "graphs": [ { "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", "info": { "graphName": "ar128_cl4096_2_of_4", "numGraphInputs": 20, "graphInputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1, "name": "past_key_8_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1544273942708969, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 6, "name": "past_key_13_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3072870075702667, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 9, "name": "past_value_13_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3016812205314636, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16, "name": "past_value_8_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.03832516446709633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 125, "name": "past_key_14_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16561979055404664, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 128, "name": "past_value_14_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09218612313270569, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 267, "name": "past_key_15_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15733112394809724, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 270, "name": "past_value_15_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10109750926494599, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 441, "name": "past_key_9_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16875095665454865, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 444, "name": "past_value_9_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04019608721137047, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 583, "name": "past_key_10_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.20065709948539735, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 586, "name": "past_value_10_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04000372067093849, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 731, "name": "past_key_11_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1635383516550064, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 734, "name": "past_value_11_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.06832902133464813, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 875, "name": "past_key_12_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16393767297267915, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 878, "name": "past_value_12_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0656263679265976, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1041, "name": "_model_layers_7_Add_1_Add_output_0", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 128, 3072 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10904459655284882, "offset": -40904 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1946, "name": "position_ids_cos", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 128, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1948, "name": "position_ids_sin", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 128, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 2653, "name": "attention_mask", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 128, 4096 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0007629510946571827, "offset": -65535 } } } } ], "numGraphOutputs": 17, "graphOutputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1913, "name": "past_value_8_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.03832516446709633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 2556, "name": "past_key_8_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1544273942708969, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 3676, "name": "past_value_9_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04019608721137047, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 4317, "name": "past_key_9_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16875095665454865, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 5436, "name": "past_value_10_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04000372067093849, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 6077, "name": "past_key_10_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.20065709948539735, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 7196, "name": "past_value_11_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.06832902133464813, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 7837, "name": "past_key_11_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1635383516550064, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 8956, "name": "past_value_12_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0656263679265976, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 9597, "name": "past_key_12_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16393767297267915, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 10716, "name": "past_value_13_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3016812205314636, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 11357, "name": "past_key_13_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3072870075702667, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 12476, "name": "past_value_14_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09218612313270569, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 13117, "name": "past_key_14_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16561979055404664, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 14236, "name": "past_value_15_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10109750926494599, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 14877, "name": "past_key_15_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15733112394809724, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15124, "name": "_model_layers_15_Add_1_Add_output_0", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 128, 3072 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1906648725271225, "offset": -41533 } } } } ], "numUpdateableTensors": 0, "updateableTensors": [], "graphBlobInfoSize": 40, "graphBlobInfo": [ { "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", "info": { "spillFillBufferSize": 0, "optimizationLevel": 3, "vtcmSize": 8, "htpDlbc": 0, "numHvxThreads": 0 } } ] } }, { "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", "info": { "graphName": "ar1_cl4096_2_of_4", "numGraphInputs": 20, "graphInputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15125, "name": "past_key_8_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1544273942708969, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15130, "name": "past_key_13_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3072870075702667, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15133, "name": "past_value_13_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3016812205314636, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15140, "name": "past_value_8_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.03832516446709633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15249, "name": "past_key_14_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16561979055404664, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15252, "name": "past_value_14_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09218612313270569, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15391, "name": "past_key_15_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15733112394809724, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15394, "name": "past_value_15_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10109750926494599, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15565, "name": "past_key_9_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16875095665454865, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15568, "name": "past_value_9_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04019608721137047, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15707, "name": "past_key_10_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.20065709948539735, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15710, "name": "past_value_10_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04000372067093849, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15855, "name": "past_key_11_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1635383516550064, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15858, "name": "past_value_11_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.06832902133464813, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15999, "name": "past_key_12_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16393767297267915, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16002, "name": "past_value_12_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0656263679265976, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16165, "name": "_model_layers_7_Add_1_Add_output_0", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 1, 3072 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10904459655284882, "offset": -40904 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17070, "name": "position_ids_cos", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 1, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17072, "name": "position_ids_sin", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 1, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17777, "name": "attention_mask", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 1, 4096 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0007629510946571827, "offset": -65535 } } } } ], "numGraphOutputs": 17, "graphOutputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17037, "name": "past_value_8_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.03832516446709633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17680, "name": "past_key_8_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1544273942708969, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 18800, "name": "past_value_9_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04019608721137047, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 19441, "name": "past_key_9_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16875095665454865, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 20560, "name": "past_value_10_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.04000372067093849, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 21201, "name": "past_key_10_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.20065709948539735, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 22320, "name": "past_value_11_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.06832902133464813, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 22961, "name": "past_key_11_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1635383516550064, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 24080, "name": "past_value_12_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0656263679265976, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 24721, "name": "past_key_12_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16393767297267915, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 25840, "name": "past_value_13_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3016812205314636, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 26481, "name": "past_key_13_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.3072870075702667, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 27600, "name": "past_value_14_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09218612313270569, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 28241, "name": "past_key_14_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16561979055404664, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 29360, "name": "past_value_15_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10109750926494599, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 30001, "name": "past_key_15_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15733112394809724, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 30248, "name": "_model_layers_15_Add_1_Add_output_0", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 1, 3072 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1906648725271225, "offset": -41533 } } } } ], "numUpdateableTensors": 0, "updateableTensors": [], "graphBlobInfoSize": 40, "graphBlobInfo": [ { "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", "info": { "spillFillBufferSize": 0, "optimizationLevel": 3, "vtcmSize": 8, "htpDlbc": 0, "numHvxThreads": 0 } } ] } } ], "contextMetadataSize": 8, "contextMetadata": { "version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1", "info": { "dsp arch": 73 } }, "soc model": 43 } }