diff --git "a/weight_sharing_model_3_of_4.serialized.json" "b/weight_sharing_model_3_of_4.serialized.json" new file mode 100644--- /dev/null +++ "b/weight_sharing_model_3_of_4.serialized.json" @@ -0,0 +1,1999 @@ +{ + "version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3", + "info": { + "backendId": 6, + "buildId": "v2.28.0.241029232508_102474", + "coreApiVersion": "2.21.0", + "backendApiVersion": "5.28.0", + "socVersion": "", + "contextBlobVersion": "3.2.0", + "contextBlobSize": 467141712, + "numContextTensors": 0, + "contextTensors": [], + "numGraphs": 2, + "graphs": [ + { + "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", + "info": { + "graphName": "ar128_cl4096_3_of_4", + "numGraphInputs": 20, + "graphInputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1, + "name": "past_key_16_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15907233953475953, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 6, + "name": "past_key_21_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14844174683094026, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 9, + "name": "past_value_21_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07443267107009888, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16, + "name": "past_value_16_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07209276407957077, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 125, + "name": "past_key_22_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1573055237531662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 128, + "name": "past_value_22_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06790729612112045, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 267, + "name": "past_key_23_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1642313301563263, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 270, + "name": "past_value_23_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0965195819735527, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 441, + "name": "past_key_17_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17103533446788789, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 444, + "name": "past_value_17_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06805716454982758, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 583, + "name": "past_key_18_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14796161651611329, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 586, + "name": "past_value_18_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06999378651380539, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 731, + "name": "past_key_19_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15470433235168458, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 734, + "name": "past_value_19_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06925888359546662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 875, + "name": "past_key_20_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 3968 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16168494522571565, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 878, + "name": "past_value_20_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 3968, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.062198661267757419, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1041, + "name": "_model_layers_15_Add_1_Add_output_0", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 128, + 3072 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1906648725271225, + "offset": -41533 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1946, + "name": "position_ids_cos", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 128, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1948, + "name": "position_ids_sin", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 128, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 2653, + "name": "attention_mask", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 128, + 4096 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0007629510946571827, + "offset": -65535 + } + } + } + } + ], + "numGraphOutputs": 17, + "graphOutputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 1913, + "name": "past_value_16_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07209276407957077, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 2556, + "name": "past_key_16_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15907233953475953, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 3676, + "name": "past_value_17_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06805716454982758, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 4317, + "name": "past_key_17_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17103533446788789, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 5436, + "name": "past_value_18_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06999378651380539, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 6077, + "name": "past_key_18_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14796161651611329, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 7196, + "name": "past_value_19_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06925888359546662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 7837, + "name": "past_key_19_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15470433235168458, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 8956, + "name": "past_value_20_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.062198661267757419, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 9597, + "name": "past_key_20_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16168494522571565, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 10716, + "name": "past_value_21_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07443267107009888, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 11357, + "name": "past_key_21_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14844174683094026, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 12476, + "name": "past_value_22_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06790729612112045, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 13117, + "name": "past_key_22_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1573055237531662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 14236, + "name": "past_value_23_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 128, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0965195819735527, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 14877, + "name": "past_key_23_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 128 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1642313301563263, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15124, + "name": "_model_layers_23_Add_1_Add_output_0", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 128, + 3072 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.191017746925354, + "offset": -41581 + } + } + } + } + ], + "numUpdateableTensors": 0, + "updateableTensors": [], + "graphBlobInfoSize": 40, + "graphBlobInfo": [ + { + "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", + "info": { + "spillFillBufferSize": 0, + "optimizationLevel": 3, + "vtcmSize": 8, + "htpDlbc": 0, + "numHvxThreads": 0 + } + } + ] + } + }, + { + "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", + "info": { + "graphName": "ar1_cl4096_3_of_4", + "numGraphInputs": 20, + "graphInputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15125, + "name": "past_key_16_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15907233953475953, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15130, + "name": "past_key_21_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14844174683094026, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15133, + "name": "past_value_21_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07443267107009888, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15140, + "name": "past_value_16_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07209276407957077, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15249, + "name": "past_key_22_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1573055237531662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15252, + "name": "past_value_22_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06790729612112045, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15391, + "name": "past_key_23_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1642313301563263, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15394, + "name": "past_value_23_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0965195819735527, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15565, + "name": "past_key_17_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17103533446788789, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15568, + "name": "past_value_17_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06805716454982758, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15707, + "name": "past_key_18_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14796161651611329, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15710, + "name": "past_value_18_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06999378651380539, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15855, + "name": "past_key_19_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15470433235168458, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15858, + "name": "past_value_19_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06925888359546662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 15999, + "name": "past_key_20_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 4095 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16168494522571565, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16002, + "name": "past_value_20_in", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 4095, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.062198661267757419, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 16165, + "name": "_model_layers_15_Add_1_Add_output_0", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 1, + 3072 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1906648725271225, + "offset": -41533 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17070, + "name": "position_ids_cos", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 1, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17072, + "name": "position_ids_sin", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 1, + 48 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.00003632373773143627, + "offset": -32768 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17777, + "name": "attention_mask", + "type": "QNN_TENSOR_TYPE_APP_WRITE", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 4, + "dimensions": [ + 1, + 1, + 1, + 4096 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0007629510946571827, + "offset": -65535 + } + } + } + } + ], + "numGraphOutputs": 17, + "graphOutputs": [ + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17037, + "name": "past_value_16_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07209276407957077, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 17680, + "name": "past_key_16_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15907233953475953, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 18800, + "name": "past_value_17_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06805716454982758, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 19441, + "name": "past_key_17_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.17103533446788789, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 20560, + "name": "past_value_18_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06999378651380539, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 21201, + "name": "past_key_18_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14796161651611329, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 22320, + "name": "past_value_19_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06925888359546662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 22961, + "name": "past_key_19_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.15470433235168458, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 24080, + "name": "past_value_20_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.062198661267757419, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 24721, + "name": "past_key_20_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.16168494522571565, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 25840, + "name": "past_value_21_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.07443267107009888, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 26481, + "name": "past_key_21_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.14844174683094026, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 27600, + "name": "past_value_22_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.06790729612112045, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 28241, + "name": "past_key_22_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1573055237531662, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 29360, + "name": "past_value_23_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 1, + 96 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.0965195819735527, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 30001, + "name": "past_key_23_out", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_8", + "rank": 4, + "dimensions": [ + 32, + 1, + 96, + 1 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.1642313301563263, + "offset": -128 + } + } + } + }, + { + "version": "QNN_TENSOR_VERSION_1", + "info": { + "id": 30248, + "name": "_model_layers_23_Add_1_Add_output_0", + "type": "QNN_TENSOR_TYPE_APP_READ", + "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", + "dataType": "QNN_DATATYPE_UFIXED_POINT_16", + "rank": 3, + "dimensions": [ + 1, + 1, + 3072 + ], + "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", + "quantizeParams": { + "definition": "QNN_DEFINITION_DEFINED", + "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", + "scaleOffset": { + "scale": 0.191017746925354, + "offset": -41581 + } + } + } + } + ], + "numUpdateableTensors": 0, + "updateableTensors": [], + "graphBlobInfoSize": 40, + "graphBlobInfo": [ + { + "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", + "info": { + "spillFillBufferSize": 0, + "optimizationLevel": 3, + "vtcmSize": 8, + "htpDlbc": 0, + "numHvxThreads": 0 + } + } + ] + } + } + ], + "contextMetadataSize": 8, + "contextMetadata": { + "version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1", + "info": { + "dsp arch": 73 + } + }, + "soc model": 43 + } +}