{ "version": "QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_3", "info": { "backendId": 6, "buildId": "v2.28.0.241029232508_102474", "coreApiVersion": "2.21.0", "backendApiVersion": "5.28.0", "socVersion": "", "contextBlobVersion": "3.2.0", "contextBlobSize": 566494808, "numContextTensors": 0, "contextTensors": [], "numGraphs": 2, "graphs": [ { "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", "info": { "graphName": "ar128_cl4096_4_of_4", "numGraphInputs": 20, "graphInputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1, "name": "past_key_24_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16835589706897736, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 6, "name": "past_key_29_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.17043833434581757, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 9, "name": "past_value_29_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09825660288333893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16, "name": "past_value_24_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.07847104221582413, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 125, "name": "past_key_30_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.2074936181306839, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 128, "name": "past_value_30_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10411321371793747, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 267, "name": "past_key_31_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.18935787677764893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 270, "name": "past_value_31_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.19053252041339875, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 441, "name": "past_key_25_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1846073865890503, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 444, "name": "past_value_25_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09766243398189545, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 583, "name": "past_key_26_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15839417278766633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 586, "name": "past_value_26_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09959074854850769, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 731, "name": "past_key_27_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15847137570381165, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 734, "name": "past_value_27_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.08399864286184311, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 875, "name": "past_key_28_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 3968 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1828504055738449, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 878, "name": "past_value_28_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 3968, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09687352180480957, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1041, "name": "_model_layers_23_Add_1_Add_output_0", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 128, 3072 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.191017746925354, "offset": -41581 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1946, "name": "position_ids_cos", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 128, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1948, "name": "position_ids_sin", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 128, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 2653, "name": "attention_mask", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 128, 4096 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0007629510946571827, "offset": -65535 } } } } ], "numGraphOutputs": 17, "graphOutputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 1913, "name": "past_value_24_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.07847104221582413, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 2556, "name": "past_key_24_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16835589706897736, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 3676, "name": "past_value_25_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09766243398189545, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 4317, "name": "past_key_25_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1846073865890503, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 5436, "name": "past_value_26_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09959074854850769, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 6077, "name": "past_key_26_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15839417278766633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 7196, "name": "past_value_27_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.08399864286184311, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 7837, "name": "past_key_27_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15847137570381165, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 8956, "name": "past_value_28_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09687352180480957, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 9597, "name": "past_key_28_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1828504055738449, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 10716, "name": "past_value_29_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09825660288333893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 11357, "name": "past_key_29_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.17043833434581757, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 12476, "name": "past_value_30_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10411321371793747, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 13117, "name": "past_key_30_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.2074936181306839, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 14236, "name": "past_value_31_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 128, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.19053252041339875, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 14877, "name": "past_key_31_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 128 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.18935787677764893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15140, "name": "logits", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 128, 32064 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.001873409142717719, "offset": -26111 } } } } ], "numUpdateableTensors": 0, "updateableTensors": [], "graphBlobInfoSize": 40, "graphBlobInfo": [ { "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", "info": { "spillFillBufferSize": 0, "optimizationLevel": 3, "vtcmSize": 8, "htpDlbc": 0, "numHvxThreads": 0 } } ] } }, { "version": "QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_3", "info": { "graphName": "ar1_cl4096_4_of_4", "numGraphInputs": 20, "graphInputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15141, "name": "past_key_24_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16835589706897736, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15146, "name": "past_key_29_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.17043833434581757, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15149, "name": "past_value_29_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09825660288333893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15156, "name": "past_value_24_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.07847104221582413, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15265, "name": "past_key_30_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.2074936181306839, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15268, "name": "past_value_30_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10411321371793747, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15407, "name": "past_key_31_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.18935787677764893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15410, "name": "past_value_31_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.19053252041339875, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15581, "name": "past_key_25_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1846073865890503, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15584, "name": "past_value_25_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09766243398189545, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15723, "name": "past_key_26_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15839417278766633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15726, "name": "past_value_26_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09959074854850769, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15871, "name": "past_key_27_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15847137570381165, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 15874, "name": "past_value_27_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.08399864286184311, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16015, "name": "past_key_28_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 4095 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1828504055738449, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16018, "name": "past_value_28_in", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 4095, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09687352180480957, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 16181, "name": "_model_layers_23_Add_1_Add_output_0", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 1, 3072 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.191017746925354, "offset": -41581 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17086, "name": "position_ids_cos", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 1, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17088, "name": "position_ids_sin", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 1, 48 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.00003632373773143627, "offset": -32768 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17793, "name": "attention_mask", "type": "QNN_TENSOR_TYPE_APP_WRITE", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 4, "dimensions": [ 1, 1, 1, 4096 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.0007629510946571827, "offset": -65535 } } } } ], "numGraphOutputs": 17, "graphOutputs": [ { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17053, "name": "past_value_24_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.07847104221582413, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 17696, "name": "past_key_24_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.16835589706897736, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 18816, "name": "past_value_25_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09766243398189545, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 19457, "name": "past_key_25_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1846073865890503, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 20576, "name": "past_value_26_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09959074854850769, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 21217, "name": "past_key_26_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15839417278766633, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 22336, "name": "past_value_27_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.08399864286184311, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 22977, "name": "past_key_27_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.15847137570381165, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 24096, "name": "past_value_28_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09687352180480957, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 24737, "name": "past_key_28_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.1828504055738449, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 25856, "name": "past_value_29_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.09825660288333893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 26497, "name": "past_key_29_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.17043833434581757, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 27616, "name": "past_value_30_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.10411321371793747, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 28257, "name": "past_key_30_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.2074936181306839, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 29376, "name": "past_value_31_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 1, 96 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.19053252041339875, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 30017, "name": "past_key_31_out", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_8", "rank": 4, "dimensions": [ 32, 1, 96, 1 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.18935787677764893, "offset": -128 } } } }, { "version": "QNN_TENSOR_VERSION_1", "info": { "id": 30280, "name": "logits", "type": "QNN_TENSOR_TYPE_APP_READ", "dataFormat": "QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER", "dataType": "QNN_DATATYPE_UFIXED_POINT_16", "rank": 3, "dimensions": [ 1, 1, 32064 ], "memType": "QNN_TENSORMEMTYPE_MEMHANDLE", "quantizeParams": { "definition": "QNN_DEFINITION_DEFINED", "quantizationEncoding": "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET", "scaleOffset": { "scale": 0.001873409142717719, "offset": -26111 } } } } ], "numUpdateableTensors": 0, "updateableTensors": [], "graphBlobInfoSize": 40, "graphBlobInfo": [ { "version": "QNN_SYSTEM_CONTEXT_HTP_GRAPH_INFO_BLOB_VERSION_V1", "info": { "spillFillBufferSize": 0, "optimizationLevel": 3, "vtcmSize": 8, "htpDlbc": 0, "numHvxThreads": 0 } } ] } } ], "contextMetadataSize": 8, "contextMetadata": { "version": "QNN_SYSTEM_CONTEXT_HTP_CONTEXT_INFO_BLOB_VERSION_V1", "info": { "dsp arch": 73 } }, "soc model": 43 } }