diff --git "a/ParakeetEncoder_v2.mlmodelc/model.mil" "b/ParakeetEncoder_v2.mlmodelc/model.mil" --- "a/ParakeetEncoder_v2.mlmodelc/model.mil" +++ "b/ParakeetEncoder_v2.mlmodelc/model.mil" @@ -1,14 +1,14 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3405.2.1"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}})] { - func main(tensor melspectogram, tensor melspectogram_length) { + func main(tensor audio_signal, tensor length) { tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; tensor x_1_perm_0 = const()[name = tensor("x_1_perm_0"), val = tensor([0, 2, 1])]; - tensor melspectogram_to_fp16_dtype_0 = const()[name = tensor("melspectogram_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor audio_signal_to_fp16_dtype_0 = const()[name = tensor("audio_signal_to_fp16_dtype_0"), val = tensor("fp16")]; tensor cast_0_to_fp16_dtype_0 = const()[name = tensor("cast_0_to_fp16_dtype_0"), val = tensor("fp16")]; tensor var_82_promoted_to_fp16 = const()[name = tensor("op_82_promoted_to_fp16"), val = tensor(-0x1p+0)]; - tensor melspectogram_length_to_fp16 = cast(dtype = cast_0_to_fp16_dtype_0, x = melspectogram_length)[name = tensor("cast_3")]; - tensor var_83_cast_fp16 = add(x = melspectogram_length_to_fp16, y = var_82_promoted_to_fp16)[name = tensor("op_83_cast_fp16")]; + tensor length_to_fp16 = cast(dtype = cast_0_to_fp16_dtype_0, x = length)[name = tensor("cast_3")]; + tensor var_83_cast_fp16 = add(x = length_to_fp16, y = var_82_promoted_to_fp16)[name = tensor("op_83_cast_fp16")]; tensor _inversed_85_y_0_to_fp16 = const()[name = tensor("_inversed_85_y_0_to_fp16"), val = tensor(0x1p-1)]; tensor _inversed_85_cast_fp16 = mul(x = var_83_cast_fp16, y = _inversed_85_y_0_to_fp16)[name = tensor("_inversed_85_cast_fp16")]; tensor var_86_to_fp16 = const()[name = tensor("op_86_to_fp16"), val = tensor(0x1p+0)]; @@ -29,8 +29,8 @@ program(1.0) tensor lengths_13_cast_fp16 = add(x = _inversed_101_cast_fp16, y = var_102_to_fp16)[name = tensor("lengths_13_cast_fp16")]; tensor lengths_cast_fp16 = floor(x = lengths_13_cast_fp16)[name = tensor("lengths_cast_fp16")]; tensor input_1_axes_0 = const()[name = tensor("input_1_axes_0"), val = tensor([1])]; - tensor melspectogram_to_fp16 = cast(dtype = melspectogram_to_fp16_dtype_0, x = melspectogram)[name = tensor("cast_2")]; - tensor x_1_cast_fp16 = transpose(perm = x_1_perm_0, x = melspectogram_to_fp16)[name = tensor("transpose_314")]; + tensor audio_signal_to_fp16 = cast(dtype = audio_signal_to_fp16_dtype_0, x = audio_signal)[name = tensor("cast_2")]; + tensor x_1_cast_fp16 = transpose(perm = x_1_perm_0, x = audio_signal_to_fp16)[name = tensor("transpose_314")]; tensor input_1_cast_fp16 = expand_dims(axes = input_1_axes_0, x = x_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; tensor input_3_pad_type_0 = const()[name = tensor("input_3_pad_type_0"), val = tensor("custom")]; tensor input_3_pad_0 = const()[name = tensor("input_3_pad_0"), val = tensor([1, 1, 1, 1])]; @@ -131,14 +131,14 @@ program(1.0) tensor var_252 = const()[name = tensor("op_252"), val = tensor([1, -1, 8, 128])]; tensor v_1_cast_fp16 = reshape(shape = var_252, x = linear_5_cast_fp16)[name = tensor("v_1_cast_fp16")]; tensor value_1_perm_0 = const()[name = tensor("value_1_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_0_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_0_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15918464)))]; - tensor var_264_cast_fp16 = add(x = q_1_cast_fp16, y = model_layers_0_self_attn_pos_bias_u_to_fp16)[name = tensor("op_264_cast_fp16")]; - tensor model_layers_0_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_0_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15920576)))]; - tensor var_266_cast_fp16 = add(x = q_1_cast_fp16, y = model_layers_0_self_attn_pos_bias_v_to_fp16)[name = tensor("op_266_cast_fp16")]; + tensor model_layers_0_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15918464))), scale = tensor([0x1.674p-8, 0x1.208p-7, 0x1.778p-8, 0x1.d7cp-8, 0x1.a3p-9, 0x1.02cp-7, 0x1.308p-8, 0x1.4fcp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_264_cast_fp16 = add(x = q_1_cast_fp16, y = model_layers_0_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_264_cast_fp16")]; + tensor model_layers_0_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15919552))), scale = tensor([0x1.7bp-10, 0x1.ff4p-9, 0x1.4d4p-9, 0x1.0fcp-8, 0x1.71p-10, 0x1.64cp-9, 0x1.f6cp-10, 0x1.06p-9]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_266_cast_fp16 = add(x = q_1_cast_fp16, y = model_layers_0_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_266_cast_fp16")]; tensor q_with_bias_v_1_perm_0 = const()[name = tensor("q_with_bias_v_1_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_7_transpose_x_0 = const()[name = tensor("x_7_transpose_x_0"), val = tensor(false)]; tensor x_7_transpose_y_0 = const()[name = tensor("x_7_transpose_y_0"), val = tensor(false)]; - tensor op_268_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_268_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15922688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16180096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_268_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_268_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15920640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16178048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_1_cast_fp16 = transpose(perm = q_with_bias_v_1_perm_0, x = var_266_cast_fp16)[name = tensor("transpose_311")]; tensor x_7_cast_fp16 = matmul(transpose_x = x_7_transpose_x_0, transpose_y = x_7_transpose_y_0, x = q_with_bias_v_1_cast_fp16, y = op_268_to_fp16_quantized)[name = tensor("x_7_cast_fp16")]; tensor x_9_pad_0 = const()[name = tensor("x_9_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -182,12 +182,12 @@ program(1.0) tensor var_301 = const()[name = tensor("op_301"), val = tensor([1, -1, 1024])]; tensor var_300_cast_fp16 = transpose(perm = var_300_perm_0, x = x_13_cast_fp16)[name = tensor("transpose_307")]; tensor input_35_cast_fp16 = reshape(shape = var_301, x = var_300_cast_fp16)[name = tensor("input_35_cast_fp16")]; - tensor model_layers_0_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16180672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17229312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_0_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16178624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17227264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_0_self_attn_linear_out_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = tensor("linear_7_cast_fp16")]; tensor input_39_cast_fp16 = add(x = input_31_cast_fp16, y = linear_7_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor x_17_axes_0 = const()[name = tensor("x_17_axes_0"), val = tensor([-1])]; - tensor model_layers_0_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_0_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17231424)))]; - tensor model_layers_0_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_0_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17233536)))]; + tensor model_layers_0_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_0_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17229376)))]; + tensor model_layers_0_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_0_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17231488)))]; tensor x_17_cast_fp16 = layer_norm(axes = x_17_axes_0, beta = model_layers_0_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_0_norm_conv_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("x_17_cast_fp16")]; tensor input_41_perm_0 = const()[name = tensor("input_41_perm_0"), val = tensor([0, 2, 1])]; tensor input_43_pad_type_0 = const()[name = tensor("input_43_pad_type_0"), val = tensor("valid")]; @@ -195,7 +195,7 @@ program(1.0) tensor input_43_pad_0 = const()[name = tensor("input_43_pad_0"), val = tensor([0, 0])]; tensor input_43_dilations_0 = const()[name = tensor("input_43_dilations_0"), val = tensor([1])]; tensor input_43_groups_0 = const()[name = tensor("input_43_groups_0"), val = tensor(1)]; - tensor model_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17235648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19334976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17233600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332928))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_41_cast_fp16 = transpose(perm = input_41_perm_0, x = x_17_cast_fp16)[name = tensor("transpose_306")]; tensor input_43_cast_fp16 = conv(dilations = input_43_dilations_0, groups = input_43_groups_0, pad = input_43_pad_0, pad_type = input_43_pad_type_0, strides = input_43_strides_0, weight = model_layers_0_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor x_19_split_num_splits_0 = const()[name = tensor("x_19_split_num_splits_0"), val = tensor(2)]; @@ -215,8 +215,8 @@ program(1.0) tensor input_49_strides_0 = const()[name = tensor("input_49_strides_0"), val = tensor([1])]; tensor input_49_pad_0 = const()[name = tensor("input_49_pad_0"), val = tensor([0, 0])]; tensor input_49_dilations_0 = const()[name = tensor("input_49_dilations_0"), val = tensor([1])]; - tensor const_248_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_248_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19339136))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19348416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_249_to_fp16 = const()[name = tensor("const_249_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19350528)))]; + tensor const_248_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_248_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19337088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19346368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_249_to_fp16 = const()[name = tensor("const_249_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19348480)))]; tensor input_51_cast_fp16 = conv(bias = const_249_to_fp16, dilations = input_49_dilations_0, groups = input_49_groups_0, pad = input_49_pad_0, pad_type = input_49_pad_type_0, strides = input_49_strides_0, weight = const_248_to_fp16_quantized, x = input_47_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor input_53_cast_fp16 = silu(x = input_51_cast_fp16)[name = tensor("input_53_cast_fp16")]; tensor x_21_pad_type_0 = const()[name = tensor("x_21_pad_type_0"), val = tensor("valid")]; @@ -224,64 +224,64 @@ program(1.0) tensor x_21_pad_0 = const()[name = tensor("x_21_pad_0"), val = tensor([0, 0])]; tensor x_21_dilations_0 = const()[name = tensor("x_21_dilations_0"), val = tensor([1])]; tensor x_21_groups_0 = const()[name = tensor("x_21_groups_0"), val = tensor(1)]; - tensor model_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19352640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20401280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19350592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20399232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_21_cast_fp16 = conv(dilations = x_21_dilations_0, groups = x_21_groups_0, pad = x_21_pad_0, pad_type = x_21_pad_type_0, strides = x_21_strides_0, weight = model_layers_0_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_53_cast_fp16)[name = tensor("x_21_cast_fp16")]; tensor input_55_perm_0 = const()[name = tensor("input_55_perm_0"), val = tensor([0, 2, 1])]; tensor input_55_cast_fp16 = transpose(perm = input_55_perm_0, x = x_21_cast_fp16)[name = tensor("transpose_305")]; tensor input_57_cast_fp16 = add(x = input_39_cast_fp16, y = input_55_cast_fp16)[name = tensor("input_57_cast_fp16")]; tensor input_59_axes_0 = const()[name = tensor("input_59_axes_0"), val = tensor([-1])]; - tensor model_layers_0_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_0_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20403392)))]; - tensor model_layers_0_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_0_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20405504)))]; + tensor model_layers_0_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_0_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20401344)))]; + tensor model_layers_0_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_0_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20403456)))]; tensor input_59_cast_fp16 = layer_norm(axes = input_59_axes_0, beta = model_layers_0_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_0_norm_feed_forward2_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("input_59_cast_fp16")]; - tensor model_layers_0_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20407616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24601984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_0_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20405568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24599936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_0_feed_forward2_linear1_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = tensor("linear_8_cast_fp16")]; tensor input_63_cast_fp16 = silu(x = linear_8_cast_fp16)[name = tensor("input_63_cast_fp16")]; - tensor model_layers_0_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24610240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28804608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_0_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_0_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24608192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28802560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_9_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_0_feed_forward2_linear2_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = tensor("linear_9_cast_fp16")]; tensor var_361_to_fp16 = const()[name = tensor("op_361_to_fp16"), val = tensor(0x1p-1)]; tensor var_362_cast_fp16 = mul(x = linear_9_cast_fp16, y = var_361_to_fp16)[name = tensor("op_362_cast_fp16")]; tensor input_69_cast_fp16 = add(x = input_57_cast_fp16, y = var_362_cast_fp16)[name = tensor("input_69_cast_fp16")]; tensor input_71_axes_0 = const()[name = tensor("input_71_axes_0"), val = tensor([-1])]; - tensor model_layers_0_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_0_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28806720)))]; - tensor model_layers_0_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_0_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28808832)))]; + tensor model_layers_0_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_0_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28804672)))]; + tensor model_layers_0_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_0_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28806784)))]; tensor input_71_cast_fp16 = layer_norm(axes = input_71_axes_0, beta = model_layers_0_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_0_norm_out_weight_to_fp16, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor input_73_axes_0 = const()[name = tensor("input_73_axes_0"), val = tensor([-1])]; - tensor model_layers_1_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28810944)))]; - tensor model_layers_1_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28813056)))]; + tensor model_layers_1_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28808896)))]; + tensor model_layers_1_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28811008)))]; tensor input_73_cast_fp16 = layer_norm(axes = input_73_axes_0, beta = model_layers_1_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_1_norm_feed_forward1_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("input_73_cast_fp16")]; - tensor model_layers_1_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28815168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33009536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_1_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28813120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33007488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_1_feed_forward1_linear1_weight_to_fp16_quantized, x = input_73_cast_fp16)[name = tensor("linear_10_cast_fp16")]; tensor input_77_cast_fp16 = silu(x = linear_10_cast_fp16)[name = tensor("input_77_cast_fp16")]; - tensor model_layers_1_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33017792))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37212160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33015744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37210112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_11_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_1_feed_forward1_linear2_weight_to_fp16_quantized, x = input_77_cast_fp16)[name = tensor("linear_11_cast_fp16")]; tensor var_390_to_fp16 = const()[name = tensor("op_390_to_fp16"), val = tensor(0x1p-1)]; tensor var_391_cast_fp16 = mul(x = linear_11_cast_fp16, y = var_390_to_fp16)[name = tensor("op_391_cast_fp16")]; tensor input_83_cast_fp16 = add(x = input_71_cast_fp16, y = var_391_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor query_3_axes_0 = const()[name = tensor("query_3_axes_0"), val = tensor([-1])]; - tensor model_layers_1_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37214272)))]; - tensor model_layers_1_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37216384)))]; + tensor model_layers_1_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37212224)))]; + tensor model_layers_1_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37214336)))]; tensor query_3_cast_fp16 = layer_norm(axes = query_3_axes_0, beta = model_layers_1_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_1_norm_self_att_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("query_3_cast_fp16")]; - tensor model_layers_1_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37218496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38267136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37216448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38265088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_1_self_attn_linear_q_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_12_cast_fp16")]; tensor var_407 = const()[name = tensor("op_407"), val = tensor([1, -1, 8, 128])]; tensor q_7_cast_fp16 = reshape(shape = var_407, x = linear_12_cast_fp16)[name = tensor("q_7_cast_fp16")]; - tensor model_layers_1_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38269248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39317888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38267200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39315840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_13_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_1_self_attn_linear_k_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_13_cast_fp16")]; tensor var_411 = const()[name = tensor("op_411"), val = tensor([1, -1, 8, 128])]; tensor k_5_cast_fp16 = reshape(shape = var_411, x = linear_13_cast_fp16)[name = tensor("k_5_cast_fp16")]; - tensor model_layers_1_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39320000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40368640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39317952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40366592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_14_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_1_self_attn_linear_v_weight_to_fp16_quantized, x = query_3_cast_fp16)[name = tensor("linear_14_cast_fp16")]; tensor var_415 = const()[name = tensor("op_415"), val = tensor([1, -1, 8, 128])]; tensor v_3_cast_fp16 = reshape(shape = var_415, x = linear_14_cast_fp16)[name = tensor("v_3_cast_fp16")]; tensor value_3_perm_0 = const()[name = tensor("value_3_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_1_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_1_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40370752)))]; - tensor var_427_cast_fp16 = add(x = q_7_cast_fp16, y = model_layers_1_self_attn_pos_bias_u_to_fp16)[name = tensor("op_427_cast_fp16")]; - tensor model_layers_1_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_1_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40372864)))]; - tensor var_429_cast_fp16 = add(x = q_7_cast_fp16, y = model_layers_1_self_attn_pos_bias_v_to_fp16)[name = tensor("op_429_cast_fp16")]; + tensor model_layers_1_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40368704))), scale = tensor([0x1.5ep-8, 0x1.294p-7, 0x1.72cp-7, 0x1.868p-8, 0x1.49cp-7, 0x1.618p-8, 0x1.eecp-8, 0x1.e5p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_427_cast_fp16 = add(x = q_7_cast_fp16, y = model_layers_1_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_427_cast_fp16")]; + tensor model_layers_1_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40369792))), scale = tensor([0x1.d78p-8, 0x1.5e8p-8, 0x1.9fcp-9, 0x1.3a4p-9, 0x1.a6p-9, 0x1.934p-11, 0x1.09cp-8, 0x1.5a4p-9]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_429_cast_fp16 = add(x = q_7_cast_fp16, y = model_layers_1_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_429_cast_fp16")]; tensor q_with_bias_v_3_perm_0 = const()[name = tensor("q_with_bias_v_3_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_29_transpose_x_0 = const()[name = tensor("x_29_transpose_x_0"), val = tensor(false)]; tensor x_29_transpose_y_0 = const()[name = tensor("x_29_transpose_y_0"), val = tensor(false)]; - tensor op_431_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_431_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40374976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40632064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_431_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_431_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40370880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40627968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_3_cast_fp16 = transpose(perm = q_with_bias_v_3_perm_0, x = var_429_cast_fp16)[name = tensor("transpose_304")]; tensor x_29_cast_fp16 = matmul(transpose_x = x_29_transpose_x_0, transpose_y = x_29_transpose_y_0, x = q_with_bias_v_3_cast_fp16, y = op_431_to_fp16_quantized)[name = tensor("x_29_cast_fp16")]; tensor x_31_pad_0 = const()[name = tensor("x_31_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -321,12 +321,12 @@ program(1.0) tensor var_464 = const()[name = tensor("op_464"), val = tensor([1, -1, 1024])]; tensor var_463_cast_fp16 = transpose(perm = var_463_perm_0, x = x_35_cast_fp16)[name = tensor("transpose_300")]; tensor input_87_cast_fp16 = reshape(shape = var_464, x = var_463_cast_fp16)[name = tensor("input_87_cast_fp16")]; - tensor model_layers_1_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40632640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41681280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40628544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41677184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_16_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_1_self_attn_linear_out_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = tensor("linear_16_cast_fp16")]; tensor input_91_cast_fp16 = add(x = input_83_cast_fp16, y = linear_16_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor x_39_axes_0 = const()[name = tensor("x_39_axes_0"), val = tensor([-1])]; - tensor model_layers_1_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41683392)))]; - tensor model_layers_1_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41685504)))]; + tensor model_layers_1_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41679296)))]; + tensor model_layers_1_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41681408)))]; tensor x_39_cast_fp16 = layer_norm(axes = x_39_axes_0, beta = model_layers_1_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_1_norm_conv_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("x_39_cast_fp16")]; tensor input_93_perm_0 = const()[name = tensor("input_93_perm_0"), val = tensor([0, 2, 1])]; tensor input_95_pad_type_0 = const()[name = tensor("input_95_pad_type_0"), val = tensor("valid")]; @@ -334,7 +334,7 @@ program(1.0) tensor input_95_pad_0 = const()[name = tensor("input_95_pad_0"), val = tensor([0, 0])]; tensor input_95_dilations_0 = const()[name = tensor("input_95_dilations_0"), val = tensor([1])]; tensor input_95_groups_0 = const()[name = tensor("input_95_groups_0"), val = tensor(1)]; - tensor model_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41687616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43784832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41683520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43780736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_93_cast_fp16 = transpose(perm = input_93_perm_0, x = x_39_cast_fp16)[name = tensor("transpose_299")]; tensor input_95_cast_fp16 = conv(dilations = input_95_dilations_0, groups = input_95_groups_0, pad = input_95_pad_0, pad_type = input_95_pad_type_0, strides = input_95_strides_0, weight = model_layers_1_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; tensor x_41_split_num_splits_0 = const()[name = tensor("x_41_split_num_splits_0"), val = tensor(2)]; @@ -352,8 +352,8 @@ program(1.0) tensor input_101_strides_0 = const()[name = tensor("input_101_strides_0"), val = tensor([1])]; tensor input_101_pad_0 = const()[name = tensor("input_101_pad_0"), val = tensor([0, 0])]; tensor input_101_dilations_0 = const()[name = tensor("input_101_dilations_0"), val = tensor([1])]; - tensor const_250_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_250_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43788992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43798272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_251_to_fp16 = const()[name = tensor("const_251_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43800384)))]; + tensor const_250_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_250_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43784896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43794176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_251_to_fp16 = const()[name = tensor("const_251_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43796288)))]; tensor input_103_cast_fp16 = conv(bias = const_251_to_fp16, dilations = input_101_dilations_0, groups = input_101_groups_0, pad = input_101_pad_0, pad_type = input_101_pad_type_0, strides = input_101_strides_0, weight = const_250_to_fp16_quantized, x = input_99_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor input_105_cast_fp16 = silu(x = input_103_cast_fp16)[name = tensor("input_105_cast_fp16")]; tensor x_43_pad_type_0 = const()[name = tensor("x_43_pad_type_0"), val = tensor("valid")]; @@ -361,64 +361,64 @@ program(1.0) tensor x_43_pad_0 = const()[name = tensor("x_43_pad_0"), val = tensor([0, 0])]; tensor x_43_dilations_0 = const()[name = tensor("x_43_dilations_0"), val = tensor([1])]; tensor x_43_groups_0 = const()[name = tensor("x_43_groups_0"), val = tensor(1)]; - tensor model_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43802496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44851136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43798400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44847040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_43_cast_fp16 = conv(dilations = x_43_dilations_0, groups = x_43_groups_0, pad = x_43_pad_0, pad_type = x_43_pad_type_0, strides = x_43_strides_0, weight = model_layers_1_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = tensor("x_43_cast_fp16")]; tensor input_107_perm_0 = const()[name = tensor("input_107_perm_0"), val = tensor([0, 2, 1])]; tensor input_107_cast_fp16 = transpose(perm = input_107_perm_0, x = x_43_cast_fp16)[name = tensor("transpose_298")]; tensor input_109_cast_fp16 = add(x = input_91_cast_fp16, y = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor input_111_axes_0 = const()[name = tensor("input_111_axes_0"), val = tensor([-1])]; - tensor model_layers_1_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44853248)))]; - tensor model_layers_1_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44855360)))]; + tensor model_layers_1_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44849152)))]; + tensor model_layers_1_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44851264)))]; tensor input_111_cast_fp16 = layer_norm(axes = input_111_axes_0, beta = model_layers_1_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_1_norm_feed_forward2_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; - tensor model_layers_1_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44857472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49051840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_1_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44853376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49047744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_1_feed_forward2_linear1_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = tensor("linear_17_cast_fp16")]; tensor input_115_cast_fp16 = silu(x = linear_17_cast_fp16)[name = tensor("input_115_cast_fp16")]; - tensor model_layers_1_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49060096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53254464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_1_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_1_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49056000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53250368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_18_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_1_feed_forward2_linear2_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = tensor("linear_18_cast_fp16")]; tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1p-1)]; tensor var_525_cast_fp16 = mul(x = linear_18_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; tensor input_121_cast_fp16 = add(x = input_109_cast_fp16, y = var_525_cast_fp16)[name = tensor("input_121_cast_fp16")]; tensor input_123_axes_0 = const()[name = tensor("input_123_axes_0"), val = tensor([-1])]; - tensor model_layers_1_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53256576)))]; - tensor model_layers_1_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53258688)))]; + tensor model_layers_1_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_1_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53252480)))]; + tensor model_layers_1_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_1_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53254592)))]; tensor input_123_cast_fp16 = layer_norm(axes = input_123_axes_0, beta = model_layers_1_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_1_norm_out_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("input_123_cast_fp16")]; tensor input_125_axes_0 = const()[name = tensor("input_125_axes_0"), val = tensor([-1])]; - tensor model_layers_2_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53260800)))]; - tensor model_layers_2_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53262912)))]; + tensor model_layers_2_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53256704)))]; + tensor model_layers_2_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53258816)))]; tensor input_125_cast_fp16 = layer_norm(axes = input_125_axes_0, beta = model_layers_2_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_2_norm_feed_forward1_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("input_125_cast_fp16")]; - tensor model_layers_2_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53265024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57459392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_2_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53260928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57455296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_2_feed_forward1_linear1_weight_to_fp16_quantized, x = input_125_cast_fp16)[name = tensor("linear_19_cast_fp16")]; tensor input_129_cast_fp16 = silu(x = linear_19_cast_fp16)[name = tensor("input_129_cast_fp16")]; - tensor model_layers_2_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57467648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61662016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57463552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61657920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_20_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_2_feed_forward1_linear2_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = tensor("linear_20_cast_fp16")]; tensor var_553_to_fp16 = const()[name = tensor("op_553_to_fp16"), val = tensor(0x1p-1)]; tensor var_554_cast_fp16 = mul(x = linear_20_cast_fp16, y = var_553_to_fp16)[name = tensor("op_554_cast_fp16")]; tensor input_135_cast_fp16 = add(x = input_123_cast_fp16, y = var_554_cast_fp16)[name = tensor("input_135_cast_fp16")]; tensor query_5_axes_0 = const()[name = tensor("query_5_axes_0"), val = tensor([-1])]; - tensor model_layers_2_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61664128)))]; - tensor model_layers_2_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61666240)))]; + tensor model_layers_2_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61660032)))]; + tensor model_layers_2_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61662144)))]; tensor query_5_cast_fp16 = layer_norm(axes = query_5_axes_0, beta = model_layers_2_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_2_norm_self_att_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("query_5_cast_fp16")]; - tensor model_layers_2_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61668352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62716992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(61664256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62712896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_21_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_2_self_attn_linear_q_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_21_cast_fp16")]; tensor var_570 = const()[name = tensor("op_570"), val = tensor([1, -1, 8, 128])]; tensor q_13_cast_fp16 = reshape(shape = var_570, x = linear_21_cast_fp16)[name = tensor("q_13_cast_fp16")]; - tensor model_layers_2_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62719104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63767744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62715008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63763648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_2_self_attn_linear_k_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_22_cast_fp16")]; tensor var_574 = const()[name = tensor("op_574"), val = tensor([1, -1, 8, 128])]; tensor k_9_cast_fp16 = reshape(shape = var_574, x = linear_22_cast_fp16)[name = tensor("k_9_cast_fp16")]; - tensor model_layers_2_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63769856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64818496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63765760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64814400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_23_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_2_self_attn_linear_v_weight_to_fp16_quantized, x = query_5_cast_fp16)[name = tensor("linear_23_cast_fp16")]; tensor var_578 = const()[name = tensor("op_578"), val = tensor([1, -1, 8, 128])]; tensor v_5_cast_fp16 = reshape(shape = var_578, x = linear_23_cast_fp16)[name = tensor("v_5_cast_fp16")]; tensor value_5_perm_0 = const()[name = tensor("value_5_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_2_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_2_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64820608)))]; - tensor var_590_cast_fp16 = add(x = q_13_cast_fp16, y = model_layers_2_self_attn_pos_bias_u_to_fp16)[name = tensor("op_590_cast_fp16")]; - tensor model_layers_2_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_2_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64822720)))]; - tensor var_592_cast_fp16 = add(x = q_13_cast_fp16, y = model_layers_2_self_attn_pos_bias_v_to_fp16)[name = tensor("op_592_cast_fp16")]; + tensor model_layers_2_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64816512))), scale = tensor([0x1.784p-7, 0x1.cacp-8, 0x1.97p-8, 0x1.00cp-7, 0x1.8b4p-7, 0x1.9c4p-7, 0x1.3d4p-8, 0x1.accp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_590_cast_fp16 = add(x = q_13_cast_fp16, y = model_layers_2_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_590_cast_fp16")]; + tensor model_layers_2_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64817600))), scale = tensor([0x1.2bp-9, 0x1.da4p-11, 0x1.cecp-9, 0x1.9cp-9, 0x1.9bcp-9, 0x1.664p-8, 0x1.438p-9, 0x1.008p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_592_cast_fp16 = add(x = q_13_cast_fp16, y = model_layers_2_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_592_cast_fp16")]; tensor q_with_bias_v_5_perm_0 = const()[name = tensor("q_with_bias_v_5_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_51_transpose_x_0 = const()[name = tensor("x_51_transpose_x_0"), val = tensor(false)]; tensor x_51_transpose_y_0 = const()[name = tensor("x_51_transpose_y_0"), val = tensor(false)]; - tensor op_594_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_594_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64824832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65081920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_594_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_594_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64818688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65075776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_5_cast_fp16 = transpose(perm = q_with_bias_v_5_perm_0, x = var_592_cast_fp16)[name = tensor("transpose_297")]; tensor x_51_cast_fp16 = matmul(transpose_x = x_51_transpose_x_0, transpose_y = x_51_transpose_y_0, x = q_with_bias_v_5_cast_fp16, y = op_594_to_fp16_quantized)[name = tensor("x_51_cast_fp16")]; tensor x_53_pad_0 = const()[name = tensor("x_53_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -458,12 +458,12 @@ program(1.0) tensor var_627 = const()[name = tensor("op_627"), val = tensor([1, -1, 1024])]; tensor var_626_cast_fp16 = transpose(perm = var_626_perm_0, x = x_57_cast_fp16)[name = tensor("transpose_293")]; tensor input_139_cast_fp16 = reshape(shape = var_627, x = var_626_cast_fp16)[name = tensor("input_139_cast_fp16")]; - tensor model_layers_2_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65082496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66131136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65076352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66124992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_25_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_2_self_attn_linear_out_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = tensor("linear_25_cast_fp16")]; tensor input_143_cast_fp16 = add(x = input_135_cast_fp16, y = linear_25_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor x_61_axes_0 = const()[name = tensor("x_61_axes_0"), val = tensor([-1])]; - tensor model_layers_2_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66133248)))]; - tensor model_layers_2_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66135360)))]; + tensor model_layers_2_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66127104)))]; + tensor model_layers_2_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66129216)))]; tensor x_61_cast_fp16 = layer_norm(axes = x_61_axes_0, beta = model_layers_2_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_2_norm_conv_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("x_61_cast_fp16")]; tensor input_145_perm_0 = const()[name = tensor("input_145_perm_0"), val = tensor([0, 2, 1])]; tensor input_147_pad_type_0 = const()[name = tensor("input_147_pad_type_0"), val = tensor("valid")]; @@ -471,7 +471,7 @@ program(1.0) tensor input_147_pad_0 = const()[name = tensor("input_147_pad_0"), val = tensor([0, 0])]; tensor input_147_dilations_0 = const()[name = tensor("input_147_dilations_0"), val = tensor([1])]; tensor input_147_groups_0 = const()[name = tensor("input_147_groups_0"), val = tensor(1)]; - tensor model_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66137472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68234688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66131328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68228544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_145_cast_fp16 = transpose(perm = input_145_perm_0, x = x_61_cast_fp16)[name = tensor("transpose_292")]; tensor input_147_cast_fp16 = conv(dilations = input_147_dilations_0, groups = input_147_groups_0, pad = input_147_pad_0, pad_type = input_147_pad_type_0, strides = input_147_strides_0, weight = model_layers_2_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor x_63_split_num_splits_0 = const()[name = tensor("x_63_split_num_splits_0"), val = tensor(2)]; @@ -489,8 +489,8 @@ program(1.0) tensor input_153_strides_0 = const()[name = tensor("input_153_strides_0"), val = tensor([1])]; tensor input_153_pad_0 = const()[name = tensor("input_153_pad_0"), val = tensor([0, 0])]; tensor input_153_dilations_0 = const()[name = tensor("input_153_dilations_0"), val = tensor([1])]; - tensor const_252_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_252_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68238848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68248128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_253_to_fp16 = const()[name = tensor("const_253_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68250240)))]; + tensor const_252_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_252_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68232704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68241984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_253_to_fp16 = const()[name = tensor("const_253_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68244096)))]; tensor input_155_cast_fp16 = conv(bias = const_253_to_fp16, dilations = input_153_dilations_0, groups = input_153_groups_0, pad = input_153_pad_0, pad_type = input_153_pad_type_0, strides = input_153_strides_0, weight = const_252_to_fp16_quantized, x = input_151_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor input_157_cast_fp16 = silu(x = input_155_cast_fp16)[name = tensor("input_157_cast_fp16")]; tensor x_65_pad_type_0 = const()[name = tensor("x_65_pad_type_0"), val = tensor("valid")]; @@ -498,64 +498,64 @@ program(1.0) tensor x_65_pad_0 = const()[name = tensor("x_65_pad_0"), val = tensor([0, 0])]; tensor x_65_dilations_0 = const()[name = tensor("x_65_dilations_0"), val = tensor([1])]; tensor x_65_groups_0 = const()[name = tensor("x_65_groups_0"), val = tensor(1)]; - tensor model_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68252352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69300992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68246208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69294848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_65_cast_fp16 = conv(dilations = x_65_dilations_0, groups = x_65_groups_0, pad = x_65_pad_0, pad_type = x_65_pad_type_0, strides = x_65_strides_0, weight = model_layers_2_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_157_cast_fp16)[name = tensor("x_65_cast_fp16")]; tensor input_159_perm_0 = const()[name = tensor("input_159_perm_0"), val = tensor([0, 2, 1])]; tensor input_159_cast_fp16 = transpose(perm = input_159_perm_0, x = x_65_cast_fp16)[name = tensor("transpose_291")]; tensor input_161_cast_fp16 = add(x = input_143_cast_fp16, y = input_159_cast_fp16)[name = tensor("input_161_cast_fp16")]; tensor input_163_axes_0 = const()[name = tensor("input_163_axes_0"), val = tensor([-1])]; - tensor model_layers_2_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69303104)))]; - tensor model_layers_2_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69305216)))]; + tensor model_layers_2_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69296960)))]; + tensor model_layers_2_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69299072)))]; tensor input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = model_layers_2_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_2_norm_feed_forward2_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("input_163_cast_fp16")]; - tensor model_layers_2_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69307328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73501696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_2_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69301184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73495552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_2_feed_forward2_linear1_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = tensor("linear_26_cast_fp16")]; tensor input_167_cast_fp16 = silu(x = linear_26_cast_fp16)[name = tensor("input_167_cast_fp16")]; - tensor model_layers_2_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73509952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77704320))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_2_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_2_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73503808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77698176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_2_feed_forward2_linear2_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = tensor("linear_27_cast_fp16")]; tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1p-1)]; tensor var_688_cast_fp16 = mul(x = linear_27_cast_fp16, y = var_687_to_fp16)[name = tensor("op_688_cast_fp16")]; tensor input_173_cast_fp16 = add(x = input_161_cast_fp16, y = var_688_cast_fp16)[name = tensor("input_173_cast_fp16")]; tensor input_175_axes_0 = const()[name = tensor("input_175_axes_0"), val = tensor([-1])]; - tensor model_layers_2_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77706432)))]; - tensor model_layers_2_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77708544)))]; + tensor model_layers_2_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_2_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77700288)))]; + tensor model_layers_2_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_2_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77702400)))]; tensor input_175_cast_fp16 = layer_norm(axes = input_175_axes_0, beta = model_layers_2_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_2_norm_out_weight_to_fp16, x = input_173_cast_fp16)[name = tensor("input_175_cast_fp16")]; tensor input_177_axes_0 = const()[name = tensor("input_177_axes_0"), val = tensor([-1])]; - tensor model_layers_3_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77710656)))]; - tensor model_layers_3_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77712768)))]; + tensor model_layers_3_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77704512)))]; + tensor model_layers_3_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77706624)))]; tensor input_177_cast_fp16 = layer_norm(axes = input_177_axes_0, beta = model_layers_3_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_3_norm_feed_forward1_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("input_177_cast_fp16")]; - tensor model_layers_3_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77714880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81909248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_3_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77708736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81903104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_28_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_3_feed_forward1_linear1_weight_to_fp16_quantized, x = input_177_cast_fp16)[name = tensor("linear_28_cast_fp16")]; tensor input_181_cast_fp16 = silu(x = linear_28_cast_fp16)[name = tensor("input_181_cast_fp16")]; - tensor model_layers_3_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81917504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86111872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81911360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86105728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_29_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_3_feed_forward1_linear2_weight_to_fp16_quantized, x = input_181_cast_fp16)[name = tensor("linear_29_cast_fp16")]; tensor var_716_to_fp16 = const()[name = tensor("op_716_to_fp16"), val = tensor(0x1p-1)]; tensor var_717_cast_fp16 = mul(x = linear_29_cast_fp16, y = var_716_to_fp16)[name = tensor("op_717_cast_fp16")]; tensor input_187_cast_fp16 = add(x = input_175_cast_fp16, y = var_717_cast_fp16)[name = tensor("input_187_cast_fp16")]; tensor query_7_axes_0 = const()[name = tensor("query_7_axes_0"), val = tensor([-1])]; - tensor model_layers_3_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86113984)))]; - tensor model_layers_3_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86116096)))]; + tensor model_layers_3_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86107840)))]; + tensor model_layers_3_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86109952)))]; tensor query_7_cast_fp16 = layer_norm(axes = query_7_axes_0, beta = model_layers_3_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_3_norm_self_att_weight_to_fp16, x = input_187_cast_fp16)[name = tensor("query_7_cast_fp16")]; - tensor model_layers_3_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86118208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87166848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86112064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87160704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_30_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_3_self_attn_linear_q_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_30_cast_fp16")]; tensor var_733 = const()[name = tensor("op_733"), val = tensor([1, -1, 8, 128])]; tensor q_19_cast_fp16 = reshape(shape = var_733, x = linear_30_cast_fp16)[name = tensor("q_19_cast_fp16")]; - tensor model_layers_3_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87168960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88217600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87162816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88211456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_31_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_3_self_attn_linear_k_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_31_cast_fp16")]; tensor var_737 = const()[name = tensor("op_737"), val = tensor([1, -1, 8, 128])]; tensor k_13_cast_fp16 = reshape(shape = var_737, x = linear_31_cast_fp16)[name = tensor("k_13_cast_fp16")]; - tensor model_layers_3_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88219712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89268352))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88213568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89262208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_3_self_attn_linear_v_weight_to_fp16_quantized, x = query_7_cast_fp16)[name = tensor("linear_32_cast_fp16")]; tensor var_741 = const()[name = tensor("op_741"), val = tensor([1, -1, 8, 128])]; tensor v_7_cast_fp16 = reshape(shape = var_741, x = linear_32_cast_fp16)[name = tensor("v_7_cast_fp16")]; tensor value_7_perm_0 = const()[name = tensor("value_7_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_3_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_3_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89270464)))]; - tensor var_753_cast_fp16 = add(x = q_19_cast_fp16, y = model_layers_3_self_attn_pos_bias_u_to_fp16)[name = tensor("op_753_cast_fp16")]; - tensor model_layers_3_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_3_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89272576)))]; - tensor var_755_cast_fp16 = add(x = q_19_cast_fp16, y = model_layers_3_self_attn_pos_bias_v_to_fp16)[name = tensor("op_755_cast_fp16")]; + tensor model_layers_3_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89264320))), scale = tensor([0x1.cf4p-8, 0x1.144p-7, 0x1.e6p-8, 0x1.d4p-9, 0x1.948p-8, 0x1.f5cp-8, 0x1.048p-7, 0x1.9dcp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_753_cast_fp16 = add(x = q_19_cast_fp16, y = model_layers_3_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_753_cast_fp16")]; + tensor model_layers_3_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89265408))), scale = tensor([0x1.1dcp-8, 0x1.3ep-9, 0x1.f88p-9, 0x1.45p-8, 0x1.5f4p-8, 0x1.388p-8, 0x1.e14p-8, 0x1.42p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_755_cast_fp16 = add(x = q_19_cast_fp16, y = model_layers_3_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_755_cast_fp16")]; tensor q_with_bias_v_7_perm_0 = const()[name = tensor("q_with_bias_v_7_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_73_transpose_x_0 = const()[name = tensor("x_73_transpose_x_0"), val = tensor(false)]; tensor x_73_transpose_y_0 = const()[name = tensor("x_73_transpose_y_0"), val = tensor(false)]; - tensor op_757_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_757_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89274688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89531776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_757_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_757_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89266496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89523584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_7_cast_fp16 = transpose(perm = q_with_bias_v_7_perm_0, x = var_755_cast_fp16)[name = tensor("transpose_290")]; tensor x_73_cast_fp16 = matmul(transpose_x = x_73_transpose_x_0, transpose_y = x_73_transpose_y_0, x = q_with_bias_v_7_cast_fp16, y = op_757_to_fp16_quantized)[name = tensor("x_73_cast_fp16")]; tensor x_75_pad_0 = const()[name = tensor("x_75_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -595,12 +595,12 @@ program(1.0) tensor var_790 = const()[name = tensor("op_790"), val = tensor([1, -1, 1024])]; tensor var_789_cast_fp16 = transpose(perm = var_789_perm_0, x = x_79_cast_fp16)[name = tensor("transpose_286")]; tensor input_191_cast_fp16 = reshape(shape = var_790, x = var_789_cast_fp16)[name = tensor("input_191_cast_fp16")]; - tensor model_layers_3_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89532352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90580992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89524160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90572800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_34_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_3_self_attn_linear_out_weight_to_fp16_quantized, x = input_191_cast_fp16)[name = tensor("linear_34_cast_fp16")]; tensor input_195_cast_fp16 = add(x = input_187_cast_fp16, y = linear_34_cast_fp16)[name = tensor("input_195_cast_fp16")]; tensor x_83_axes_0 = const()[name = tensor("x_83_axes_0"), val = tensor([-1])]; - tensor model_layers_3_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90583104)))]; - tensor model_layers_3_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90585216)))]; + tensor model_layers_3_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90574912)))]; + tensor model_layers_3_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90577024)))]; tensor x_83_cast_fp16 = layer_norm(axes = x_83_axes_0, beta = model_layers_3_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_3_norm_conv_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("x_83_cast_fp16")]; tensor input_197_perm_0 = const()[name = tensor("input_197_perm_0"), val = tensor([0, 2, 1])]; tensor input_199_pad_type_0 = const()[name = tensor("input_199_pad_type_0"), val = tensor("valid")]; @@ -608,7 +608,7 @@ program(1.0) tensor input_199_pad_0 = const()[name = tensor("input_199_pad_0"), val = tensor([0, 0])]; tensor input_199_dilations_0 = const()[name = tensor("input_199_dilations_0"), val = tensor([1])]; tensor input_199_groups_0 = const()[name = tensor("input_199_groups_0"), val = tensor(1)]; - tensor model_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90587328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92684544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90579136))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92676352))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_197_cast_fp16 = transpose(perm = input_197_perm_0, x = x_83_cast_fp16)[name = tensor("transpose_285")]; tensor input_199_cast_fp16 = conv(dilations = input_199_dilations_0, groups = input_199_groups_0, pad = input_199_pad_0, pad_type = input_199_pad_type_0, strides = input_199_strides_0, weight = model_layers_3_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_197_cast_fp16)[name = tensor("input_199_cast_fp16")]; tensor x_85_split_num_splits_0 = const()[name = tensor("x_85_split_num_splits_0"), val = tensor(2)]; @@ -626,8 +626,8 @@ program(1.0) tensor input_205_strides_0 = const()[name = tensor("input_205_strides_0"), val = tensor([1])]; tensor input_205_pad_0 = const()[name = tensor("input_205_pad_0"), val = tensor([0, 0])]; tensor input_205_dilations_0 = const()[name = tensor("input_205_dilations_0"), val = tensor([1])]; - tensor const_254_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_254_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92688704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92697984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_255_to_fp16 = const()[name = tensor("const_255_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92700096)))]; + tensor const_254_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_254_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92680512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92689792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_255_to_fp16 = const()[name = tensor("const_255_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92691904)))]; tensor input_207_cast_fp16 = conv(bias = const_255_to_fp16, dilations = input_205_dilations_0, groups = input_205_groups_0, pad = input_205_pad_0, pad_type = input_205_pad_type_0, strides = input_205_strides_0, weight = const_254_to_fp16_quantized, x = input_203_cast_fp16)[name = tensor("input_207_cast_fp16")]; tensor input_209_cast_fp16 = silu(x = input_207_cast_fp16)[name = tensor("input_209_cast_fp16")]; tensor x_87_pad_type_0 = const()[name = tensor("x_87_pad_type_0"), val = tensor("valid")]; @@ -635,64 +635,64 @@ program(1.0) tensor x_87_pad_0 = const()[name = tensor("x_87_pad_0"), val = tensor([0, 0])]; tensor x_87_dilations_0 = const()[name = tensor("x_87_dilations_0"), val = tensor([1])]; tensor x_87_groups_0 = const()[name = tensor("x_87_groups_0"), val = tensor(1)]; - tensor model_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92702208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93750848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92694016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93742656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_87_cast_fp16 = conv(dilations = x_87_dilations_0, groups = x_87_groups_0, pad = x_87_pad_0, pad_type = x_87_pad_type_0, strides = x_87_strides_0, weight = model_layers_3_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_209_cast_fp16)[name = tensor("x_87_cast_fp16")]; tensor input_211_perm_0 = const()[name = tensor("input_211_perm_0"), val = tensor([0, 2, 1])]; tensor input_211_cast_fp16 = transpose(perm = input_211_perm_0, x = x_87_cast_fp16)[name = tensor("transpose_284")]; tensor input_213_cast_fp16 = add(x = input_195_cast_fp16, y = input_211_cast_fp16)[name = tensor("input_213_cast_fp16")]; tensor input_215_axes_0 = const()[name = tensor("input_215_axes_0"), val = tensor([-1])]; - tensor model_layers_3_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93752960)))]; - tensor model_layers_3_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93755072)))]; + tensor model_layers_3_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93744768)))]; + tensor model_layers_3_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93746880)))]; tensor input_215_cast_fp16 = layer_norm(axes = input_215_axes_0, beta = model_layers_3_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_3_norm_feed_forward2_weight_to_fp16, x = input_213_cast_fp16)[name = tensor("input_215_cast_fp16")]; - tensor model_layers_3_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93757184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97951552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_3_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93748992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97943360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_35_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_3_feed_forward2_linear1_weight_to_fp16_quantized, x = input_215_cast_fp16)[name = tensor("linear_35_cast_fp16")]; tensor input_219_cast_fp16 = silu(x = linear_35_cast_fp16)[name = tensor("input_219_cast_fp16")]; - tensor model_layers_3_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97959808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102154176))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_3_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_3_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97951616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102145984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_36_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_3_feed_forward2_linear2_weight_to_fp16_quantized, x = input_219_cast_fp16)[name = tensor("linear_36_cast_fp16")]; tensor var_850_to_fp16 = const()[name = tensor("op_850_to_fp16"), val = tensor(0x1p-1)]; tensor var_851_cast_fp16 = mul(x = linear_36_cast_fp16, y = var_850_to_fp16)[name = tensor("op_851_cast_fp16")]; tensor input_225_cast_fp16 = add(x = input_213_cast_fp16, y = var_851_cast_fp16)[name = tensor("input_225_cast_fp16")]; tensor input_227_axes_0 = const()[name = tensor("input_227_axes_0"), val = tensor([-1])]; - tensor model_layers_3_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102156288)))]; - tensor model_layers_3_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102158400)))]; + tensor model_layers_3_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_3_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102148096)))]; + tensor model_layers_3_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_3_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102150208)))]; tensor input_227_cast_fp16 = layer_norm(axes = input_227_axes_0, beta = model_layers_3_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_3_norm_out_weight_to_fp16, x = input_225_cast_fp16)[name = tensor("input_227_cast_fp16")]; tensor input_229_axes_0 = const()[name = tensor("input_229_axes_0"), val = tensor([-1])]; - tensor model_layers_4_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102160512)))]; - tensor model_layers_4_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102162624)))]; + tensor model_layers_4_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102152320)))]; + tensor model_layers_4_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102154432)))]; tensor input_229_cast_fp16 = layer_norm(axes = input_229_axes_0, beta = model_layers_4_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_4_norm_feed_forward1_weight_to_fp16, x = input_227_cast_fp16)[name = tensor("input_229_cast_fp16")]; - tensor model_layers_4_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102164736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106359104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_4_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102156544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106350912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_4_feed_forward1_linear1_weight_to_fp16_quantized, x = input_229_cast_fp16)[name = tensor("linear_37_cast_fp16")]; tensor input_233_cast_fp16 = silu(x = linear_37_cast_fp16)[name = tensor("input_233_cast_fp16")]; - tensor model_layers_4_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106367360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110561728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106359168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110553536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_38_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_4_feed_forward1_linear2_weight_to_fp16_quantized, x = input_233_cast_fp16)[name = tensor("linear_38_cast_fp16")]; tensor var_879_to_fp16 = const()[name = tensor("op_879_to_fp16"), val = tensor(0x1p-1)]; tensor var_880_cast_fp16 = mul(x = linear_38_cast_fp16, y = var_879_to_fp16)[name = tensor("op_880_cast_fp16")]; tensor input_239_cast_fp16 = add(x = input_227_cast_fp16, y = var_880_cast_fp16)[name = tensor("input_239_cast_fp16")]; tensor query_9_axes_0 = const()[name = tensor("query_9_axes_0"), val = tensor([-1])]; - tensor model_layers_4_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110563840)))]; - tensor model_layers_4_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110565952)))]; + tensor model_layers_4_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110555648)))]; + tensor model_layers_4_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110557760)))]; tensor query_9_cast_fp16 = layer_norm(axes = query_9_axes_0, beta = model_layers_4_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_4_norm_self_att_weight_to_fp16, x = input_239_cast_fp16)[name = tensor("query_9_cast_fp16")]; - tensor model_layers_4_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110568064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111616704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110559872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111608512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_39_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_4_self_attn_linear_q_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_39_cast_fp16")]; tensor var_896 = const()[name = tensor("op_896"), val = tensor([1, -1, 8, 128])]; tensor q_25_cast_fp16 = reshape(shape = var_896, x = linear_39_cast_fp16)[name = tensor("q_25_cast_fp16")]; - tensor model_layers_4_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111618816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112667456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111610624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112659264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_40_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_4_self_attn_linear_k_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_40_cast_fp16")]; tensor var_900 = const()[name = tensor("op_900"), val = tensor([1, -1, 8, 128])]; tensor k_17_cast_fp16 = reshape(shape = var_900, x = linear_40_cast_fp16)[name = tensor("k_17_cast_fp16")]; - tensor model_layers_4_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112669568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113718208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112661376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113710016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_41_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_4_self_attn_linear_v_weight_to_fp16_quantized, x = query_9_cast_fp16)[name = tensor("linear_41_cast_fp16")]; tensor var_904 = const()[name = tensor("op_904"), val = tensor([1, -1, 8, 128])]; tensor v_9_cast_fp16 = reshape(shape = var_904, x = linear_41_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor value_9_perm_0 = const()[name = tensor("value_9_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_4_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_4_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113720320)))]; - tensor var_916_cast_fp16 = add(x = q_25_cast_fp16, y = model_layers_4_self_attn_pos_bias_u_to_fp16)[name = tensor("op_916_cast_fp16")]; - tensor model_layers_4_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_4_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113722432)))]; - tensor var_918_cast_fp16 = add(x = q_25_cast_fp16, y = model_layers_4_self_attn_pos_bias_v_to_fp16)[name = tensor("op_918_cast_fp16")]; + tensor model_layers_4_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113712128))), scale = tensor([0x1.084p-7, 0x1.948p-8, 0x1.0dcp-7, 0x1.3fp-7, 0x1.fep-7, 0x1.2fp-7, 0x1.a84p-8, 0x1.748p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_916_cast_fp16 = add(x = q_25_cast_fp16, y = model_layers_4_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_916_cast_fp16")]; + tensor model_layers_4_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113713216))), scale = tensor([0x1.3dp-8, 0x1.554p-9, 0x1.13p-7, 0x1.1b4p-8, 0x1.a68p-11, 0x1.174p-9, 0x1.2dcp-8, 0x1.6f8p-10]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_918_cast_fp16 = add(x = q_25_cast_fp16, y = model_layers_4_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_918_cast_fp16")]; tensor q_with_bias_v_9_perm_0 = const()[name = tensor("q_with_bias_v_9_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_95_transpose_x_0 = const()[name = tensor("x_95_transpose_x_0"), val = tensor(false)]; tensor x_95_transpose_y_0 = const()[name = tensor("x_95_transpose_y_0"), val = tensor(false)]; - tensor op_920_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_920_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113724544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113981632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_920_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_920_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113714304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113971392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_9_cast_fp16 = transpose(perm = q_with_bias_v_9_perm_0, x = var_918_cast_fp16)[name = tensor("transpose_283")]; tensor x_95_cast_fp16 = matmul(transpose_x = x_95_transpose_x_0, transpose_y = x_95_transpose_y_0, x = q_with_bias_v_9_cast_fp16, y = op_920_to_fp16_quantized)[name = tensor("x_95_cast_fp16")]; tensor x_97_pad_0 = const()[name = tensor("x_97_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -732,12 +732,12 @@ program(1.0) tensor var_953 = const()[name = tensor("op_953"), val = tensor([1, -1, 1024])]; tensor var_952_cast_fp16 = transpose(perm = var_952_perm_0, x = x_101_cast_fp16)[name = tensor("transpose_279")]; tensor input_243_cast_fp16 = reshape(shape = var_953, x = var_952_cast_fp16)[name = tensor("input_243_cast_fp16")]; - tensor model_layers_4_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113982208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115030848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113971968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115020608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_43_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_4_self_attn_linear_out_weight_to_fp16_quantized, x = input_243_cast_fp16)[name = tensor("linear_43_cast_fp16")]; tensor input_247_cast_fp16 = add(x = input_239_cast_fp16, y = linear_43_cast_fp16)[name = tensor("input_247_cast_fp16")]; tensor x_105_axes_0 = const()[name = tensor("x_105_axes_0"), val = tensor([-1])]; - tensor model_layers_4_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115032960)))]; - tensor model_layers_4_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115035072)))]; + tensor model_layers_4_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115022720)))]; + tensor model_layers_4_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115024832)))]; tensor x_105_cast_fp16 = layer_norm(axes = x_105_axes_0, beta = model_layers_4_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_4_norm_conv_weight_to_fp16, x = input_247_cast_fp16)[name = tensor("x_105_cast_fp16")]; tensor input_249_perm_0 = const()[name = tensor("input_249_perm_0"), val = tensor([0, 2, 1])]; tensor input_251_pad_type_0 = const()[name = tensor("input_251_pad_type_0"), val = tensor("valid")]; @@ -745,7 +745,7 @@ program(1.0) tensor input_251_pad_0 = const()[name = tensor("input_251_pad_0"), val = tensor([0, 0])]; tensor input_251_dilations_0 = const()[name = tensor("input_251_dilations_0"), val = tensor([1])]; tensor input_251_groups_0 = const()[name = tensor("input_251_groups_0"), val = tensor(1)]; - tensor model_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115037184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117134400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115026944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117124160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_249_cast_fp16 = transpose(perm = input_249_perm_0, x = x_105_cast_fp16)[name = tensor("transpose_278")]; tensor input_251_cast_fp16 = conv(dilations = input_251_dilations_0, groups = input_251_groups_0, pad = input_251_pad_0, pad_type = input_251_pad_type_0, strides = input_251_strides_0, weight = model_layers_4_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_249_cast_fp16)[name = tensor("input_251_cast_fp16")]; tensor x_107_split_num_splits_0 = const()[name = tensor("x_107_split_num_splits_0"), val = tensor(2)]; @@ -763,8 +763,8 @@ program(1.0) tensor input_257_strides_0 = const()[name = tensor("input_257_strides_0"), val = tensor([1])]; tensor input_257_pad_0 = const()[name = tensor("input_257_pad_0"), val = tensor([0, 0])]; tensor input_257_dilations_0 = const()[name = tensor("input_257_dilations_0"), val = tensor([1])]; - tensor const_256_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_256_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117138560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117147840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_257_to_fp16 = const()[name = tensor("const_257_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117149952)))]; + tensor const_256_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_256_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117128320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117137600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_257_to_fp16 = const()[name = tensor("const_257_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117139712)))]; tensor input_259_cast_fp16 = conv(bias = const_257_to_fp16, dilations = input_257_dilations_0, groups = input_257_groups_0, pad = input_257_pad_0, pad_type = input_257_pad_type_0, strides = input_257_strides_0, weight = const_256_to_fp16_quantized, x = input_255_cast_fp16)[name = tensor("input_259_cast_fp16")]; tensor input_261_cast_fp16 = silu(x = input_259_cast_fp16)[name = tensor("input_261_cast_fp16")]; tensor x_109_pad_type_0 = const()[name = tensor("x_109_pad_type_0"), val = tensor("valid")]; @@ -772,64 +772,64 @@ program(1.0) tensor x_109_pad_0 = const()[name = tensor("x_109_pad_0"), val = tensor([0, 0])]; tensor x_109_dilations_0 = const()[name = tensor("x_109_dilations_0"), val = tensor([1])]; tensor x_109_groups_0 = const()[name = tensor("x_109_groups_0"), val = tensor(1)]; - tensor model_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117152064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118200704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117141824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118190464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_109_cast_fp16 = conv(dilations = x_109_dilations_0, groups = x_109_groups_0, pad = x_109_pad_0, pad_type = x_109_pad_type_0, strides = x_109_strides_0, weight = model_layers_4_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_261_cast_fp16)[name = tensor("x_109_cast_fp16")]; tensor input_263_perm_0 = const()[name = tensor("input_263_perm_0"), val = tensor([0, 2, 1])]; tensor input_263_cast_fp16 = transpose(perm = input_263_perm_0, x = x_109_cast_fp16)[name = tensor("transpose_277")]; tensor input_265_cast_fp16 = add(x = input_247_cast_fp16, y = input_263_cast_fp16)[name = tensor("input_265_cast_fp16")]; tensor input_267_axes_0 = const()[name = tensor("input_267_axes_0"), val = tensor([-1])]; - tensor model_layers_4_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118202816)))]; - tensor model_layers_4_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118204928)))]; + tensor model_layers_4_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118192576)))]; + tensor model_layers_4_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118194688)))]; tensor input_267_cast_fp16 = layer_norm(axes = input_267_axes_0, beta = model_layers_4_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_4_norm_feed_forward2_weight_to_fp16, x = input_265_cast_fp16)[name = tensor("input_267_cast_fp16")]; - tensor model_layers_4_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118207040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122401408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_4_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118196800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122391168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_4_feed_forward2_linear1_weight_to_fp16_quantized, x = input_267_cast_fp16)[name = tensor("linear_44_cast_fp16")]; tensor input_271_cast_fp16 = silu(x = linear_44_cast_fp16)[name = tensor("input_271_cast_fp16")]; - tensor model_layers_4_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122409664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126604032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_4_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_4_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122399424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126593792))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_45_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_4_feed_forward2_linear2_weight_to_fp16_quantized, x = input_271_cast_fp16)[name = tensor("linear_45_cast_fp16")]; tensor var_1013_to_fp16 = const()[name = tensor("op_1013_to_fp16"), val = tensor(0x1p-1)]; tensor var_1014_cast_fp16 = mul(x = linear_45_cast_fp16, y = var_1013_to_fp16)[name = tensor("op_1014_cast_fp16")]; tensor input_277_cast_fp16 = add(x = input_265_cast_fp16, y = var_1014_cast_fp16)[name = tensor("input_277_cast_fp16")]; tensor input_279_axes_0 = const()[name = tensor("input_279_axes_0"), val = tensor([-1])]; - tensor model_layers_4_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126606144)))]; - tensor model_layers_4_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126608256)))]; + tensor model_layers_4_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_4_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126595904)))]; + tensor model_layers_4_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_4_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126598016)))]; tensor input_279_cast_fp16 = layer_norm(axes = input_279_axes_0, beta = model_layers_4_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_4_norm_out_weight_to_fp16, x = input_277_cast_fp16)[name = tensor("input_279_cast_fp16")]; tensor input_281_axes_0 = const()[name = tensor("input_281_axes_0"), val = tensor([-1])]; - tensor model_layers_5_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126610368)))]; - tensor model_layers_5_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126612480)))]; + tensor model_layers_5_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126600128)))]; + tensor model_layers_5_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126602240)))]; tensor input_281_cast_fp16 = layer_norm(axes = input_281_axes_0, beta = model_layers_5_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_5_norm_feed_forward1_weight_to_fp16, x = input_279_cast_fp16)[name = tensor("input_281_cast_fp16")]; - tensor model_layers_5_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126614592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130808960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_5_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(126604352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130798720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_5_feed_forward1_linear1_weight_to_fp16_quantized, x = input_281_cast_fp16)[name = tensor("linear_46_cast_fp16")]; tensor input_285_cast_fp16 = silu(x = linear_46_cast_fp16)[name = tensor("input_285_cast_fp16")]; - tensor model_layers_5_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130817216))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135011584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130806976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135001344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_5_feed_forward1_linear2_weight_to_fp16_quantized, x = input_285_cast_fp16)[name = tensor("linear_47_cast_fp16")]; tensor var_1042_to_fp16 = const()[name = tensor("op_1042_to_fp16"), val = tensor(0x1p-1)]; tensor var_1043_cast_fp16 = mul(x = linear_47_cast_fp16, y = var_1042_to_fp16)[name = tensor("op_1043_cast_fp16")]; tensor input_291_cast_fp16 = add(x = input_279_cast_fp16, y = var_1043_cast_fp16)[name = tensor("input_291_cast_fp16")]; tensor query_11_axes_0 = const()[name = tensor("query_11_axes_0"), val = tensor([-1])]; - tensor model_layers_5_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135013696)))]; - tensor model_layers_5_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135015808)))]; + tensor model_layers_5_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135003456)))]; + tensor model_layers_5_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135005568)))]; tensor query_11_cast_fp16 = layer_norm(axes = query_11_axes_0, beta = model_layers_5_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_5_norm_self_att_weight_to_fp16, x = input_291_cast_fp16)[name = tensor("query_11_cast_fp16")]; - tensor model_layers_5_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135017920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136066560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135007680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136056320))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_48_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_5_self_attn_linear_q_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_48_cast_fp16")]; tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1, -1, 8, 128])]; tensor q_31_cast_fp16 = reshape(shape = var_1059, x = linear_48_cast_fp16)[name = tensor("q_31_cast_fp16")]; - tensor model_layers_5_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136068672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137117312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136058432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137107072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_49_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_5_self_attn_linear_k_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_49_cast_fp16")]; tensor var_1063 = const()[name = tensor("op_1063"), val = tensor([1, -1, 8, 128])]; tensor k_21_cast_fp16 = reshape(shape = var_1063, x = linear_49_cast_fp16)[name = tensor("k_21_cast_fp16")]; - tensor model_layers_5_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137119424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138168064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137109184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138157824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_50_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_5_self_attn_linear_v_weight_to_fp16_quantized, x = query_11_cast_fp16)[name = tensor("linear_50_cast_fp16")]; tensor var_1067 = const()[name = tensor("op_1067"), val = tensor([1, -1, 8, 128])]; tensor v_11_cast_fp16 = reshape(shape = var_1067, x = linear_50_cast_fp16)[name = tensor("v_11_cast_fp16")]; tensor value_11_perm_0 = const()[name = tensor("value_11_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_5_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_5_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138170176)))]; - tensor var_1079_cast_fp16 = add(x = q_31_cast_fp16, y = model_layers_5_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1079_cast_fp16")]; - tensor model_layers_5_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_5_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138172288)))]; - tensor var_1081_cast_fp16 = add(x = q_31_cast_fp16, y = model_layers_5_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1081_cast_fp16")]; + tensor model_layers_5_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138159936))), scale = tensor([0x1.118p-7, 0x1.8ap-8, 0x1.59p-7, 0x1.4dp-7, 0x1.d14p-8, 0x1.3c4p-7, 0x1.758p-7, 0x1.6cp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1079_cast_fp16 = add(x = q_31_cast_fp16, y = model_layers_5_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_1079_cast_fp16")]; + tensor model_layers_5_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138161024))), scale = tensor([0x1.8a4p-9, 0x1.dap-8, 0x1.228p-8, 0x1.3dcp-9, 0x1.f9cp-8, 0x1.1c8p-8, 0x1.e3p-8, 0x1.948p-9]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1081_cast_fp16 = add(x = q_31_cast_fp16, y = model_layers_5_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_1081_cast_fp16")]; tensor q_with_bias_v_11_perm_0 = const()[name = tensor("q_with_bias_v_11_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_117_transpose_x_0 = const()[name = tensor("x_117_transpose_x_0"), val = tensor(false)]; tensor x_117_transpose_y_0 = const()[name = tensor("x_117_transpose_y_0"), val = tensor(false)]; - tensor op_1083_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1083_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138174400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138431488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_1083_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1083_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138162112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138419200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_11_cast_fp16 = transpose(perm = q_with_bias_v_11_perm_0, x = var_1081_cast_fp16)[name = tensor("transpose_276")]; tensor x_117_cast_fp16 = matmul(transpose_x = x_117_transpose_x_0, transpose_y = x_117_transpose_y_0, x = q_with_bias_v_11_cast_fp16, y = op_1083_to_fp16_quantized)[name = tensor("x_117_cast_fp16")]; tensor x_119_pad_0 = const()[name = tensor("x_119_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -869,12 +869,12 @@ program(1.0) tensor var_1116 = const()[name = tensor("op_1116"), val = tensor([1, -1, 1024])]; tensor var_1115_cast_fp16 = transpose(perm = var_1115_perm_0, x = x_123_cast_fp16)[name = tensor("transpose_272")]; tensor input_295_cast_fp16 = reshape(shape = var_1116, x = var_1115_cast_fp16)[name = tensor("input_295_cast_fp16")]; - tensor model_layers_5_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138432064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139480704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138419776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139468416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_5_self_attn_linear_out_weight_to_fp16_quantized, x = input_295_cast_fp16)[name = tensor("linear_52_cast_fp16")]; tensor input_299_cast_fp16 = add(x = input_291_cast_fp16, y = linear_52_cast_fp16)[name = tensor("input_299_cast_fp16")]; tensor x_127_axes_0 = const()[name = tensor("x_127_axes_0"), val = tensor([-1])]; - tensor model_layers_5_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139482816)))]; - tensor model_layers_5_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139484928)))]; + tensor model_layers_5_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139470528)))]; + tensor model_layers_5_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139472640)))]; tensor x_127_cast_fp16 = layer_norm(axes = x_127_axes_0, beta = model_layers_5_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_5_norm_conv_weight_to_fp16, x = input_299_cast_fp16)[name = tensor("x_127_cast_fp16")]; tensor input_301_perm_0 = const()[name = tensor("input_301_perm_0"), val = tensor([0, 2, 1])]; tensor input_303_pad_type_0 = const()[name = tensor("input_303_pad_type_0"), val = tensor("valid")]; @@ -882,7 +882,7 @@ program(1.0) tensor input_303_pad_0 = const()[name = tensor("input_303_pad_0"), val = tensor([0, 0])]; tensor input_303_dilations_0 = const()[name = tensor("input_303_dilations_0"), val = tensor([1])]; tensor input_303_groups_0 = const()[name = tensor("input_303_groups_0"), val = tensor(1)]; - tensor model_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139487040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141584256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139474752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141571968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_301_cast_fp16 = transpose(perm = input_301_perm_0, x = x_127_cast_fp16)[name = tensor("transpose_271")]; tensor input_303_cast_fp16 = conv(dilations = input_303_dilations_0, groups = input_303_groups_0, pad = input_303_pad_0, pad_type = input_303_pad_type_0, strides = input_303_strides_0, weight = model_layers_5_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_301_cast_fp16)[name = tensor("input_303_cast_fp16")]; tensor x_129_split_num_splits_0 = const()[name = tensor("x_129_split_num_splits_0"), val = tensor(2)]; @@ -900,8 +900,8 @@ program(1.0) tensor input_309_strides_0 = const()[name = tensor("input_309_strides_0"), val = tensor([1])]; tensor input_309_pad_0 = const()[name = tensor("input_309_pad_0"), val = tensor([0, 0])]; tensor input_309_dilations_0 = const()[name = tensor("input_309_dilations_0"), val = tensor([1])]; - tensor const_258_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_258_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141588416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141597696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_259_to_fp16 = const()[name = tensor("const_259_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141599808)))]; + tensor const_258_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_258_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141576128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141585408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_259_to_fp16 = const()[name = tensor("const_259_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141587520)))]; tensor input_311_cast_fp16 = conv(bias = const_259_to_fp16, dilations = input_309_dilations_0, groups = input_309_groups_0, pad = input_309_pad_0, pad_type = input_309_pad_type_0, strides = input_309_strides_0, weight = const_258_to_fp16_quantized, x = input_307_cast_fp16)[name = tensor("input_311_cast_fp16")]; tensor input_313_cast_fp16 = silu(x = input_311_cast_fp16)[name = tensor("input_313_cast_fp16")]; tensor x_131_pad_type_0 = const()[name = tensor("x_131_pad_type_0"), val = tensor("valid")]; @@ -909,64 +909,64 @@ program(1.0) tensor x_131_pad_0 = const()[name = tensor("x_131_pad_0"), val = tensor([0, 0])]; tensor x_131_dilations_0 = const()[name = tensor("x_131_dilations_0"), val = tensor([1])]; tensor x_131_groups_0 = const()[name = tensor("x_131_groups_0"), val = tensor(1)]; - tensor model_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141601920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142650560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141589632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142638272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_131_cast_fp16 = conv(dilations = x_131_dilations_0, groups = x_131_groups_0, pad = x_131_pad_0, pad_type = x_131_pad_type_0, strides = x_131_strides_0, weight = model_layers_5_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_313_cast_fp16)[name = tensor("x_131_cast_fp16")]; tensor input_315_perm_0 = const()[name = tensor("input_315_perm_0"), val = tensor([0, 2, 1])]; tensor input_315_cast_fp16 = transpose(perm = input_315_perm_0, x = x_131_cast_fp16)[name = tensor("transpose_270")]; tensor input_317_cast_fp16 = add(x = input_299_cast_fp16, y = input_315_cast_fp16)[name = tensor("input_317_cast_fp16")]; tensor input_319_axes_0 = const()[name = tensor("input_319_axes_0"), val = tensor([-1])]; - tensor model_layers_5_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142652672)))]; - tensor model_layers_5_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142654784)))]; + tensor model_layers_5_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142640384)))]; + tensor model_layers_5_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142642496)))]; tensor input_319_cast_fp16 = layer_norm(axes = input_319_axes_0, beta = model_layers_5_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_5_norm_feed_forward2_weight_to_fp16, x = input_317_cast_fp16)[name = tensor("input_319_cast_fp16")]; - tensor model_layers_5_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142656896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146851264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_5_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142644608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146838976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_53_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_5_feed_forward2_linear1_weight_to_fp16_quantized, x = input_319_cast_fp16)[name = tensor("linear_53_cast_fp16")]; tensor input_323_cast_fp16 = silu(x = linear_53_cast_fp16)[name = tensor("input_323_cast_fp16")]; - tensor model_layers_5_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146859520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151053888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_5_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_5_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146847232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151041600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_54_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_5_feed_forward2_linear2_weight_to_fp16_quantized, x = input_323_cast_fp16)[name = tensor("linear_54_cast_fp16")]; tensor var_1176_to_fp16 = const()[name = tensor("op_1176_to_fp16"), val = tensor(0x1p-1)]; tensor var_1177_cast_fp16 = mul(x = linear_54_cast_fp16, y = var_1176_to_fp16)[name = tensor("op_1177_cast_fp16")]; tensor input_329_cast_fp16 = add(x = input_317_cast_fp16, y = var_1177_cast_fp16)[name = tensor("input_329_cast_fp16")]; tensor input_331_axes_0 = const()[name = tensor("input_331_axes_0"), val = tensor([-1])]; - tensor model_layers_5_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151056000)))]; - tensor model_layers_5_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151058112)))]; + tensor model_layers_5_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_5_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151043712)))]; + tensor model_layers_5_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_5_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151045824)))]; tensor input_331_cast_fp16 = layer_norm(axes = input_331_axes_0, beta = model_layers_5_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_5_norm_out_weight_to_fp16, x = input_329_cast_fp16)[name = tensor("input_331_cast_fp16")]; tensor input_333_axes_0 = const()[name = tensor("input_333_axes_0"), val = tensor([-1])]; - tensor model_layers_6_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151060224)))]; - tensor model_layers_6_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151062336)))]; + tensor model_layers_6_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151047936)))]; + tensor model_layers_6_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151050048)))]; tensor input_333_cast_fp16 = layer_norm(axes = input_333_axes_0, beta = model_layers_6_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_6_norm_feed_forward1_weight_to_fp16, x = input_331_cast_fp16)[name = tensor("input_333_cast_fp16")]; - tensor model_layers_6_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151064448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155258816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_6_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151052160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155246528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_6_feed_forward1_linear1_weight_to_fp16_quantized, x = input_333_cast_fp16)[name = tensor("linear_55_cast_fp16")]; tensor input_337_cast_fp16 = silu(x = linear_55_cast_fp16)[name = tensor("input_337_cast_fp16")]; - tensor model_layers_6_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155267072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159461440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(155254784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159449152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_56_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_6_feed_forward1_linear2_weight_to_fp16_quantized, x = input_337_cast_fp16)[name = tensor("linear_56_cast_fp16")]; tensor var_1205_to_fp16 = const()[name = tensor("op_1205_to_fp16"), val = tensor(0x1p-1)]; tensor var_1206_cast_fp16 = mul(x = linear_56_cast_fp16, y = var_1205_to_fp16)[name = tensor("op_1206_cast_fp16")]; tensor input_343_cast_fp16 = add(x = input_331_cast_fp16, y = var_1206_cast_fp16)[name = tensor("input_343_cast_fp16")]; tensor query_13_axes_0 = const()[name = tensor("query_13_axes_0"), val = tensor([-1])]; - tensor model_layers_6_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159463552)))]; - tensor model_layers_6_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159465664)))]; + tensor model_layers_6_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159451264)))]; + tensor model_layers_6_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159453376)))]; tensor query_13_cast_fp16 = layer_norm(axes = query_13_axes_0, beta = model_layers_6_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_6_norm_self_att_weight_to_fp16, x = input_343_cast_fp16)[name = tensor("query_13_cast_fp16")]; - tensor model_layers_6_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159467776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160516416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159455488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160504128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_6_self_attn_linear_q_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_57_cast_fp16")]; tensor var_1222 = const()[name = tensor("op_1222"), val = tensor([1, -1, 8, 128])]; tensor q_37_cast_fp16 = reshape(shape = var_1222, x = linear_57_cast_fp16)[name = tensor("q_37_cast_fp16")]; - tensor model_layers_6_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160518528))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161567168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160506240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161554880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_58_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_6_self_attn_linear_k_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_58_cast_fp16")]; tensor var_1226 = const()[name = tensor("op_1226"), val = tensor([1, -1, 8, 128])]; tensor k_25_cast_fp16 = reshape(shape = var_1226, x = linear_58_cast_fp16)[name = tensor("k_25_cast_fp16")]; - tensor model_layers_6_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161569280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162617920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161556992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162605632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_59_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_6_self_attn_linear_v_weight_to_fp16_quantized, x = query_13_cast_fp16)[name = tensor("linear_59_cast_fp16")]; tensor var_1230 = const()[name = tensor("op_1230"), val = tensor([1, -1, 8, 128])]; tensor v_13_cast_fp16 = reshape(shape = var_1230, x = linear_59_cast_fp16)[name = tensor("v_13_cast_fp16")]; tensor value_13_perm_0 = const()[name = tensor("value_13_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_6_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_6_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162620032)))]; - tensor var_1242_cast_fp16 = add(x = q_37_cast_fp16, y = model_layers_6_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1242_cast_fp16")]; - tensor model_layers_6_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_6_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162622144)))]; - tensor var_1244_cast_fp16 = add(x = q_37_cast_fp16, y = model_layers_6_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1244_cast_fp16")]; + tensor model_layers_6_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162607744))), scale = tensor([0x1.a08p-9, 0x1.75p-7, 0x1.01cp-8, 0x1.cp-8, 0x1.26p-7, 0x1.07cp-7, 0x1.dep-8, 0x1.86p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1242_cast_fp16 = add(x = q_37_cast_fp16, y = model_layers_6_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_1242_cast_fp16")]; + tensor model_layers_6_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162608832))), scale = tensor([0x1.adcp-10, 0x1.978p-9, 0x1.d5p-8, 0x1.238p-8, 0x1.72cp-8, 0x1.7ecp-9, 0x1.24cp-7, 0x1.34cp-9]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1244_cast_fp16 = add(x = q_37_cast_fp16, y = model_layers_6_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_1244_cast_fp16")]; tensor q_with_bias_v_13_perm_0 = const()[name = tensor("q_with_bias_v_13_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_139_transpose_x_0 = const()[name = tensor("x_139_transpose_x_0"), val = tensor(false)]; tensor x_139_transpose_y_0 = const()[name = tensor("x_139_transpose_y_0"), val = tensor(false)]; - tensor op_1246_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1246_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162624256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162881344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_1246_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1246_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162609920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162867008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_13_cast_fp16 = transpose(perm = q_with_bias_v_13_perm_0, x = var_1244_cast_fp16)[name = tensor("transpose_269")]; tensor x_139_cast_fp16 = matmul(transpose_x = x_139_transpose_x_0, transpose_y = x_139_transpose_y_0, x = q_with_bias_v_13_cast_fp16, y = op_1246_to_fp16_quantized)[name = tensor("x_139_cast_fp16")]; tensor x_141_pad_0 = const()[name = tensor("x_141_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1006,12 +1006,12 @@ program(1.0) tensor var_1279 = const()[name = tensor("op_1279"), val = tensor([1, -1, 1024])]; tensor var_1278_cast_fp16 = transpose(perm = var_1278_perm_0, x = x_145_cast_fp16)[name = tensor("transpose_265")]; tensor input_347_cast_fp16 = reshape(shape = var_1279, x = var_1278_cast_fp16)[name = tensor("input_347_cast_fp16")]; - tensor model_layers_6_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162881920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163930560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162867584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163916224))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_61_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_6_self_attn_linear_out_weight_to_fp16_quantized, x = input_347_cast_fp16)[name = tensor("linear_61_cast_fp16")]; tensor input_351_cast_fp16 = add(x = input_343_cast_fp16, y = linear_61_cast_fp16)[name = tensor("input_351_cast_fp16")]; tensor x_149_axes_0 = const()[name = tensor("x_149_axes_0"), val = tensor([-1])]; - tensor model_layers_6_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163932672)))]; - tensor model_layers_6_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163934784)))]; + tensor model_layers_6_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163918336)))]; + tensor model_layers_6_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163920448)))]; tensor x_149_cast_fp16 = layer_norm(axes = x_149_axes_0, beta = model_layers_6_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_6_norm_conv_weight_to_fp16, x = input_351_cast_fp16)[name = tensor("x_149_cast_fp16")]; tensor input_353_perm_0 = const()[name = tensor("input_353_perm_0"), val = tensor([0, 2, 1])]; tensor input_355_pad_type_0 = const()[name = tensor("input_355_pad_type_0"), val = tensor("valid")]; @@ -1019,7 +1019,7 @@ program(1.0) tensor input_355_pad_0 = const()[name = tensor("input_355_pad_0"), val = tensor([0, 0])]; tensor input_355_dilations_0 = const()[name = tensor("input_355_dilations_0"), val = tensor([1])]; tensor input_355_groups_0 = const()[name = tensor("input_355_groups_0"), val = tensor(1)]; - tensor model_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163936896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166034112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163922560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166019776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_353_cast_fp16 = transpose(perm = input_353_perm_0, x = x_149_cast_fp16)[name = tensor("transpose_264")]; tensor input_355_cast_fp16 = conv(dilations = input_355_dilations_0, groups = input_355_groups_0, pad = input_355_pad_0, pad_type = input_355_pad_type_0, strides = input_355_strides_0, weight = model_layers_6_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_353_cast_fp16)[name = tensor("input_355_cast_fp16")]; tensor x_151_split_num_splits_0 = const()[name = tensor("x_151_split_num_splits_0"), val = tensor(2)]; @@ -1037,8 +1037,8 @@ program(1.0) tensor input_361_strides_0 = const()[name = tensor("input_361_strides_0"), val = tensor([1])]; tensor input_361_pad_0 = const()[name = tensor("input_361_pad_0"), val = tensor([0, 0])]; tensor input_361_dilations_0 = const()[name = tensor("input_361_dilations_0"), val = tensor([1])]; - tensor const_260_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_260_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166038272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166047552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_261_to_fp16 = const()[name = tensor("const_261_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166049664)))]; + tensor const_260_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_260_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166023936))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166033216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_261_to_fp16 = const()[name = tensor("const_261_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166035328)))]; tensor input_363_cast_fp16 = conv(bias = const_261_to_fp16, dilations = input_361_dilations_0, groups = input_361_groups_0, pad = input_361_pad_0, pad_type = input_361_pad_type_0, strides = input_361_strides_0, weight = const_260_to_fp16_quantized, x = input_359_cast_fp16)[name = tensor("input_363_cast_fp16")]; tensor input_365_cast_fp16 = silu(x = input_363_cast_fp16)[name = tensor("input_365_cast_fp16")]; tensor x_153_pad_type_0 = const()[name = tensor("x_153_pad_type_0"), val = tensor("valid")]; @@ -1046,64 +1046,64 @@ program(1.0) tensor x_153_pad_0 = const()[name = tensor("x_153_pad_0"), val = tensor([0, 0])]; tensor x_153_dilations_0 = const()[name = tensor("x_153_dilations_0"), val = tensor([1])]; tensor x_153_groups_0 = const()[name = tensor("x_153_groups_0"), val = tensor(1)]; - tensor model_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166051776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167100416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166037440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167086080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_153_cast_fp16 = conv(dilations = x_153_dilations_0, groups = x_153_groups_0, pad = x_153_pad_0, pad_type = x_153_pad_type_0, strides = x_153_strides_0, weight = model_layers_6_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_365_cast_fp16)[name = tensor("x_153_cast_fp16")]; tensor input_367_perm_0 = const()[name = tensor("input_367_perm_0"), val = tensor([0, 2, 1])]; tensor input_367_cast_fp16 = transpose(perm = input_367_perm_0, x = x_153_cast_fp16)[name = tensor("transpose_263")]; tensor input_369_cast_fp16 = add(x = input_351_cast_fp16, y = input_367_cast_fp16)[name = tensor("input_369_cast_fp16")]; tensor input_371_axes_0 = const()[name = tensor("input_371_axes_0"), val = tensor([-1])]; - tensor model_layers_6_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167102528)))]; - tensor model_layers_6_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167104640)))]; + tensor model_layers_6_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167088192)))]; + tensor model_layers_6_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167090304)))]; tensor input_371_cast_fp16 = layer_norm(axes = input_371_axes_0, beta = model_layers_6_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_6_norm_feed_forward2_weight_to_fp16, x = input_369_cast_fp16)[name = tensor("input_371_cast_fp16")]; - tensor model_layers_6_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167106752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171301120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_6_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167092416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171286784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_6_feed_forward2_linear1_weight_to_fp16_quantized, x = input_371_cast_fp16)[name = tensor("linear_62_cast_fp16")]; tensor input_375_cast_fp16 = silu(x = linear_62_cast_fp16)[name = tensor("input_375_cast_fp16")]; - tensor model_layers_6_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171309376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175503744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_6_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_6_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171295040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175489408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_63_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_6_feed_forward2_linear2_weight_to_fp16_quantized, x = input_375_cast_fp16)[name = tensor("linear_63_cast_fp16")]; tensor var_1339_to_fp16 = const()[name = tensor("op_1339_to_fp16"), val = tensor(0x1p-1)]; tensor var_1340_cast_fp16 = mul(x = linear_63_cast_fp16, y = var_1339_to_fp16)[name = tensor("op_1340_cast_fp16")]; tensor input_381_cast_fp16 = add(x = input_369_cast_fp16, y = var_1340_cast_fp16)[name = tensor("input_381_cast_fp16")]; tensor input_383_axes_0 = const()[name = tensor("input_383_axes_0"), val = tensor([-1])]; - tensor model_layers_6_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175505856)))]; - tensor model_layers_6_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175507968)))]; + tensor model_layers_6_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_6_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175491520)))]; + tensor model_layers_6_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_6_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175493632)))]; tensor input_383_cast_fp16 = layer_norm(axes = input_383_axes_0, beta = model_layers_6_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_6_norm_out_weight_to_fp16, x = input_381_cast_fp16)[name = tensor("input_383_cast_fp16")]; tensor input_385_axes_0 = const()[name = tensor("input_385_axes_0"), val = tensor([-1])]; - tensor model_layers_7_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175510080)))]; - tensor model_layers_7_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175512192)))]; + tensor model_layers_7_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175495744)))]; + tensor model_layers_7_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175497856)))]; tensor input_385_cast_fp16 = layer_norm(axes = input_385_axes_0, beta = model_layers_7_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_7_norm_feed_forward1_weight_to_fp16, x = input_383_cast_fp16)[name = tensor("input_385_cast_fp16")]; - tensor model_layers_7_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175514304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179708672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_7_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175499968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179694336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_7_feed_forward1_linear1_weight_to_fp16_quantized, x = input_385_cast_fp16)[name = tensor("linear_64_cast_fp16")]; tensor input_389_cast_fp16 = silu(x = linear_64_cast_fp16)[name = tensor("input_389_cast_fp16")]; - tensor model_layers_7_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179716928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183911296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179702592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183896960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_65_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_7_feed_forward1_linear2_weight_to_fp16_quantized, x = input_389_cast_fp16)[name = tensor("linear_65_cast_fp16")]; tensor var_1368_to_fp16 = const()[name = tensor("op_1368_to_fp16"), val = tensor(0x1p-1)]; tensor var_1369_cast_fp16 = mul(x = linear_65_cast_fp16, y = var_1368_to_fp16)[name = tensor("op_1369_cast_fp16")]; tensor input_395_cast_fp16 = add(x = input_383_cast_fp16, y = var_1369_cast_fp16)[name = tensor("input_395_cast_fp16")]; tensor query_15_axes_0 = const()[name = tensor("query_15_axes_0"), val = tensor([-1])]; - tensor model_layers_7_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183913408)))]; - tensor model_layers_7_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183915520)))]; + tensor model_layers_7_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183899072)))]; + tensor model_layers_7_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183901184)))]; tensor query_15_cast_fp16 = layer_norm(axes = query_15_axes_0, beta = model_layers_7_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_7_norm_self_att_weight_to_fp16, x = input_395_cast_fp16)[name = tensor("query_15_cast_fp16")]; - tensor model_layers_7_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183917632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184966272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183903296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184951936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_66_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_7_self_attn_linear_q_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_66_cast_fp16")]; tensor var_1385 = const()[name = tensor("op_1385"), val = tensor([1, -1, 8, 128])]; tensor q_43_cast_fp16 = reshape(shape = var_1385, x = linear_66_cast_fp16)[name = tensor("q_43_cast_fp16")]; - tensor model_layers_7_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184968384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186017024))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184954048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186002688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_67_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_7_self_attn_linear_k_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_67_cast_fp16")]; tensor var_1389 = const()[name = tensor("op_1389"), val = tensor([1, -1, 8, 128])]; tensor k_29_cast_fp16 = reshape(shape = var_1389, x = linear_67_cast_fp16)[name = tensor("k_29_cast_fp16")]; - tensor model_layers_7_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186019136))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187067776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186004800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187053440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_68_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_7_self_attn_linear_v_weight_to_fp16_quantized, x = query_15_cast_fp16)[name = tensor("linear_68_cast_fp16")]; tensor var_1393 = const()[name = tensor("op_1393"), val = tensor([1, -1, 8, 128])]; tensor v_15_cast_fp16 = reshape(shape = var_1393, x = linear_68_cast_fp16)[name = tensor("v_15_cast_fp16")]; tensor value_15_perm_0 = const()[name = tensor("value_15_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_7_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_7_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187069888)))]; - tensor var_1405_cast_fp16 = add(x = q_43_cast_fp16, y = model_layers_7_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1405_cast_fp16")]; - tensor model_layers_7_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_7_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187072000)))]; - tensor var_1407_cast_fp16 = add(x = q_43_cast_fp16, y = model_layers_7_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1407_cast_fp16")]; + tensor model_layers_7_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187055552))), scale = tensor([0x1.b94p-8, 0x1.224p-7, 0x1.1b4p-8, 0x1.1dp-7, 0x1.8d4p-8, 0x1.bbp-8, 0x1.614p-8, 0x1.01cp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1405_cast_fp16 = add(x = q_43_cast_fp16, y = model_layers_7_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_1405_cast_fp16")]; + tensor model_layers_7_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187056640))), scale = tensor([0x1.5acp-9, 0x1.9dcp-11, 0x1.a34p-8, 0x1.d98p-9, 0x1.edp-9, 0x1.904p-8, 0x1.1ccp-8, 0x1.358p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1407_cast_fp16 = add(x = q_43_cast_fp16, y = model_layers_7_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_1407_cast_fp16")]; tensor q_with_bias_v_15_perm_0 = const()[name = tensor("q_with_bias_v_15_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_161_transpose_x_0 = const()[name = tensor("x_161_transpose_x_0"), val = tensor(false)]; tensor x_161_transpose_y_0 = const()[name = tensor("x_161_transpose_y_0"), val = tensor(false)]; - tensor op_1409_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1409_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187074112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187331200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_1409_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1409_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187057728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187314816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_15_cast_fp16 = transpose(perm = q_with_bias_v_15_perm_0, x = var_1407_cast_fp16)[name = tensor("transpose_262")]; tensor x_161_cast_fp16 = matmul(transpose_x = x_161_transpose_x_0, transpose_y = x_161_transpose_y_0, x = q_with_bias_v_15_cast_fp16, y = op_1409_to_fp16_quantized)[name = tensor("x_161_cast_fp16")]; tensor x_163_pad_0 = const()[name = tensor("x_163_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1143,12 +1143,12 @@ program(1.0) tensor var_1442 = const()[name = tensor("op_1442"), val = tensor([1, -1, 1024])]; tensor var_1441_cast_fp16 = transpose(perm = var_1441_perm_0, x = x_167_cast_fp16)[name = tensor("transpose_258")]; tensor input_399_cast_fp16 = reshape(shape = var_1442, x = var_1441_cast_fp16)[name = tensor("input_399_cast_fp16")]; - tensor model_layers_7_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187331776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188380416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187315392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188364032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_70_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_7_self_attn_linear_out_weight_to_fp16_quantized, x = input_399_cast_fp16)[name = tensor("linear_70_cast_fp16")]; tensor input_403_cast_fp16 = add(x = input_395_cast_fp16, y = linear_70_cast_fp16)[name = tensor("input_403_cast_fp16")]; tensor x_171_axes_0 = const()[name = tensor("x_171_axes_0"), val = tensor([-1])]; - tensor model_layers_7_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188382528)))]; - tensor model_layers_7_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188384640)))]; + tensor model_layers_7_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188366144)))]; + tensor model_layers_7_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188368256)))]; tensor x_171_cast_fp16 = layer_norm(axes = x_171_axes_0, beta = model_layers_7_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_7_norm_conv_weight_to_fp16, x = input_403_cast_fp16)[name = tensor("x_171_cast_fp16")]; tensor input_405_perm_0 = const()[name = tensor("input_405_perm_0"), val = tensor([0, 2, 1])]; tensor input_407_pad_type_0 = const()[name = tensor("input_407_pad_type_0"), val = tensor("valid")]; @@ -1156,7 +1156,7 @@ program(1.0) tensor input_407_pad_0 = const()[name = tensor("input_407_pad_0"), val = tensor([0, 0])]; tensor input_407_dilations_0 = const()[name = tensor("input_407_dilations_0"), val = tensor([1])]; tensor input_407_groups_0 = const()[name = tensor("input_407_groups_0"), val = tensor(1)]; - tensor model_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188386752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190483968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(188370368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190467584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_405_cast_fp16 = transpose(perm = input_405_perm_0, x = x_171_cast_fp16)[name = tensor("transpose_257")]; tensor input_407_cast_fp16 = conv(dilations = input_407_dilations_0, groups = input_407_groups_0, pad = input_407_pad_0, pad_type = input_407_pad_type_0, strides = input_407_strides_0, weight = model_layers_7_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_405_cast_fp16)[name = tensor("input_407_cast_fp16")]; tensor x_173_split_num_splits_0 = const()[name = tensor("x_173_split_num_splits_0"), val = tensor(2)]; @@ -1174,8 +1174,8 @@ program(1.0) tensor input_413_strides_0 = const()[name = tensor("input_413_strides_0"), val = tensor([1])]; tensor input_413_pad_0 = const()[name = tensor("input_413_pad_0"), val = tensor([0, 0])]; tensor input_413_dilations_0 = const()[name = tensor("input_413_dilations_0"), val = tensor([1])]; - tensor const_262_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_262_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190488128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190497408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_263_to_fp16 = const()[name = tensor("const_263_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190499520)))]; + tensor const_262_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_262_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190471744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190481024))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_263_to_fp16 = const()[name = tensor("const_263_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190483136)))]; tensor input_415_cast_fp16 = conv(bias = const_263_to_fp16, dilations = input_413_dilations_0, groups = input_413_groups_0, pad = input_413_pad_0, pad_type = input_413_pad_type_0, strides = input_413_strides_0, weight = const_262_to_fp16_quantized, x = input_411_cast_fp16)[name = tensor("input_415_cast_fp16")]; tensor input_417_cast_fp16 = silu(x = input_415_cast_fp16)[name = tensor("input_417_cast_fp16")]; tensor x_175_pad_type_0 = const()[name = tensor("x_175_pad_type_0"), val = tensor("valid")]; @@ -1183,64 +1183,64 @@ program(1.0) tensor x_175_pad_0 = const()[name = tensor("x_175_pad_0"), val = tensor([0, 0])]; tensor x_175_dilations_0 = const()[name = tensor("x_175_dilations_0"), val = tensor([1])]; tensor x_175_groups_0 = const()[name = tensor("x_175_groups_0"), val = tensor(1)]; - tensor model_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190501632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191550272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190485248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191533888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_175_cast_fp16 = conv(dilations = x_175_dilations_0, groups = x_175_groups_0, pad = x_175_pad_0, pad_type = x_175_pad_type_0, strides = x_175_strides_0, weight = model_layers_7_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_417_cast_fp16)[name = tensor("x_175_cast_fp16")]; tensor input_419_perm_0 = const()[name = tensor("input_419_perm_0"), val = tensor([0, 2, 1])]; tensor input_419_cast_fp16 = transpose(perm = input_419_perm_0, x = x_175_cast_fp16)[name = tensor("transpose_256")]; tensor input_421_cast_fp16 = add(x = input_403_cast_fp16, y = input_419_cast_fp16)[name = tensor("input_421_cast_fp16")]; tensor input_423_axes_0 = const()[name = tensor("input_423_axes_0"), val = tensor([-1])]; - tensor model_layers_7_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191552384)))]; - tensor model_layers_7_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191554496)))]; + tensor model_layers_7_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191536000)))]; + tensor model_layers_7_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191538112)))]; tensor input_423_cast_fp16 = layer_norm(axes = input_423_axes_0, beta = model_layers_7_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_7_norm_feed_forward2_weight_to_fp16, x = input_421_cast_fp16)[name = tensor("input_423_cast_fp16")]; - tensor model_layers_7_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191556608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195750976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_7_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191540224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195734592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_7_feed_forward2_linear1_weight_to_fp16_quantized, x = input_423_cast_fp16)[name = tensor("linear_71_cast_fp16")]; tensor input_427_cast_fp16 = silu(x = linear_71_cast_fp16)[name = tensor("input_427_cast_fp16")]; - tensor model_layers_7_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195759232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199953600))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_7_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_7_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195742848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199937216))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_72_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_7_feed_forward2_linear2_weight_to_fp16_quantized, x = input_427_cast_fp16)[name = tensor("linear_72_cast_fp16")]; tensor var_1502_to_fp16 = const()[name = tensor("op_1502_to_fp16"), val = tensor(0x1p-1)]; tensor var_1503_cast_fp16 = mul(x = linear_72_cast_fp16, y = var_1502_to_fp16)[name = tensor("op_1503_cast_fp16")]; tensor input_433_cast_fp16 = add(x = input_421_cast_fp16, y = var_1503_cast_fp16)[name = tensor("input_433_cast_fp16")]; tensor input_435_axes_0 = const()[name = tensor("input_435_axes_0"), val = tensor([-1])]; - tensor model_layers_7_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199955712)))]; - tensor model_layers_7_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199957824)))]; + tensor model_layers_7_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_7_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199939328)))]; + tensor model_layers_7_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_7_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199941440)))]; tensor input_435_cast_fp16 = layer_norm(axes = input_435_axes_0, beta = model_layers_7_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_7_norm_out_weight_to_fp16, x = input_433_cast_fp16)[name = tensor("input_435_cast_fp16")]; tensor input_437_axes_0 = const()[name = tensor("input_437_axes_0"), val = tensor([-1])]; - tensor model_layers_8_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199959936)))]; - tensor model_layers_8_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199962048)))]; + tensor model_layers_8_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199943552)))]; + tensor model_layers_8_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199945664)))]; tensor input_437_cast_fp16 = layer_norm(axes = input_437_axes_0, beta = model_layers_8_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_8_norm_feed_forward1_weight_to_fp16, x = input_435_cast_fp16)[name = tensor("input_437_cast_fp16")]; - tensor model_layers_8_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199964160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204158528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_8_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199947776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204142144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_8_feed_forward1_linear1_weight_to_fp16_quantized, x = input_437_cast_fp16)[name = tensor("linear_73_cast_fp16")]; tensor input_441_cast_fp16 = silu(x = linear_73_cast_fp16)[name = tensor("input_441_cast_fp16")]; - tensor model_layers_8_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204166784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208361152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204150400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208344768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_74_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_8_feed_forward1_linear2_weight_to_fp16_quantized, x = input_441_cast_fp16)[name = tensor("linear_74_cast_fp16")]; tensor var_1531_to_fp16 = const()[name = tensor("op_1531_to_fp16"), val = tensor(0x1p-1)]; tensor var_1532_cast_fp16 = mul(x = linear_74_cast_fp16, y = var_1531_to_fp16)[name = tensor("op_1532_cast_fp16")]; tensor input_447_cast_fp16 = add(x = input_435_cast_fp16, y = var_1532_cast_fp16)[name = tensor("input_447_cast_fp16")]; tensor query_17_axes_0 = const()[name = tensor("query_17_axes_0"), val = tensor([-1])]; - tensor model_layers_8_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208363264)))]; - tensor model_layers_8_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208365376)))]; + tensor model_layers_8_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208346880)))]; + tensor model_layers_8_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208348992)))]; tensor query_17_cast_fp16 = layer_norm(axes = query_17_axes_0, beta = model_layers_8_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_8_norm_self_att_weight_to_fp16, x = input_447_cast_fp16)[name = tensor("query_17_cast_fp16")]; - tensor model_layers_8_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208367488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209416128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208351104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209399744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_75_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_8_self_attn_linear_q_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_75_cast_fp16")]; tensor var_1548 = const()[name = tensor("op_1548"), val = tensor([1, -1, 8, 128])]; tensor q_49_cast_fp16 = reshape(shape = var_1548, x = linear_75_cast_fp16)[name = tensor("q_49_cast_fp16")]; - tensor model_layers_8_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209418240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210466880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209401856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210450496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_76_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_8_self_attn_linear_k_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_76_cast_fp16")]; tensor var_1552 = const()[name = tensor("op_1552"), val = tensor([1, -1, 8, 128])]; tensor k_33_cast_fp16 = reshape(shape = var_1552, x = linear_76_cast_fp16)[name = tensor("k_33_cast_fp16")]; - tensor model_layers_8_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210468992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211517632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210452608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211501248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_77_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_8_self_attn_linear_v_weight_to_fp16_quantized, x = query_17_cast_fp16)[name = tensor("linear_77_cast_fp16")]; tensor var_1556 = const()[name = tensor("op_1556"), val = tensor([1, -1, 8, 128])]; tensor v_17_cast_fp16 = reshape(shape = var_1556, x = linear_77_cast_fp16)[name = tensor("v_17_cast_fp16")]; tensor value_17_perm_0 = const()[name = tensor("value_17_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_8_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_8_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211519744)))]; - tensor var_1568_cast_fp16 = add(x = q_49_cast_fp16, y = model_layers_8_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1568_cast_fp16")]; - tensor model_layers_8_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_8_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211521856)))]; - tensor var_1570_cast_fp16 = add(x = q_49_cast_fp16, y = model_layers_8_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1570_cast_fp16")]; + tensor model_layers_8_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211503360))), scale = tensor([0x1.d1p-8, 0x1.848p-8, 0x1.8dcp-8, 0x1.bbcp-7, 0x1.4b4p-8, 0x1.0ccp-7, 0x1.234p-7, 0x1.34p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1568_cast_fp16 = add(x = q_49_cast_fp16, y = model_layers_8_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_1568_cast_fp16")]; + tensor model_layers_8_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211504448))), scale = tensor([0x1.608p-8, 0x1.538p-8, 0x1.6b8p-8, 0x1.f94p-9, 0x1.21cp-7, 0x1.c98p-9, 0x1.d08p-8, 0x1.638p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1570_cast_fp16 = add(x = q_49_cast_fp16, y = model_layers_8_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_1570_cast_fp16")]; tensor q_with_bias_v_17_perm_0 = const()[name = tensor("q_with_bias_v_17_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_183_transpose_x_0 = const()[name = tensor("x_183_transpose_x_0"), val = tensor(false)]; tensor x_183_transpose_y_0 = const()[name = tensor("x_183_transpose_y_0"), val = tensor(false)]; - tensor op_1572_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1572_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211523968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211781056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_1572_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1572_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211505536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211762624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_17_cast_fp16 = transpose(perm = q_with_bias_v_17_perm_0, x = var_1570_cast_fp16)[name = tensor("transpose_255")]; tensor x_183_cast_fp16 = matmul(transpose_x = x_183_transpose_x_0, transpose_y = x_183_transpose_y_0, x = q_with_bias_v_17_cast_fp16, y = op_1572_to_fp16_quantized)[name = tensor("x_183_cast_fp16")]; tensor x_185_pad_0 = const()[name = tensor("x_185_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1280,12 +1280,12 @@ program(1.0) tensor var_1605 = const()[name = tensor("op_1605"), val = tensor([1, -1, 1024])]; tensor var_1604_cast_fp16 = transpose(perm = var_1604_perm_0, x = x_189_cast_fp16)[name = tensor("transpose_251")]; tensor input_451_cast_fp16 = reshape(shape = var_1605, x = var_1604_cast_fp16)[name = tensor("input_451_cast_fp16")]; - tensor model_layers_8_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211781632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212830272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211763200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212811840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_79_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_8_self_attn_linear_out_weight_to_fp16_quantized, x = input_451_cast_fp16)[name = tensor("linear_79_cast_fp16")]; tensor input_455_cast_fp16 = add(x = input_447_cast_fp16, y = linear_79_cast_fp16)[name = tensor("input_455_cast_fp16")]; tensor x_193_axes_0 = const()[name = tensor("x_193_axes_0"), val = tensor([-1])]; - tensor model_layers_8_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212832384)))]; - tensor model_layers_8_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212834496)))]; + tensor model_layers_8_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212813952)))]; + tensor model_layers_8_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212816064)))]; tensor x_193_cast_fp16 = layer_norm(axes = x_193_axes_0, beta = model_layers_8_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_8_norm_conv_weight_to_fp16, x = input_455_cast_fp16)[name = tensor("x_193_cast_fp16")]; tensor input_457_perm_0 = const()[name = tensor("input_457_perm_0"), val = tensor([0, 2, 1])]; tensor input_459_pad_type_0 = const()[name = tensor("input_459_pad_type_0"), val = tensor("valid")]; @@ -1293,7 +1293,7 @@ program(1.0) tensor input_459_pad_0 = const()[name = tensor("input_459_pad_0"), val = tensor([0, 0])]; tensor input_459_dilations_0 = const()[name = tensor("input_459_dilations_0"), val = tensor([1])]; tensor input_459_groups_0 = const()[name = tensor("input_459_groups_0"), val = tensor(1)]; - tensor model_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212836608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214933824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212818176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214915392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_457_cast_fp16 = transpose(perm = input_457_perm_0, x = x_193_cast_fp16)[name = tensor("transpose_250")]; tensor input_459_cast_fp16 = conv(dilations = input_459_dilations_0, groups = input_459_groups_0, pad = input_459_pad_0, pad_type = input_459_pad_type_0, strides = input_459_strides_0, weight = model_layers_8_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_457_cast_fp16)[name = tensor("input_459_cast_fp16")]; tensor x_195_split_num_splits_0 = const()[name = tensor("x_195_split_num_splits_0"), val = tensor(2)]; @@ -1311,8 +1311,8 @@ program(1.0) tensor input_465_strides_0 = const()[name = tensor("input_465_strides_0"), val = tensor([1])]; tensor input_465_pad_0 = const()[name = tensor("input_465_pad_0"), val = tensor([0, 0])]; tensor input_465_dilations_0 = const()[name = tensor("input_465_dilations_0"), val = tensor([1])]; - tensor const_264_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_264_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214937984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214947264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_265_to_fp16 = const()[name = tensor("const_265_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214949376)))]; + tensor const_264_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_264_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214919552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214928832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_265_to_fp16 = const()[name = tensor("const_265_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214930944)))]; tensor input_467_cast_fp16 = conv(bias = const_265_to_fp16, dilations = input_465_dilations_0, groups = input_465_groups_0, pad = input_465_pad_0, pad_type = input_465_pad_type_0, strides = input_465_strides_0, weight = const_264_to_fp16_quantized, x = input_463_cast_fp16)[name = tensor("input_467_cast_fp16")]; tensor input_469_cast_fp16 = silu(x = input_467_cast_fp16)[name = tensor("input_469_cast_fp16")]; tensor x_197_pad_type_0 = const()[name = tensor("x_197_pad_type_0"), val = tensor("valid")]; @@ -1320,64 +1320,64 @@ program(1.0) tensor x_197_pad_0 = const()[name = tensor("x_197_pad_0"), val = tensor([0, 0])]; tensor x_197_dilations_0 = const()[name = tensor("x_197_dilations_0"), val = tensor([1])]; tensor x_197_groups_0 = const()[name = tensor("x_197_groups_0"), val = tensor(1)]; - tensor model_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214951488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216000128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214933056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215981696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_197_cast_fp16 = conv(dilations = x_197_dilations_0, groups = x_197_groups_0, pad = x_197_pad_0, pad_type = x_197_pad_type_0, strides = x_197_strides_0, weight = model_layers_8_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_469_cast_fp16)[name = tensor("x_197_cast_fp16")]; tensor input_471_perm_0 = const()[name = tensor("input_471_perm_0"), val = tensor([0, 2, 1])]; tensor input_471_cast_fp16 = transpose(perm = input_471_perm_0, x = x_197_cast_fp16)[name = tensor("transpose_249")]; tensor input_473_cast_fp16 = add(x = input_455_cast_fp16, y = input_471_cast_fp16)[name = tensor("input_473_cast_fp16")]; tensor input_475_axes_0 = const()[name = tensor("input_475_axes_0"), val = tensor([-1])]; - tensor model_layers_8_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216002240)))]; - tensor model_layers_8_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216004352)))]; + tensor model_layers_8_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215983808)))]; + tensor model_layers_8_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215985920)))]; tensor input_475_cast_fp16 = layer_norm(axes = input_475_axes_0, beta = model_layers_8_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_8_norm_feed_forward2_weight_to_fp16, x = input_473_cast_fp16)[name = tensor("input_475_cast_fp16")]; - tensor model_layers_8_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216006464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220200832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_8_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(215988032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220182400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_8_feed_forward2_linear1_weight_to_fp16_quantized, x = input_475_cast_fp16)[name = tensor("linear_80_cast_fp16")]; tensor input_479_cast_fp16 = silu(x = linear_80_cast_fp16)[name = tensor("input_479_cast_fp16")]; - tensor model_layers_8_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220209088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224403456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_8_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_8_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220190656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224385024))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_81_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_8_feed_forward2_linear2_weight_to_fp16_quantized, x = input_479_cast_fp16)[name = tensor("linear_81_cast_fp16")]; tensor var_1665_to_fp16 = const()[name = tensor("op_1665_to_fp16"), val = tensor(0x1p-1)]; tensor var_1666_cast_fp16 = mul(x = linear_81_cast_fp16, y = var_1665_to_fp16)[name = tensor("op_1666_cast_fp16")]; tensor input_485_cast_fp16 = add(x = input_473_cast_fp16, y = var_1666_cast_fp16)[name = tensor("input_485_cast_fp16")]; tensor input_487_axes_0 = const()[name = tensor("input_487_axes_0"), val = tensor([-1])]; - tensor model_layers_8_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224405568)))]; - tensor model_layers_8_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224407680)))]; + tensor model_layers_8_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_8_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224387136)))]; + tensor model_layers_8_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_8_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224389248)))]; tensor input_487_cast_fp16 = layer_norm(axes = input_487_axes_0, beta = model_layers_8_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_8_norm_out_weight_to_fp16, x = input_485_cast_fp16)[name = tensor("input_487_cast_fp16")]; tensor input_489_axes_0 = const()[name = tensor("input_489_axes_0"), val = tensor([-1])]; - tensor model_layers_9_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224409792)))]; - tensor model_layers_9_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224411904)))]; + tensor model_layers_9_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224391360)))]; + tensor model_layers_9_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224393472)))]; tensor input_489_cast_fp16 = layer_norm(axes = input_489_axes_0, beta = model_layers_9_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_9_norm_feed_forward1_weight_to_fp16, x = input_487_cast_fp16)[name = tensor("input_489_cast_fp16")]; - tensor model_layers_9_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224414016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228608384))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_9_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224395584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228589952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_82_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_9_feed_forward1_linear1_weight_to_fp16_quantized, x = input_489_cast_fp16)[name = tensor("linear_82_cast_fp16")]; tensor input_493_cast_fp16 = silu(x = linear_82_cast_fp16)[name = tensor("input_493_cast_fp16")]; - tensor model_layers_9_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228616640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232811008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228598208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232792576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_83_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_9_feed_forward1_linear2_weight_to_fp16_quantized, x = input_493_cast_fp16)[name = tensor("linear_83_cast_fp16")]; tensor var_1694_to_fp16 = const()[name = tensor("op_1694_to_fp16"), val = tensor(0x1p-1)]; tensor var_1695_cast_fp16 = mul(x = linear_83_cast_fp16, y = var_1694_to_fp16)[name = tensor("op_1695_cast_fp16")]; tensor input_499_cast_fp16 = add(x = input_487_cast_fp16, y = var_1695_cast_fp16)[name = tensor("input_499_cast_fp16")]; tensor query_19_axes_0 = const()[name = tensor("query_19_axes_0"), val = tensor([-1])]; - tensor model_layers_9_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232813120)))]; - tensor model_layers_9_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232815232)))]; + tensor model_layers_9_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232794688)))]; + tensor model_layers_9_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232796800)))]; tensor query_19_cast_fp16 = layer_norm(axes = query_19_axes_0, beta = model_layers_9_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_9_norm_self_att_weight_to_fp16, x = input_499_cast_fp16)[name = tensor("query_19_cast_fp16")]; - tensor model_layers_9_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232817344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233865984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232798912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233847552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_84_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_9_self_attn_linear_q_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_84_cast_fp16")]; tensor var_1711 = const()[name = tensor("op_1711"), val = tensor([1, -1, 8, 128])]; tensor q_55_cast_fp16 = reshape(shape = var_1711, x = linear_84_cast_fp16)[name = tensor("q_55_cast_fp16")]; - tensor model_layers_9_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233868096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234916736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233849664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234898304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_85_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_9_self_attn_linear_k_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_85_cast_fp16")]; tensor var_1715 = const()[name = tensor("op_1715"), val = tensor([1, -1, 8, 128])]; tensor k_37_cast_fp16 = reshape(shape = var_1715, x = linear_85_cast_fp16)[name = tensor("k_37_cast_fp16")]; - tensor model_layers_9_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234918848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235967488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234900416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235949056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_86_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_9_self_attn_linear_v_weight_to_fp16_quantized, x = query_19_cast_fp16)[name = tensor("linear_86_cast_fp16")]; tensor var_1719 = const()[name = tensor("op_1719"), val = tensor([1, -1, 8, 128])]; tensor v_19_cast_fp16 = reshape(shape = var_1719, x = linear_86_cast_fp16)[name = tensor("v_19_cast_fp16")]; tensor value_19_perm_0 = const()[name = tensor("value_19_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_9_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_9_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235969600)))]; - tensor var_1731_cast_fp16 = add(x = q_55_cast_fp16, y = model_layers_9_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1731_cast_fp16")]; - tensor model_layers_9_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_9_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235971712)))]; - tensor var_1733_cast_fp16 = add(x = q_55_cast_fp16, y = model_layers_9_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1733_cast_fp16")]; + tensor model_layers_9_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235951168))), scale = tensor([0x1.15p-7, 0x1.4d8p-8, 0x1.eb8p-8, 0x1.3f4p-6, 0x1.bdcp-8, 0x1.e5cp-8, 0x1.968p-8, 0x1.034p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1731_cast_fp16 = add(x = q_55_cast_fp16, y = model_layers_9_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_1731_cast_fp16")]; + tensor model_layers_9_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235952256))), scale = tensor([0x1.46p-8, 0x1.5c4p-7, 0x1.fa4p-8, 0x1.0a4p-8, 0x1.a3p-9, 0x1.364p-7, 0x1.694p-7, 0x1.f8p-10]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1733_cast_fp16 = add(x = q_55_cast_fp16, y = model_layers_9_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_1733_cast_fp16")]; tensor q_with_bias_v_19_perm_0 = const()[name = tensor("q_with_bias_v_19_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_205_transpose_x_0 = const()[name = tensor("x_205_transpose_x_0"), val = tensor(false)]; tensor x_205_transpose_y_0 = const()[name = tensor("x_205_transpose_y_0"), val = tensor(false)]; - tensor op_1735_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1735_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235973824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236230912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_1735_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1735_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235953344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236210432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_19_cast_fp16 = transpose(perm = q_with_bias_v_19_perm_0, x = var_1733_cast_fp16)[name = tensor("transpose_248")]; tensor x_205_cast_fp16 = matmul(transpose_x = x_205_transpose_x_0, transpose_y = x_205_transpose_y_0, x = q_with_bias_v_19_cast_fp16, y = op_1735_to_fp16_quantized)[name = tensor("x_205_cast_fp16")]; tensor x_207_pad_0 = const()[name = tensor("x_207_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1417,12 +1417,12 @@ program(1.0) tensor var_1768 = const()[name = tensor("op_1768"), val = tensor([1, -1, 1024])]; tensor var_1767_cast_fp16 = transpose(perm = var_1767_perm_0, x = x_211_cast_fp16)[name = tensor("transpose_244")]; tensor input_503_cast_fp16 = reshape(shape = var_1768, x = var_1767_cast_fp16)[name = tensor("input_503_cast_fp16")]; - tensor model_layers_9_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236231488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237280128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236211008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237259648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_88_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_9_self_attn_linear_out_weight_to_fp16_quantized, x = input_503_cast_fp16)[name = tensor("linear_88_cast_fp16")]; tensor input_507_cast_fp16 = add(x = input_499_cast_fp16, y = linear_88_cast_fp16)[name = tensor("input_507_cast_fp16")]; tensor x_215_axes_0 = const()[name = tensor("x_215_axes_0"), val = tensor([-1])]; - tensor model_layers_9_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237282240)))]; - tensor model_layers_9_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237284352)))]; + tensor model_layers_9_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237261760)))]; + tensor model_layers_9_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237263872)))]; tensor x_215_cast_fp16 = layer_norm(axes = x_215_axes_0, beta = model_layers_9_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_9_norm_conv_weight_to_fp16, x = input_507_cast_fp16)[name = tensor("x_215_cast_fp16")]; tensor input_509_perm_0 = const()[name = tensor("input_509_perm_0"), val = tensor([0, 2, 1])]; tensor input_511_pad_type_0 = const()[name = tensor("input_511_pad_type_0"), val = tensor("valid")]; @@ -1430,7 +1430,7 @@ program(1.0) tensor input_511_pad_0 = const()[name = tensor("input_511_pad_0"), val = tensor([0, 0])]; tensor input_511_dilations_0 = const()[name = tensor("input_511_dilations_0"), val = tensor([1])]; tensor input_511_groups_0 = const()[name = tensor("input_511_groups_0"), val = tensor(1)]; - tensor model_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237286464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239383680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237265984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239363200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_509_cast_fp16 = transpose(perm = input_509_perm_0, x = x_215_cast_fp16)[name = tensor("transpose_243")]; tensor input_511_cast_fp16 = conv(dilations = input_511_dilations_0, groups = input_511_groups_0, pad = input_511_pad_0, pad_type = input_511_pad_type_0, strides = input_511_strides_0, weight = model_layers_9_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_509_cast_fp16)[name = tensor("input_511_cast_fp16")]; tensor x_217_split_num_splits_0 = const()[name = tensor("x_217_split_num_splits_0"), val = tensor(2)]; @@ -1448,8 +1448,8 @@ program(1.0) tensor input_517_strides_0 = const()[name = tensor("input_517_strides_0"), val = tensor([1])]; tensor input_517_pad_0 = const()[name = tensor("input_517_pad_0"), val = tensor([0, 0])]; tensor input_517_dilations_0 = const()[name = tensor("input_517_dilations_0"), val = tensor([1])]; - tensor const_266_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_266_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239387840))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239397120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_267_to_fp16 = const()[name = tensor("const_267_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239399232)))]; + tensor const_266_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_266_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239367360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239376640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_267_to_fp16 = const()[name = tensor("const_267_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239378752)))]; tensor input_519_cast_fp16 = conv(bias = const_267_to_fp16, dilations = input_517_dilations_0, groups = input_517_groups_0, pad = input_517_pad_0, pad_type = input_517_pad_type_0, strides = input_517_strides_0, weight = const_266_to_fp16_quantized, x = input_515_cast_fp16)[name = tensor("input_519_cast_fp16")]; tensor input_521_cast_fp16 = silu(x = input_519_cast_fp16)[name = tensor("input_521_cast_fp16")]; tensor x_219_pad_type_0 = const()[name = tensor("x_219_pad_type_0"), val = tensor("valid")]; @@ -1457,64 +1457,64 @@ program(1.0) tensor x_219_pad_0 = const()[name = tensor("x_219_pad_0"), val = tensor([0, 0])]; tensor x_219_dilations_0 = const()[name = tensor("x_219_dilations_0"), val = tensor([1])]; tensor x_219_groups_0 = const()[name = tensor("x_219_groups_0"), val = tensor(1)]; - tensor model_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239401344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240449984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(239380864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240429504))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_219_cast_fp16 = conv(dilations = x_219_dilations_0, groups = x_219_groups_0, pad = x_219_pad_0, pad_type = x_219_pad_type_0, strides = x_219_strides_0, weight = model_layers_9_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_521_cast_fp16)[name = tensor("x_219_cast_fp16")]; tensor input_523_perm_0 = const()[name = tensor("input_523_perm_0"), val = tensor([0, 2, 1])]; tensor input_523_cast_fp16 = transpose(perm = input_523_perm_0, x = x_219_cast_fp16)[name = tensor("transpose_242")]; tensor input_525_cast_fp16 = add(x = input_507_cast_fp16, y = input_523_cast_fp16)[name = tensor("input_525_cast_fp16")]; tensor input_527_axes_0 = const()[name = tensor("input_527_axes_0"), val = tensor([-1])]; - tensor model_layers_9_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240452096)))]; - tensor model_layers_9_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240454208)))]; + tensor model_layers_9_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240431616)))]; + tensor model_layers_9_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240433728)))]; tensor input_527_cast_fp16 = layer_norm(axes = input_527_axes_0, beta = model_layers_9_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_9_norm_feed_forward2_weight_to_fp16, x = input_525_cast_fp16)[name = tensor("input_527_cast_fp16")]; - tensor model_layers_9_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240456320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244650688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_9_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(240435840))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244630208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_89_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_9_feed_forward2_linear1_weight_to_fp16_quantized, x = input_527_cast_fp16)[name = tensor("linear_89_cast_fp16")]; tensor input_531_cast_fp16 = silu(x = linear_89_cast_fp16)[name = tensor("input_531_cast_fp16")]; - tensor model_layers_9_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244658944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248853312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_9_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_9_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(244638464))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248832832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_90_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_9_feed_forward2_linear2_weight_to_fp16_quantized, x = input_531_cast_fp16)[name = tensor("linear_90_cast_fp16")]; tensor var_1828_to_fp16 = const()[name = tensor("op_1828_to_fp16"), val = tensor(0x1p-1)]; tensor var_1829_cast_fp16 = mul(x = linear_90_cast_fp16, y = var_1828_to_fp16)[name = tensor("op_1829_cast_fp16")]; tensor input_537_cast_fp16 = add(x = input_525_cast_fp16, y = var_1829_cast_fp16)[name = tensor("input_537_cast_fp16")]; tensor input_539_axes_0 = const()[name = tensor("input_539_axes_0"), val = tensor([-1])]; - tensor model_layers_9_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248855424)))]; - tensor model_layers_9_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248857536)))]; + tensor model_layers_9_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_9_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248834944)))]; + tensor model_layers_9_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_9_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248837056)))]; tensor input_539_cast_fp16 = layer_norm(axes = input_539_axes_0, beta = model_layers_9_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_9_norm_out_weight_to_fp16, x = input_537_cast_fp16)[name = tensor("input_539_cast_fp16")]; tensor input_541_axes_0 = const()[name = tensor("input_541_axes_0"), val = tensor([-1])]; - tensor model_layers_10_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248859648)))]; - tensor model_layers_10_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248861760)))]; + tensor model_layers_10_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248839168)))]; + tensor model_layers_10_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248841280)))]; tensor input_541_cast_fp16 = layer_norm(axes = input_541_axes_0, beta = model_layers_10_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_10_norm_feed_forward1_weight_to_fp16, x = input_539_cast_fp16)[name = tensor("input_541_cast_fp16")]; - tensor model_layers_10_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248863872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253058240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_10_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248843392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253037760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_91_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_10_feed_forward1_linear1_weight_to_fp16_quantized, x = input_541_cast_fp16)[name = tensor("linear_91_cast_fp16")]; tensor input_545_cast_fp16 = silu(x = linear_91_cast_fp16)[name = tensor("input_545_cast_fp16")]; - tensor model_layers_10_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253066496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257260864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253046016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257240384))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_92_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_10_feed_forward1_linear2_weight_to_fp16_quantized, x = input_545_cast_fp16)[name = tensor("linear_92_cast_fp16")]; tensor var_1857_to_fp16 = const()[name = tensor("op_1857_to_fp16"), val = tensor(0x1p-1)]; tensor var_1858_cast_fp16 = mul(x = linear_92_cast_fp16, y = var_1857_to_fp16)[name = tensor("op_1858_cast_fp16")]; tensor input_551_cast_fp16 = add(x = input_539_cast_fp16, y = var_1858_cast_fp16)[name = tensor("input_551_cast_fp16")]; tensor query_21_axes_0 = const()[name = tensor("query_21_axes_0"), val = tensor([-1])]; - tensor model_layers_10_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257262976)))]; - tensor model_layers_10_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257265088)))]; + tensor model_layers_10_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257242496)))]; + tensor model_layers_10_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257244608)))]; tensor query_21_cast_fp16 = layer_norm(axes = query_21_axes_0, beta = model_layers_10_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_10_norm_self_att_weight_to_fp16, x = input_551_cast_fp16)[name = tensor("query_21_cast_fp16")]; - tensor model_layers_10_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257267200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258315840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257246720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258295360))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_93_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_10_self_attn_linear_q_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_93_cast_fp16")]; tensor var_1874 = const()[name = tensor("op_1874"), val = tensor([1, -1, 8, 128])]; tensor q_61_cast_fp16 = reshape(shape = var_1874, x = linear_93_cast_fp16)[name = tensor("q_61_cast_fp16")]; - tensor model_layers_10_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258317952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259366592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258297472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259346112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_94_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_10_self_attn_linear_k_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_94_cast_fp16")]; tensor var_1878 = const()[name = tensor("op_1878"), val = tensor([1, -1, 8, 128])]; tensor k_41_cast_fp16 = reshape(shape = var_1878, x = linear_94_cast_fp16)[name = tensor("k_41_cast_fp16")]; - tensor model_layers_10_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259368704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260417344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259348224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260396864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_95_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_10_self_attn_linear_v_weight_to_fp16_quantized, x = query_21_cast_fp16)[name = tensor("linear_95_cast_fp16")]; tensor var_1882 = const()[name = tensor("op_1882"), val = tensor([1, -1, 8, 128])]; tensor v_21_cast_fp16 = reshape(shape = var_1882, x = linear_95_cast_fp16)[name = tensor("v_21_cast_fp16")]; tensor value_21_perm_0 = const()[name = tensor("value_21_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_10_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_10_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260419456)))]; - tensor var_1894_cast_fp16 = add(x = q_61_cast_fp16, y = model_layers_10_self_attn_pos_bias_u_to_fp16)[name = tensor("op_1894_cast_fp16")]; - tensor model_layers_10_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_10_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260421568)))]; - tensor var_1896_cast_fp16 = add(x = q_61_cast_fp16, y = model_layers_10_self_attn_pos_bias_v_to_fp16)[name = tensor("op_1896_cast_fp16")]; + tensor model_layers_10_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260398976))), scale = tensor([0x1.988p-8, 0x1.19p-7, 0x1.6b4p-7, 0x1.414p-8, 0x1.278p-8, 0x1.1d8p-7, 0x1.f08p-8, 0x1.488p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1894_cast_fp16 = add(x = q_61_cast_fp16, y = model_layers_10_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_1894_cast_fp16")]; + tensor model_layers_10_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260400064))), scale = tensor([0x1.46p-7, 0x1.bccp-8, 0x1.158p-9, 0x1.e9p-9, 0x1.23p-8, 0x1.f2cp-9, 0x1.4bcp-7, 0x1.034p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_1896_cast_fp16 = add(x = q_61_cast_fp16, y = model_layers_10_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_1896_cast_fp16")]; tensor q_with_bias_v_21_perm_0 = const()[name = tensor("q_with_bias_v_21_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_227_transpose_x_0 = const()[name = tensor("x_227_transpose_x_0"), val = tensor(false)]; tensor x_227_transpose_y_0 = const()[name = tensor("x_227_transpose_y_0"), val = tensor(false)]; - tensor op_1898_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1898_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260423680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260680768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_1898_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_1898_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260401152))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260658240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_21_cast_fp16 = transpose(perm = q_with_bias_v_21_perm_0, x = var_1896_cast_fp16)[name = tensor("transpose_241")]; tensor x_227_cast_fp16 = matmul(transpose_x = x_227_transpose_x_0, transpose_y = x_227_transpose_y_0, x = q_with_bias_v_21_cast_fp16, y = op_1898_to_fp16_quantized)[name = tensor("x_227_cast_fp16")]; tensor x_229_pad_0 = const()[name = tensor("x_229_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1554,12 +1554,12 @@ program(1.0) tensor var_1931 = const()[name = tensor("op_1931"), val = tensor([1, -1, 1024])]; tensor var_1930_cast_fp16 = transpose(perm = var_1930_perm_0, x = x_233_cast_fp16)[name = tensor("transpose_237")]; tensor input_555_cast_fp16 = reshape(shape = var_1931, x = var_1930_cast_fp16)[name = tensor("input_555_cast_fp16")]; - tensor model_layers_10_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260681344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261729984))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260658816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261707456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_97_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_10_self_attn_linear_out_weight_to_fp16_quantized, x = input_555_cast_fp16)[name = tensor("linear_97_cast_fp16")]; tensor input_559_cast_fp16 = add(x = input_551_cast_fp16, y = linear_97_cast_fp16)[name = tensor("input_559_cast_fp16")]; tensor x_237_axes_0 = const()[name = tensor("x_237_axes_0"), val = tensor([-1])]; - tensor model_layers_10_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261732096)))]; - tensor model_layers_10_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261734208)))]; + tensor model_layers_10_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261709568)))]; + tensor model_layers_10_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261711680)))]; tensor x_237_cast_fp16 = layer_norm(axes = x_237_axes_0, beta = model_layers_10_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_10_norm_conv_weight_to_fp16, x = input_559_cast_fp16)[name = tensor("x_237_cast_fp16")]; tensor input_561_perm_0 = const()[name = tensor("input_561_perm_0"), val = tensor([0, 2, 1])]; tensor input_563_pad_type_0 = const()[name = tensor("input_563_pad_type_0"), val = tensor("valid")]; @@ -1567,7 +1567,7 @@ program(1.0) tensor input_563_pad_0 = const()[name = tensor("input_563_pad_0"), val = tensor([0, 0])]; tensor input_563_dilations_0 = const()[name = tensor("input_563_dilations_0"), val = tensor([1])]; tensor input_563_groups_0 = const()[name = tensor("input_563_groups_0"), val = tensor(1)]; - tensor model_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261736320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263833536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261713792))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263811008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_561_cast_fp16 = transpose(perm = input_561_perm_0, x = x_237_cast_fp16)[name = tensor("transpose_236")]; tensor input_563_cast_fp16 = conv(dilations = input_563_dilations_0, groups = input_563_groups_0, pad = input_563_pad_0, pad_type = input_563_pad_type_0, strides = input_563_strides_0, weight = model_layers_10_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_561_cast_fp16)[name = tensor("input_563_cast_fp16")]; tensor x_239_split_num_splits_0 = const()[name = tensor("x_239_split_num_splits_0"), val = tensor(2)]; @@ -1585,8 +1585,8 @@ program(1.0) tensor input_569_strides_0 = const()[name = tensor("input_569_strides_0"), val = tensor([1])]; tensor input_569_pad_0 = const()[name = tensor("input_569_pad_0"), val = tensor([0, 0])]; tensor input_569_dilations_0 = const()[name = tensor("input_569_dilations_0"), val = tensor([1])]; - tensor const_268_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_268_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263837696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263846976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_269_to_fp16 = const()[name = tensor("const_269_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263849088)))]; + tensor const_268_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_268_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263815168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263824448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_269_to_fp16 = const()[name = tensor("const_269_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263826560)))]; tensor input_571_cast_fp16 = conv(bias = const_269_to_fp16, dilations = input_569_dilations_0, groups = input_569_groups_0, pad = input_569_pad_0, pad_type = input_569_pad_type_0, strides = input_569_strides_0, weight = const_268_to_fp16_quantized, x = input_567_cast_fp16)[name = tensor("input_571_cast_fp16")]; tensor input_573_cast_fp16 = silu(x = input_571_cast_fp16)[name = tensor("input_573_cast_fp16")]; tensor x_241_pad_type_0 = const()[name = tensor("x_241_pad_type_0"), val = tensor("valid")]; @@ -1594,64 +1594,64 @@ program(1.0) tensor x_241_pad_0 = const()[name = tensor("x_241_pad_0"), val = tensor([0, 0])]; tensor x_241_dilations_0 = const()[name = tensor("x_241_dilations_0"), val = tensor([1])]; tensor x_241_groups_0 = const()[name = tensor("x_241_groups_0"), val = tensor(1)]; - tensor model_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263851200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264899840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263828672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264877312))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_241_cast_fp16 = conv(dilations = x_241_dilations_0, groups = x_241_groups_0, pad = x_241_pad_0, pad_type = x_241_pad_type_0, strides = x_241_strides_0, weight = model_layers_10_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_573_cast_fp16)[name = tensor("x_241_cast_fp16")]; tensor input_575_perm_0 = const()[name = tensor("input_575_perm_0"), val = tensor([0, 2, 1])]; tensor input_575_cast_fp16 = transpose(perm = input_575_perm_0, x = x_241_cast_fp16)[name = tensor("transpose_235")]; tensor input_577_cast_fp16 = add(x = input_559_cast_fp16, y = input_575_cast_fp16)[name = tensor("input_577_cast_fp16")]; tensor input_579_axes_0 = const()[name = tensor("input_579_axes_0"), val = tensor([-1])]; - tensor model_layers_10_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264901952)))]; - tensor model_layers_10_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264904064)))]; + tensor model_layers_10_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264879424)))]; + tensor model_layers_10_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264881536)))]; tensor input_579_cast_fp16 = layer_norm(axes = input_579_axes_0, beta = model_layers_10_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_10_norm_feed_forward2_weight_to_fp16, x = input_577_cast_fp16)[name = tensor("input_579_cast_fp16")]; - tensor model_layers_10_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264906176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269100544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_10_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(264883648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269078016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_98_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_10_feed_forward2_linear1_weight_to_fp16_quantized, x = input_579_cast_fp16)[name = tensor("linear_98_cast_fp16")]; tensor input_583_cast_fp16 = silu(x = linear_98_cast_fp16)[name = tensor("input_583_cast_fp16")]; - tensor model_layers_10_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269108800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273303168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_10_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_10_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269086272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273280640))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_99_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_10_feed_forward2_linear2_weight_to_fp16_quantized, x = input_583_cast_fp16)[name = tensor("linear_99_cast_fp16")]; tensor var_1991_to_fp16 = const()[name = tensor("op_1991_to_fp16"), val = tensor(0x1p-1)]; tensor var_1992_cast_fp16 = mul(x = linear_99_cast_fp16, y = var_1991_to_fp16)[name = tensor("op_1992_cast_fp16")]; tensor input_589_cast_fp16 = add(x = input_577_cast_fp16, y = var_1992_cast_fp16)[name = tensor("input_589_cast_fp16")]; tensor input_591_axes_0 = const()[name = tensor("input_591_axes_0"), val = tensor([-1])]; - tensor model_layers_10_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273305280)))]; - tensor model_layers_10_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273307392)))]; + tensor model_layers_10_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_10_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273282752)))]; + tensor model_layers_10_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_10_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273284864)))]; tensor input_591_cast_fp16 = layer_norm(axes = input_591_axes_0, beta = model_layers_10_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_10_norm_out_weight_to_fp16, x = input_589_cast_fp16)[name = tensor("input_591_cast_fp16")]; tensor input_593_axes_0 = const()[name = tensor("input_593_axes_0"), val = tensor([-1])]; - tensor model_layers_11_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273309504)))]; - tensor model_layers_11_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273311616)))]; + tensor model_layers_11_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273286976)))]; + tensor model_layers_11_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273289088)))]; tensor input_593_cast_fp16 = layer_norm(axes = input_593_axes_0, beta = model_layers_11_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_11_norm_feed_forward1_weight_to_fp16, x = input_591_cast_fp16)[name = tensor("input_593_cast_fp16")]; - tensor model_layers_11_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273313728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277508096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_11_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273291200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277485568))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_11_feed_forward1_linear1_weight_to_fp16_quantized, x = input_593_cast_fp16)[name = tensor("linear_100_cast_fp16")]; tensor input_597_cast_fp16 = silu(x = linear_100_cast_fp16)[name = tensor("input_597_cast_fp16")]; - tensor model_layers_11_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277516352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281710720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277493824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281688192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_101_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_11_feed_forward1_linear2_weight_to_fp16_quantized, x = input_597_cast_fp16)[name = tensor("linear_101_cast_fp16")]; tensor var_2020_to_fp16 = const()[name = tensor("op_2020_to_fp16"), val = tensor(0x1p-1)]; tensor var_2021_cast_fp16 = mul(x = linear_101_cast_fp16, y = var_2020_to_fp16)[name = tensor("op_2021_cast_fp16")]; tensor input_603_cast_fp16 = add(x = input_591_cast_fp16, y = var_2021_cast_fp16)[name = tensor("input_603_cast_fp16")]; tensor query_23_axes_0 = const()[name = tensor("query_23_axes_0"), val = tensor([-1])]; - tensor model_layers_11_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281712832)))]; - tensor model_layers_11_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281714944)))]; + tensor model_layers_11_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281690304)))]; + tensor model_layers_11_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281692416)))]; tensor query_23_cast_fp16 = layer_norm(axes = query_23_axes_0, beta = model_layers_11_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_11_norm_self_att_weight_to_fp16, x = input_603_cast_fp16)[name = tensor("query_23_cast_fp16")]; - tensor model_layers_11_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281717056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(282765696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281694528))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(282743168))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_102_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_11_self_attn_linear_q_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_102_cast_fp16")]; tensor var_2037 = const()[name = tensor("op_2037"), val = tensor([1, -1, 8, 128])]; tensor q_67_cast_fp16 = reshape(shape = var_2037, x = linear_102_cast_fp16)[name = tensor("q_67_cast_fp16")]; - tensor model_layers_11_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(282767808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283816448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(282745280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283793920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_103_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_11_self_attn_linear_k_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_103_cast_fp16")]; tensor var_2041 = const()[name = tensor("op_2041"), val = tensor([1, -1, 8, 128])]; tensor k_45_cast_fp16 = reshape(shape = var_2041, x = linear_103_cast_fp16)[name = tensor("k_45_cast_fp16")]; - tensor model_layers_11_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283818560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284867200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283796032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284844672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_104_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_11_self_attn_linear_v_weight_to_fp16_quantized, x = query_23_cast_fp16)[name = tensor("linear_104_cast_fp16")]; tensor var_2045 = const()[name = tensor("op_2045"), val = tensor([1, -1, 8, 128])]; tensor v_23_cast_fp16 = reshape(shape = var_2045, x = linear_104_cast_fp16)[name = tensor("v_23_cast_fp16")]; tensor value_23_perm_0 = const()[name = tensor("value_23_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_11_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_11_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284869312)))]; - tensor var_2057_cast_fp16 = add(x = q_67_cast_fp16, y = model_layers_11_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2057_cast_fp16")]; - tensor model_layers_11_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_11_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284871424)))]; - tensor var_2059_cast_fp16 = add(x = q_67_cast_fp16, y = model_layers_11_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2059_cast_fp16")]; + tensor model_layers_11_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284846784))), scale = tensor([0x1.d9cp-8, 0x1.a74p-8, 0x1.1e8p-7, 0x1.e1cp-8, 0x1.eb8p-8, 0x1.e38p-8, 0x1.f54p-8, 0x1.f64p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2057_cast_fp16 = add(x = q_67_cast_fp16, y = model_layers_11_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_2057_cast_fp16")]; + tensor model_layers_11_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284847872))), scale = tensor([0x1.b58p-8, 0x1.32p-7, 0x1.338p-9, 0x1.dccp-8, 0x1.3a8p-9, 0x1.664p-8, 0x1.2a4p-7, 0x1.27p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2059_cast_fp16 = add(x = q_67_cast_fp16, y = model_layers_11_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_2059_cast_fp16")]; tensor q_with_bias_v_23_perm_0 = const()[name = tensor("q_with_bias_v_23_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_249_transpose_x_0 = const()[name = tensor("x_249_transpose_x_0"), val = tensor(false)]; tensor x_249_transpose_y_0 = const()[name = tensor("x_249_transpose_y_0"), val = tensor(false)]; - tensor op_2061_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2061_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284873536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285130624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_2061_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2061_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284848960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285106048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_23_cast_fp16 = transpose(perm = q_with_bias_v_23_perm_0, x = var_2059_cast_fp16)[name = tensor("transpose_234")]; tensor x_249_cast_fp16 = matmul(transpose_x = x_249_transpose_x_0, transpose_y = x_249_transpose_y_0, x = q_with_bias_v_23_cast_fp16, y = op_2061_to_fp16_quantized)[name = tensor("x_249_cast_fp16")]; tensor x_251_pad_0 = const()[name = tensor("x_251_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1691,12 +1691,12 @@ program(1.0) tensor var_2094 = const()[name = tensor("op_2094"), val = tensor([1, -1, 1024])]; tensor var_2093_cast_fp16 = transpose(perm = var_2093_perm_0, x = x_255_cast_fp16)[name = tensor("transpose_230")]; tensor input_607_cast_fp16 = reshape(shape = var_2094, x = var_2093_cast_fp16)[name = tensor("input_607_cast_fp16")]; - tensor model_layers_11_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285131200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286179840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285106624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286155264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_106_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_11_self_attn_linear_out_weight_to_fp16_quantized, x = input_607_cast_fp16)[name = tensor("linear_106_cast_fp16")]; tensor input_611_cast_fp16 = add(x = input_603_cast_fp16, y = linear_106_cast_fp16)[name = tensor("input_611_cast_fp16")]; tensor x_259_axes_0 = const()[name = tensor("x_259_axes_0"), val = tensor([-1])]; - tensor model_layers_11_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286181952)))]; - tensor model_layers_11_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286184064)))]; + tensor model_layers_11_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286157376)))]; + tensor model_layers_11_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286159488)))]; tensor x_259_cast_fp16 = layer_norm(axes = x_259_axes_0, beta = model_layers_11_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_11_norm_conv_weight_to_fp16, x = input_611_cast_fp16)[name = tensor("x_259_cast_fp16")]; tensor input_613_perm_0 = const()[name = tensor("input_613_perm_0"), val = tensor([0, 2, 1])]; tensor input_615_pad_type_0 = const()[name = tensor("input_615_pad_type_0"), val = tensor("valid")]; @@ -1704,7 +1704,7 @@ program(1.0) tensor input_615_pad_0 = const()[name = tensor("input_615_pad_0"), val = tensor([0, 0])]; tensor input_615_dilations_0 = const()[name = tensor("input_615_dilations_0"), val = tensor([1])]; tensor input_615_groups_0 = const()[name = tensor("input_615_groups_0"), val = tensor(1)]; - tensor model_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286186176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288283392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286161600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288258816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_613_cast_fp16 = transpose(perm = input_613_perm_0, x = x_259_cast_fp16)[name = tensor("transpose_229")]; tensor input_615_cast_fp16 = conv(dilations = input_615_dilations_0, groups = input_615_groups_0, pad = input_615_pad_0, pad_type = input_615_pad_type_0, strides = input_615_strides_0, weight = model_layers_11_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_613_cast_fp16)[name = tensor("input_615_cast_fp16")]; tensor x_261_split_num_splits_0 = const()[name = tensor("x_261_split_num_splits_0"), val = tensor(2)]; @@ -1722,8 +1722,8 @@ program(1.0) tensor input_621_strides_0 = const()[name = tensor("input_621_strides_0"), val = tensor([1])]; tensor input_621_pad_0 = const()[name = tensor("input_621_pad_0"), val = tensor([0, 0])]; tensor input_621_dilations_0 = const()[name = tensor("input_621_dilations_0"), val = tensor([1])]; - tensor const_270_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_270_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288287552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288296832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_271_to_fp16 = const()[name = tensor("const_271_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288298944)))]; + tensor const_270_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_270_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288262976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288272256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_271_to_fp16 = const()[name = tensor("const_271_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288274368)))]; tensor input_623_cast_fp16 = conv(bias = const_271_to_fp16, dilations = input_621_dilations_0, groups = input_621_groups_0, pad = input_621_pad_0, pad_type = input_621_pad_type_0, strides = input_621_strides_0, weight = const_270_to_fp16_quantized, x = input_619_cast_fp16)[name = tensor("input_623_cast_fp16")]; tensor input_625_cast_fp16 = silu(x = input_623_cast_fp16)[name = tensor("input_625_cast_fp16")]; tensor x_263_pad_type_0 = const()[name = tensor("x_263_pad_type_0"), val = tensor("valid")]; @@ -1731,64 +1731,64 @@ program(1.0) tensor x_263_pad_0 = const()[name = tensor("x_263_pad_0"), val = tensor([0, 0])]; tensor x_263_dilations_0 = const()[name = tensor("x_263_dilations_0"), val = tensor([1])]; tensor x_263_groups_0 = const()[name = tensor("x_263_groups_0"), val = tensor(1)]; - tensor model_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288301056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289349696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288276480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289325120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_263_cast_fp16 = conv(dilations = x_263_dilations_0, groups = x_263_groups_0, pad = x_263_pad_0, pad_type = x_263_pad_type_0, strides = x_263_strides_0, weight = model_layers_11_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_625_cast_fp16)[name = tensor("x_263_cast_fp16")]; tensor input_627_perm_0 = const()[name = tensor("input_627_perm_0"), val = tensor([0, 2, 1])]; tensor input_627_cast_fp16 = transpose(perm = input_627_perm_0, x = x_263_cast_fp16)[name = tensor("transpose_228")]; tensor input_629_cast_fp16 = add(x = input_611_cast_fp16, y = input_627_cast_fp16)[name = tensor("input_629_cast_fp16")]; tensor input_631_axes_0 = const()[name = tensor("input_631_axes_0"), val = tensor([-1])]; - tensor model_layers_11_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289351808)))]; - tensor model_layers_11_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289353920)))]; + tensor model_layers_11_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289327232)))]; + tensor model_layers_11_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289329344)))]; tensor input_631_cast_fp16 = layer_norm(axes = input_631_axes_0, beta = model_layers_11_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_11_norm_feed_forward2_weight_to_fp16, x = input_629_cast_fp16)[name = tensor("input_631_cast_fp16")]; - tensor model_layers_11_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289356032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293550400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_11_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289331456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293525824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_11_feed_forward2_linear1_weight_to_fp16_quantized, x = input_631_cast_fp16)[name = tensor("linear_107_cast_fp16")]; tensor input_635_cast_fp16 = silu(x = linear_107_cast_fp16)[name = tensor("input_635_cast_fp16")]; - tensor model_layers_11_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293558656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297753024))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_11_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_11_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293534080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297728448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_108_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_11_feed_forward2_linear2_weight_to_fp16_quantized, x = input_635_cast_fp16)[name = tensor("linear_108_cast_fp16")]; tensor var_2154_to_fp16 = const()[name = tensor("op_2154_to_fp16"), val = tensor(0x1p-1)]; tensor var_2155_cast_fp16 = mul(x = linear_108_cast_fp16, y = var_2154_to_fp16)[name = tensor("op_2155_cast_fp16")]; tensor input_641_cast_fp16 = add(x = input_629_cast_fp16, y = var_2155_cast_fp16)[name = tensor("input_641_cast_fp16")]; tensor input_643_axes_0 = const()[name = tensor("input_643_axes_0"), val = tensor([-1])]; - tensor model_layers_11_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297755136)))]; - tensor model_layers_11_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297757248)))]; + tensor model_layers_11_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_11_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297730560)))]; + tensor model_layers_11_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_11_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297732672)))]; tensor input_643_cast_fp16 = layer_norm(axes = input_643_axes_0, beta = model_layers_11_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_11_norm_out_weight_to_fp16, x = input_641_cast_fp16)[name = tensor("input_643_cast_fp16")]; tensor input_645_axes_0 = const()[name = tensor("input_645_axes_0"), val = tensor([-1])]; - tensor model_layers_12_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297759360)))]; - tensor model_layers_12_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297761472)))]; + tensor model_layers_12_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297734784)))]; + tensor model_layers_12_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297736896)))]; tensor input_645_cast_fp16 = layer_norm(axes = input_645_axes_0, beta = model_layers_12_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_12_norm_feed_forward1_weight_to_fp16, x = input_643_cast_fp16)[name = tensor("input_645_cast_fp16")]; - tensor model_layers_12_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297763584))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301957952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_12_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297739008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301933376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_109_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_12_feed_forward1_linear1_weight_to_fp16_quantized, x = input_645_cast_fp16)[name = tensor("linear_109_cast_fp16")]; tensor input_649_cast_fp16 = silu(x = linear_109_cast_fp16)[name = tensor("input_649_cast_fp16")]; - tensor model_layers_12_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301966208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306160576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301941632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306136000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_110_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_12_feed_forward1_linear2_weight_to_fp16_quantized, x = input_649_cast_fp16)[name = tensor("linear_110_cast_fp16")]; tensor var_2183_to_fp16 = const()[name = tensor("op_2183_to_fp16"), val = tensor(0x1p-1)]; tensor var_2184_cast_fp16 = mul(x = linear_110_cast_fp16, y = var_2183_to_fp16)[name = tensor("op_2184_cast_fp16")]; tensor input_655_cast_fp16 = add(x = input_643_cast_fp16, y = var_2184_cast_fp16)[name = tensor("input_655_cast_fp16")]; tensor query_25_axes_0 = const()[name = tensor("query_25_axes_0"), val = tensor([-1])]; - tensor model_layers_12_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306162688)))]; - tensor model_layers_12_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306164800)))]; + tensor model_layers_12_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306138112)))]; + tensor model_layers_12_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306140224)))]; tensor query_25_cast_fp16 = layer_norm(axes = query_25_axes_0, beta = model_layers_12_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_12_norm_self_att_weight_to_fp16, x = input_655_cast_fp16)[name = tensor("query_25_cast_fp16")]; - tensor model_layers_12_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306166912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307215552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306142336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307190976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_111_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_12_self_attn_linear_q_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_111_cast_fp16")]; tensor var_2200 = const()[name = tensor("op_2200"), val = tensor([1, -1, 8, 128])]; tensor q_73_cast_fp16 = reshape(shape = var_2200, x = linear_111_cast_fp16)[name = tensor("q_73_cast_fp16")]; - tensor model_layers_12_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307217664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308266304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307193088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308241728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_112_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_12_self_attn_linear_k_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_112_cast_fp16")]; tensor var_2204 = const()[name = tensor("op_2204"), val = tensor([1, -1, 8, 128])]; tensor k_49_cast_fp16 = reshape(shape = var_2204, x = linear_112_cast_fp16)[name = tensor("k_49_cast_fp16")]; - tensor model_layers_12_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308268416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309317056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308243840))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309292480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_113_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_12_self_attn_linear_v_weight_to_fp16_quantized, x = query_25_cast_fp16)[name = tensor("linear_113_cast_fp16")]; tensor var_2208 = const()[name = tensor("op_2208"), val = tensor([1, -1, 8, 128])]; tensor v_25_cast_fp16 = reshape(shape = var_2208, x = linear_113_cast_fp16)[name = tensor("v_25_cast_fp16")]; tensor value_25_perm_0 = const()[name = tensor("value_25_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_12_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_12_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309319168)))]; - tensor var_2220_cast_fp16 = add(x = q_73_cast_fp16, y = model_layers_12_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2220_cast_fp16")]; - tensor model_layers_12_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_12_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309321280)))]; - tensor var_2222_cast_fp16 = add(x = q_73_cast_fp16, y = model_layers_12_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2222_cast_fp16")]; + tensor model_layers_12_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309294592))), scale = tensor([0x1.998p-8, 0x1.cdcp-8, 0x1.c98p-8, 0x1.16p-7, 0x1.084p-7, 0x1.17p-7, 0x1.2c8p-7, 0x1.2dcp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2220_cast_fp16 = add(x = q_73_cast_fp16, y = model_layers_12_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_2220_cast_fp16")]; + tensor model_layers_12_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309295680))), scale = tensor([0x1.64p-9, 0x1.5b8p-7, 0x1.65p-8, 0x1.82p-8, 0x1.d54p-8, 0x1.138p-7, 0x1.40cp-7, 0x1.284p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2222_cast_fp16 = add(x = q_73_cast_fp16, y = model_layers_12_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_2222_cast_fp16")]; tensor q_with_bias_v_25_perm_0 = const()[name = tensor("q_with_bias_v_25_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_271_transpose_x_0 = const()[name = tensor("x_271_transpose_x_0"), val = tensor(false)]; tensor x_271_transpose_y_0 = const()[name = tensor("x_271_transpose_y_0"), val = tensor(false)]; - tensor op_2224_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2224_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309323392))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309580480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_2224_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2224_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309296768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309553856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_25_cast_fp16 = transpose(perm = q_with_bias_v_25_perm_0, x = var_2222_cast_fp16)[name = tensor("transpose_227")]; tensor x_271_cast_fp16 = matmul(transpose_x = x_271_transpose_x_0, transpose_y = x_271_transpose_y_0, x = q_with_bias_v_25_cast_fp16, y = op_2224_to_fp16_quantized)[name = tensor("x_271_cast_fp16")]; tensor x_273_pad_0 = const()[name = tensor("x_273_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1828,12 +1828,12 @@ program(1.0) tensor var_2257 = const()[name = tensor("op_2257"), val = tensor([1, -1, 1024])]; tensor var_2256_cast_fp16 = transpose(perm = var_2256_perm_0, x = x_277_cast_fp16)[name = tensor("transpose_223")]; tensor input_659_cast_fp16 = reshape(shape = var_2257, x = var_2256_cast_fp16)[name = tensor("input_659_cast_fp16")]; - tensor model_layers_12_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309581056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310629696))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309554432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310603072))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_115_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_12_self_attn_linear_out_weight_to_fp16_quantized, x = input_659_cast_fp16)[name = tensor("linear_115_cast_fp16")]; tensor input_663_cast_fp16 = add(x = input_655_cast_fp16, y = linear_115_cast_fp16)[name = tensor("input_663_cast_fp16")]; tensor x_281_axes_0 = const()[name = tensor("x_281_axes_0"), val = tensor([-1])]; - tensor model_layers_12_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310631808)))]; - tensor model_layers_12_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310633920)))]; + tensor model_layers_12_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310605184)))]; + tensor model_layers_12_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310607296)))]; tensor x_281_cast_fp16 = layer_norm(axes = x_281_axes_0, beta = model_layers_12_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_12_norm_conv_weight_to_fp16, x = input_663_cast_fp16)[name = tensor("x_281_cast_fp16")]; tensor input_665_perm_0 = const()[name = tensor("input_665_perm_0"), val = tensor([0, 2, 1])]; tensor input_667_pad_type_0 = const()[name = tensor("input_667_pad_type_0"), val = tensor("valid")]; @@ -1841,7 +1841,7 @@ program(1.0) tensor input_667_pad_0 = const()[name = tensor("input_667_pad_0"), val = tensor([0, 0])]; tensor input_667_dilations_0 = const()[name = tensor("input_667_dilations_0"), val = tensor([1])]; tensor input_667_groups_0 = const()[name = tensor("input_667_groups_0"), val = tensor(1)]; - tensor model_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310636032))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312733248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310609408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312706624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_665_cast_fp16 = transpose(perm = input_665_perm_0, x = x_281_cast_fp16)[name = tensor("transpose_222")]; tensor input_667_cast_fp16 = conv(dilations = input_667_dilations_0, groups = input_667_groups_0, pad = input_667_pad_0, pad_type = input_667_pad_type_0, strides = input_667_strides_0, weight = model_layers_12_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_665_cast_fp16)[name = tensor("input_667_cast_fp16")]; tensor x_283_split_num_splits_0 = const()[name = tensor("x_283_split_num_splits_0"), val = tensor(2)]; @@ -1859,8 +1859,8 @@ program(1.0) tensor input_673_strides_0 = const()[name = tensor("input_673_strides_0"), val = tensor([1])]; tensor input_673_pad_0 = const()[name = tensor("input_673_pad_0"), val = tensor([0, 0])]; tensor input_673_dilations_0 = const()[name = tensor("input_673_dilations_0"), val = tensor([1])]; - tensor const_272_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_272_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312737408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312746688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_273_to_fp16 = const()[name = tensor("const_273_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312748800)))]; + tensor const_272_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_272_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312710784))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312720064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_273_to_fp16 = const()[name = tensor("const_273_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312722176)))]; tensor input_675_cast_fp16 = conv(bias = const_273_to_fp16, dilations = input_673_dilations_0, groups = input_673_groups_0, pad = input_673_pad_0, pad_type = input_673_pad_type_0, strides = input_673_strides_0, weight = const_272_to_fp16_quantized, x = input_671_cast_fp16)[name = tensor("input_675_cast_fp16")]; tensor input_677_cast_fp16 = silu(x = input_675_cast_fp16)[name = tensor("input_677_cast_fp16")]; tensor x_285_pad_type_0 = const()[name = tensor("x_285_pad_type_0"), val = tensor("valid")]; @@ -1868,64 +1868,64 @@ program(1.0) tensor x_285_pad_0 = const()[name = tensor("x_285_pad_0"), val = tensor([0, 0])]; tensor x_285_dilations_0 = const()[name = tensor("x_285_dilations_0"), val = tensor([1])]; tensor x_285_groups_0 = const()[name = tensor("x_285_groups_0"), val = tensor(1)]; - tensor model_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312750912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313799552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312724288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313772928))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_285_cast_fp16 = conv(dilations = x_285_dilations_0, groups = x_285_groups_0, pad = x_285_pad_0, pad_type = x_285_pad_type_0, strides = x_285_strides_0, weight = model_layers_12_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_677_cast_fp16)[name = tensor("x_285_cast_fp16")]; tensor input_679_perm_0 = const()[name = tensor("input_679_perm_0"), val = tensor([0, 2, 1])]; tensor input_679_cast_fp16 = transpose(perm = input_679_perm_0, x = x_285_cast_fp16)[name = tensor("transpose_221")]; tensor input_681_cast_fp16 = add(x = input_663_cast_fp16, y = input_679_cast_fp16)[name = tensor("input_681_cast_fp16")]; tensor input_683_axes_0 = const()[name = tensor("input_683_axes_0"), val = tensor([-1])]; - tensor model_layers_12_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313801664)))]; - tensor model_layers_12_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313803776)))]; + tensor model_layers_12_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313775040)))]; + tensor model_layers_12_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313777152)))]; tensor input_683_cast_fp16 = layer_norm(axes = input_683_axes_0, beta = model_layers_12_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_12_norm_feed_forward2_weight_to_fp16, x = input_681_cast_fp16)[name = tensor("input_683_cast_fp16")]; - tensor model_layers_12_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313805888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(318000256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_12_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313779264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317973632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_116_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_12_feed_forward2_linear1_weight_to_fp16_quantized, x = input_683_cast_fp16)[name = tensor("linear_116_cast_fp16")]; tensor input_687_cast_fp16 = silu(x = linear_116_cast_fp16)[name = tensor("input_687_cast_fp16")]; - tensor model_layers_12_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(318008512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322202880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_12_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_12_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317981888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322176256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_117_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_12_feed_forward2_linear2_weight_to_fp16_quantized, x = input_687_cast_fp16)[name = tensor("linear_117_cast_fp16")]; tensor var_2317_to_fp16 = const()[name = tensor("op_2317_to_fp16"), val = tensor(0x1p-1)]; tensor var_2318_cast_fp16 = mul(x = linear_117_cast_fp16, y = var_2317_to_fp16)[name = tensor("op_2318_cast_fp16")]; tensor input_693_cast_fp16 = add(x = input_681_cast_fp16, y = var_2318_cast_fp16)[name = tensor("input_693_cast_fp16")]; tensor input_695_axes_0 = const()[name = tensor("input_695_axes_0"), val = tensor([-1])]; - tensor model_layers_12_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322204992)))]; - tensor model_layers_12_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322207104)))]; + tensor model_layers_12_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_12_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322178368)))]; + tensor model_layers_12_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_12_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322180480)))]; tensor input_695_cast_fp16 = layer_norm(axes = input_695_axes_0, beta = model_layers_12_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_12_norm_out_weight_to_fp16, x = input_693_cast_fp16)[name = tensor("input_695_cast_fp16")]; tensor input_697_axes_0 = const()[name = tensor("input_697_axes_0"), val = tensor([-1])]; - tensor model_layers_13_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322209216)))]; - tensor model_layers_13_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322211328)))]; + tensor model_layers_13_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322182592)))]; + tensor model_layers_13_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322184704)))]; tensor input_697_cast_fp16 = layer_norm(axes = input_697_axes_0, beta = model_layers_13_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_13_norm_feed_forward1_weight_to_fp16, x = input_695_cast_fp16)[name = tensor("input_697_cast_fp16")]; - tensor model_layers_13_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322213440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326407808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_13_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322186816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326381184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_13_feed_forward1_linear1_weight_to_fp16_quantized, x = input_697_cast_fp16)[name = tensor("linear_118_cast_fp16")]; tensor input_701_cast_fp16 = silu(x = linear_118_cast_fp16)[name = tensor("input_701_cast_fp16")]; - tensor model_layers_13_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326416064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330610432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326389440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330583808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_119_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_13_feed_forward1_linear2_weight_to_fp16_quantized, x = input_701_cast_fp16)[name = tensor("linear_119_cast_fp16")]; tensor var_2346_to_fp16 = const()[name = tensor("op_2346_to_fp16"), val = tensor(0x1p-1)]; tensor var_2347_cast_fp16 = mul(x = linear_119_cast_fp16, y = var_2346_to_fp16)[name = tensor("op_2347_cast_fp16")]; tensor input_707_cast_fp16 = add(x = input_695_cast_fp16, y = var_2347_cast_fp16)[name = tensor("input_707_cast_fp16")]; tensor query_27_axes_0 = const()[name = tensor("query_27_axes_0"), val = tensor([-1])]; - tensor model_layers_13_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330612544)))]; - tensor model_layers_13_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330614656)))]; + tensor model_layers_13_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330585920)))]; + tensor model_layers_13_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330588032)))]; tensor query_27_cast_fp16 = layer_norm(axes = query_27_axes_0, beta = model_layers_13_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_13_norm_self_att_weight_to_fp16, x = input_707_cast_fp16)[name = tensor("query_27_cast_fp16")]; - tensor model_layers_13_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330616768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(331665408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330590144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(331638784))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_120_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_13_self_attn_linear_q_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_120_cast_fp16")]; tensor var_2363 = const()[name = tensor("op_2363"), val = tensor([1, -1, 8, 128])]; tensor q_79_cast_fp16 = reshape(shape = var_2363, x = linear_120_cast_fp16)[name = tensor("q_79_cast_fp16")]; - tensor model_layers_13_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(331667520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332716160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(331640896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332689536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_121_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_13_self_attn_linear_k_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_121_cast_fp16")]; tensor var_2367 = const()[name = tensor("op_2367"), val = tensor([1, -1, 8, 128])]; tensor k_53_cast_fp16 = reshape(shape = var_2367, x = linear_121_cast_fp16)[name = tensor("k_53_cast_fp16")]; - tensor model_layers_13_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332718272))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333766912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332691648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333740288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_122_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_13_self_attn_linear_v_weight_to_fp16_quantized, x = query_27_cast_fp16)[name = tensor("linear_122_cast_fp16")]; tensor var_2371 = const()[name = tensor("op_2371"), val = tensor([1, -1, 8, 128])]; tensor v_27_cast_fp16 = reshape(shape = var_2371, x = linear_122_cast_fp16)[name = tensor("v_27_cast_fp16")]; tensor value_27_perm_0 = const()[name = tensor("value_27_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_13_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_13_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333769024)))]; - tensor var_2383_cast_fp16 = add(x = q_79_cast_fp16, y = model_layers_13_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2383_cast_fp16")]; - tensor model_layers_13_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_13_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333771136)))]; - tensor var_2385_cast_fp16 = add(x = q_79_cast_fp16, y = model_layers_13_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2385_cast_fp16")]; + tensor model_layers_13_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333742400))), scale = tensor([0x1.9fcp-7, 0x1.388p-7, 0x1.7a8p-8, 0x1.208p-6, 0x1.95p-7, 0x1.474p-7, 0x1.11cp-7, 0x1.514p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2383_cast_fp16 = add(x = q_79_cast_fp16, y = model_layers_13_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_2383_cast_fp16")]; + tensor model_layers_13_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333743488))), scale = tensor([0x1.244p-7, 0x1.c98p-9, 0x1.fa8p-8, 0x1.accp-8, 0x1.d04p-8, 0x1.37cp-7, 0x1.f0cp-9, 0x1.07cp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2385_cast_fp16 = add(x = q_79_cast_fp16, y = model_layers_13_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_2385_cast_fp16")]; tensor q_with_bias_v_27_perm_0 = const()[name = tensor("q_with_bias_v_27_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_293_transpose_x_0 = const()[name = tensor("x_293_transpose_x_0"), val = tensor(false)]; tensor x_293_transpose_y_0 = const()[name = tensor("x_293_transpose_y_0"), val = tensor(false)]; - tensor op_2387_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2387_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333773248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334030336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_2387_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2387_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333744576))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334001664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_27_cast_fp16 = transpose(perm = q_with_bias_v_27_perm_0, x = var_2385_cast_fp16)[name = tensor("transpose_220")]; tensor x_293_cast_fp16 = matmul(transpose_x = x_293_transpose_x_0, transpose_y = x_293_transpose_y_0, x = q_with_bias_v_27_cast_fp16, y = op_2387_to_fp16_quantized)[name = tensor("x_293_cast_fp16")]; tensor x_295_pad_0 = const()[name = tensor("x_295_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -1965,12 +1965,12 @@ program(1.0) tensor var_2420 = const()[name = tensor("op_2420"), val = tensor([1, -1, 1024])]; tensor var_2419_cast_fp16 = transpose(perm = var_2419_perm_0, x = x_299_cast_fp16)[name = tensor("transpose_216")]; tensor input_711_cast_fp16 = reshape(shape = var_2420, x = var_2419_cast_fp16)[name = tensor("input_711_cast_fp16")]; - tensor model_layers_13_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334030912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335079552))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334002240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335050880))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_124_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_13_self_attn_linear_out_weight_to_fp16_quantized, x = input_711_cast_fp16)[name = tensor("linear_124_cast_fp16")]; tensor input_715_cast_fp16 = add(x = input_707_cast_fp16, y = linear_124_cast_fp16)[name = tensor("input_715_cast_fp16")]; tensor x_303_axes_0 = const()[name = tensor("x_303_axes_0"), val = tensor([-1])]; - tensor model_layers_13_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335081664)))]; - tensor model_layers_13_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335083776)))]; + tensor model_layers_13_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335052992)))]; + tensor model_layers_13_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335055104)))]; tensor x_303_cast_fp16 = layer_norm(axes = x_303_axes_0, beta = model_layers_13_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_13_norm_conv_weight_to_fp16, x = input_715_cast_fp16)[name = tensor("x_303_cast_fp16")]; tensor input_717_perm_0 = const()[name = tensor("input_717_perm_0"), val = tensor([0, 2, 1])]; tensor input_719_pad_type_0 = const()[name = tensor("input_719_pad_type_0"), val = tensor("valid")]; @@ -1978,7 +1978,7 @@ program(1.0) tensor input_719_pad_0 = const()[name = tensor("input_719_pad_0"), val = tensor([0, 0])]; tensor input_719_dilations_0 = const()[name = tensor("input_719_dilations_0"), val = tensor([1])]; tensor input_719_groups_0 = const()[name = tensor("input_719_groups_0"), val = tensor(1)]; - tensor model_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335085888))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337183104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335057216))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337154432))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_717_cast_fp16 = transpose(perm = input_717_perm_0, x = x_303_cast_fp16)[name = tensor("transpose_215")]; tensor input_719_cast_fp16 = conv(dilations = input_719_dilations_0, groups = input_719_groups_0, pad = input_719_pad_0, pad_type = input_719_pad_type_0, strides = input_719_strides_0, weight = model_layers_13_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_717_cast_fp16)[name = tensor("input_719_cast_fp16")]; tensor x_305_split_num_splits_0 = const()[name = tensor("x_305_split_num_splits_0"), val = tensor(2)]; @@ -1996,8 +1996,8 @@ program(1.0) tensor input_725_strides_0 = const()[name = tensor("input_725_strides_0"), val = tensor([1])]; tensor input_725_pad_0 = const()[name = tensor("input_725_pad_0"), val = tensor([0, 0])]; tensor input_725_dilations_0 = const()[name = tensor("input_725_dilations_0"), val = tensor([1])]; - tensor const_274_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_274_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337187264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337196544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_275_to_fp16 = const()[name = tensor("const_275_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337198656)))]; + tensor const_274_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_274_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337158592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337167872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_275_to_fp16 = const()[name = tensor("const_275_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337169984)))]; tensor input_727_cast_fp16 = conv(bias = const_275_to_fp16, dilations = input_725_dilations_0, groups = input_725_groups_0, pad = input_725_pad_0, pad_type = input_725_pad_type_0, strides = input_725_strides_0, weight = const_274_to_fp16_quantized, x = input_723_cast_fp16)[name = tensor("input_727_cast_fp16")]; tensor input_729_cast_fp16 = silu(x = input_727_cast_fp16)[name = tensor("input_729_cast_fp16")]; tensor x_307_pad_type_0 = const()[name = tensor("x_307_pad_type_0"), val = tensor("valid")]; @@ -2005,64 +2005,64 @@ program(1.0) tensor x_307_pad_0 = const()[name = tensor("x_307_pad_0"), val = tensor([0, 0])]; tensor x_307_dilations_0 = const()[name = tensor("x_307_dilations_0"), val = tensor([1])]; tensor x_307_groups_0 = const()[name = tensor("x_307_groups_0"), val = tensor(1)]; - tensor model_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337200768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338249408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337172096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338220736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_307_cast_fp16 = conv(dilations = x_307_dilations_0, groups = x_307_groups_0, pad = x_307_pad_0, pad_type = x_307_pad_type_0, strides = x_307_strides_0, weight = model_layers_13_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_729_cast_fp16)[name = tensor("x_307_cast_fp16")]; tensor input_731_perm_0 = const()[name = tensor("input_731_perm_0"), val = tensor([0, 2, 1])]; tensor input_731_cast_fp16 = transpose(perm = input_731_perm_0, x = x_307_cast_fp16)[name = tensor("transpose_214")]; tensor input_733_cast_fp16 = add(x = input_715_cast_fp16, y = input_731_cast_fp16)[name = tensor("input_733_cast_fp16")]; tensor input_735_axes_0 = const()[name = tensor("input_735_axes_0"), val = tensor([-1])]; - tensor model_layers_13_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338251520)))]; - tensor model_layers_13_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338253632)))]; + tensor model_layers_13_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338222848)))]; + tensor model_layers_13_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338224960)))]; tensor input_735_cast_fp16 = layer_norm(axes = input_735_axes_0, beta = model_layers_13_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_13_norm_feed_forward2_weight_to_fp16, x = input_733_cast_fp16)[name = tensor("input_735_cast_fp16")]; - tensor model_layers_13_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338255744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342450112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_13_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338227072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342421440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_13_feed_forward2_linear1_weight_to_fp16_quantized, x = input_735_cast_fp16)[name = tensor("linear_125_cast_fp16")]; tensor input_739_cast_fp16 = silu(x = linear_125_cast_fp16)[name = tensor("input_739_cast_fp16")]; - tensor model_layers_13_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342458368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346652736))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_13_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_13_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342429696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346624064))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_126_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_13_feed_forward2_linear2_weight_to_fp16_quantized, x = input_739_cast_fp16)[name = tensor("linear_126_cast_fp16")]; tensor var_2480_to_fp16 = const()[name = tensor("op_2480_to_fp16"), val = tensor(0x1p-1)]; tensor var_2481_cast_fp16 = mul(x = linear_126_cast_fp16, y = var_2480_to_fp16)[name = tensor("op_2481_cast_fp16")]; tensor input_745_cast_fp16 = add(x = input_733_cast_fp16, y = var_2481_cast_fp16)[name = tensor("input_745_cast_fp16")]; tensor input_747_axes_0 = const()[name = tensor("input_747_axes_0"), val = tensor([-1])]; - tensor model_layers_13_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346654848)))]; - tensor model_layers_13_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346656960)))]; + tensor model_layers_13_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_13_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346626176)))]; + tensor model_layers_13_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_13_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346628288)))]; tensor input_747_cast_fp16 = layer_norm(axes = input_747_axes_0, beta = model_layers_13_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_13_norm_out_weight_to_fp16, x = input_745_cast_fp16)[name = tensor("input_747_cast_fp16")]; tensor input_749_axes_0 = const()[name = tensor("input_749_axes_0"), val = tensor([-1])]; - tensor model_layers_14_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346659072)))]; - tensor model_layers_14_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346661184)))]; + tensor model_layers_14_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346630400)))]; + tensor model_layers_14_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346632512)))]; tensor input_749_cast_fp16 = layer_norm(axes = input_749_axes_0, beta = model_layers_14_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_14_norm_feed_forward1_weight_to_fp16, x = input_747_cast_fp16)[name = tensor("input_749_cast_fp16")]; - tensor model_layers_14_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346663296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350857664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_14_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346634624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350828992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_14_feed_forward1_linear1_weight_to_fp16_quantized, x = input_749_cast_fp16)[name = tensor("linear_127_cast_fp16")]; tensor input_753_cast_fp16 = silu(x = linear_127_cast_fp16)[name = tensor("input_753_cast_fp16")]; - tensor model_layers_14_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350865920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355060288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350837248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355031616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_128_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_14_feed_forward1_linear2_weight_to_fp16_quantized, x = input_753_cast_fp16)[name = tensor("linear_128_cast_fp16")]; tensor var_2509_to_fp16 = const()[name = tensor("op_2509_to_fp16"), val = tensor(0x1p-1)]; tensor var_2510_cast_fp16 = mul(x = linear_128_cast_fp16, y = var_2509_to_fp16)[name = tensor("op_2510_cast_fp16")]; tensor input_759_cast_fp16 = add(x = input_747_cast_fp16, y = var_2510_cast_fp16)[name = tensor("input_759_cast_fp16")]; tensor query_29_axes_0 = const()[name = tensor("query_29_axes_0"), val = tensor([-1])]; - tensor model_layers_14_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355062400)))]; - tensor model_layers_14_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355064512)))]; + tensor model_layers_14_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355033728)))]; + tensor model_layers_14_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355035840)))]; tensor query_29_cast_fp16 = layer_norm(axes = query_29_axes_0, beta = model_layers_14_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_14_norm_self_att_weight_to_fp16, x = input_759_cast_fp16)[name = tensor("query_29_cast_fp16")]; - tensor model_layers_14_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355066624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356115264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355037952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356086592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_129_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_14_self_attn_linear_q_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_129_cast_fp16")]; tensor var_2526 = const()[name = tensor("op_2526"), val = tensor([1, -1, 8, 128])]; tensor q_85_cast_fp16 = reshape(shape = var_2526, x = linear_129_cast_fp16)[name = tensor("q_85_cast_fp16")]; - tensor model_layers_14_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356117376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357166016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(356088704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357137344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_130_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_14_self_attn_linear_k_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_130_cast_fp16")]; tensor var_2530 = const()[name = tensor("op_2530"), val = tensor([1, -1, 8, 128])]; tensor k_57_cast_fp16 = reshape(shape = var_2530, x = linear_130_cast_fp16)[name = tensor("k_57_cast_fp16")]; - tensor model_layers_14_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357168128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358216768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357139456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358188096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_131_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_14_self_attn_linear_v_weight_to_fp16_quantized, x = query_29_cast_fp16)[name = tensor("linear_131_cast_fp16")]; tensor var_2534 = const()[name = tensor("op_2534"), val = tensor([1, -1, 8, 128])]; tensor v_29_cast_fp16 = reshape(shape = var_2534, x = linear_131_cast_fp16)[name = tensor("v_29_cast_fp16")]; tensor value_29_perm_0 = const()[name = tensor("value_29_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_14_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_14_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358218880)))]; - tensor var_2546_cast_fp16 = add(x = q_85_cast_fp16, y = model_layers_14_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2546_cast_fp16")]; - tensor model_layers_14_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_14_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358220992)))]; - tensor var_2548_cast_fp16 = add(x = q_85_cast_fp16, y = model_layers_14_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2548_cast_fp16")]; + tensor model_layers_14_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358190208))), scale = tensor([0x1.37p-7, 0x1.7c8p-7, 0x1.024p-6, 0x1.61p-7, 0x1.d9cp-8, 0x1.8c4p-7, 0x1.158p-7, 0x1.9d8p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2546_cast_fp16 = add(x = q_85_cast_fp16, y = model_layers_14_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_2546_cast_fp16")]; + tensor model_layers_14_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358191296))), scale = tensor([0x1.958p-8, 0x1.624p-9, 0x1.388p-8, 0x1.554p-7, 0x1.24p-7, 0x1.93p-8, 0x1.d6cp-8, 0x1.998p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2548_cast_fp16 = add(x = q_85_cast_fp16, y = model_layers_14_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_2548_cast_fp16")]; tensor q_with_bias_v_29_perm_0 = const()[name = tensor("q_with_bias_v_29_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_315_transpose_x_0 = const()[name = tensor("x_315_transpose_x_0"), val = tensor(false)]; tensor x_315_transpose_y_0 = const()[name = tensor("x_315_transpose_y_0"), val = tensor(false)]; - tensor op_2550_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2550_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358223104))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358480192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_2550_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2550_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358192384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358449472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_29_cast_fp16 = transpose(perm = q_with_bias_v_29_perm_0, x = var_2548_cast_fp16)[name = tensor("transpose_213")]; tensor x_315_cast_fp16 = matmul(transpose_x = x_315_transpose_x_0, transpose_y = x_315_transpose_y_0, x = q_with_bias_v_29_cast_fp16, y = op_2550_to_fp16_quantized)[name = tensor("x_315_cast_fp16")]; tensor x_317_pad_0 = const()[name = tensor("x_317_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2102,12 +2102,12 @@ program(1.0) tensor var_2583 = const()[name = tensor("op_2583"), val = tensor([1, -1, 1024])]; tensor var_2582_cast_fp16 = transpose(perm = var_2582_perm_0, x = x_321_cast_fp16)[name = tensor("transpose_209")]; tensor input_763_cast_fp16 = reshape(shape = var_2583, x = var_2582_cast_fp16)[name = tensor("input_763_cast_fp16")]; - tensor model_layers_14_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358480768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359529408))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358450048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359498688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_133_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_14_self_attn_linear_out_weight_to_fp16_quantized, x = input_763_cast_fp16)[name = tensor("linear_133_cast_fp16")]; tensor input_767_cast_fp16 = add(x = input_759_cast_fp16, y = linear_133_cast_fp16)[name = tensor("input_767_cast_fp16")]; tensor x_325_axes_0 = const()[name = tensor("x_325_axes_0"), val = tensor([-1])]; - tensor model_layers_14_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359531520)))]; - tensor model_layers_14_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359533632)))]; + tensor model_layers_14_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359500800)))]; + tensor model_layers_14_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359502912)))]; tensor x_325_cast_fp16 = layer_norm(axes = x_325_axes_0, beta = model_layers_14_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_14_norm_conv_weight_to_fp16, x = input_767_cast_fp16)[name = tensor("x_325_cast_fp16")]; tensor input_769_perm_0 = const()[name = tensor("input_769_perm_0"), val = tensor([0, 2, 1])]; tensor input_771_pad_type_0 = const()[name = tensor("input_771_pad_type_0"), val = tensor("valid")]; @@ -2115,7 +2115,7 @@ program(1.0) tensor input_771_pad_0 = const()[name = tensor("input_771_pad_0"), val = tensor([0, 0])]; tensor input_771_dilations_0 = const()[name = tensor("input_771_dilations_0"), val = tensor([1])]; tensor input_771_groups_0 = const()[name = tensor("input_771_groups_0"), val = tensor(1)]; - tensor model_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359535744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361632960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359505024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361602240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_769_cast_fp16 = transpose(perm = input_769_perm_0, x = x_325_cast_fp16)[name = tensor("transpose_208")]; tensor input_771_cast_fp16 = conv(dilations = input_771_dilations_0, groups = input_771_groups_0, pad = input_771_pad_0, pad_type = input_771_pad_type_0, strides = input_771_strides_0, weight = model_layers_14_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_769_cast_fp16)[name = tensor("input_771_cast_fp16")]; tensor x_327_split_num_splits_0 = const()[name = tensor("x_327_split_num_splits_0"), val = tensor(2)]; @@ -2133,8 +2133,8 @@ program(1.0) tensor input_777_strides_0 = const()[name = tensor("input_777_strides_0"), val = tensor([1])]; tensor input_777_pad_0 = const()[name = tensor("input_777_pad_0"), val = tensor([0, 0])]; tensor input_777_dilations_0 = const()[name = tensor("input_777_dilations_0"), val = tensor([1])]; - tensor const_276_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_276_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361637120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361646400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_277_to_fp16 = const()[name = tensor("const_277_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361648512)))]; + tensor const_276_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_276_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361606400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361615680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_277_to_fp16 = const()[name = tensor("const_277_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361617792)))]; tensor input_779_cast_fp16 = conv(bias = const_277_to_fp16, dilations = input_777_dilations_0, groups = input_777_groups_0, pad = input_777_pad_0, pad_type = input_777_pad_type_0, strides = input_777_strides_0, weight = const_276_to_fp16_quantized, x = input_775_cast_fp16)[name = tensor("input_779_cast_fp16")]; tensor input_781_cast_fp16 = silu(x = input_779_cast_fp16)[name = tensor("input_781_cast_fp16")]; tensor x_329_pad_type_0 = const()[name = tensor("x_329_pad_type_0"), val = tensor("valid")]; @@ -2142,64 +2142,64 @@ program(1.0) tensor x_329_pad_0 = const()[name = tensor("x_329_pad_0"), val = tensor([0, 0])]; tensor x_329_dilations_0 = const()[name = tensor("x_329_dilations_0"), val = tensor([1])]; tensor x_329_groups_0 = const()[name = tensor("x_329_groups_0"), val = tensor(1)]; - tensor model_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361650624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362699264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361619904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362668544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_329_cast_fp16 = conv(dilations = x_329_dilations_0, groups = x_329_groups_0, pad = x_329_pad_0, pad_type = x_329_pad_type_0, strides = x_329_strides_0, weight = model_layers_14_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_781_cast_fp16)[name = tensor("x_329_cast_fp16")]; tensor input_783_perm_0 = const()[name = tensor("input_783_perm_0"), val = tensor([0, 2, 1])]; tensor input_783_cast_fp16 = transpose(perm = input_783_perm_0, x = x_329_cast_fp16)[name = tensor("transpose_207")]; tensor input_785_cast_fp16 = add(x = input_767_cast_fp16, y = input_783_cast_fp16)[name = tensor("input_785_cast_fp16")]; tensor input_787_axes_0 = const()[name = tensor("input_787_axes_0"), val = tensor([-1])]; - tensor model_layers_14_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362701376)))]; - tensor model_layers_14_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362703488)))]; + tensor model_layers_14_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362670656)))]; + tensor model_layers_14_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362672768)))]; tensor input_787_cast_fp16 = layer_norm(axes = input_787_axes_0, beta = model_layers_14_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_14_norm_feed_forward2_weight_to_fp16, x = input_785_cast_fp16)[name = tensor("input_787_cast_fp16")]; - tensor model_layers_14_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362705600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366899968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_14_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362674880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366869248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_14_feed_forward2_linear1_weight_to_fp16_quantized, x = input_787_cast_fp16)[name = tensor("linear_134_cast_fp16")]; tensor input_791_cast_fp16 = silu(x = linear_134_cast_fp16)[name = tensor("input_791_cast_fp16")]; - tensor model_layers_14_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366908224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371102592))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_14_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_14_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366877504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371071872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_135_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_14_feed_forward2_linear2_weight_to_fp16_quantized, x = input_791_cast_fp16)[name = tensor("linear_135_cast_fp16")]; tensor var_2643_to_fp16 = const()[name = tensor("op_2643_to_fp16"), val = tensor(0x1p-1)]; tensor var_2644_cast_fp16 = mul(x = linear_135_cast_fp16, y = var_2643_to_fp16)[name = tensor("op_2644_cast_fp16")]; tensor input_797_cast_fp16 = add(x = input_785_cast_fp16, y = var_2644_cast_fp16)[name = tensor("input_797_cast_fp16")]; tensor input_799_axes_0 = const()[name = tensor("input_799_axes_0"), val = tensor([-1])]; - tensor model_layers_14_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371104704)))]; - tensor model_layers_14_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371106816)))]; + tensor model_layers_14_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_14_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371073984)))]; + tensor model_layers_14_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_14_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371076096)))]; tensor input_799_cast_fp16 = layer_norm(axes = input_799_axes_0, beta = model_layers_14_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_14_norm_out_weight_to_fp16, x = input_797_cast_fp16)[name = tensor("input_799_cast_fp16")]; tensor input_801_axes_0 = const()[name = tensor("input_801_axes_0"), val = tensor([-1])]; - tensor model_layers_15_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371108928)))]; - tensor model_layers_15_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371111040)))]; + tensor model_layers_15_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371078208)))]; + tensor model_layers_15_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371080320)))]; tensor input_801_cast_fp16 = layer_norm(axes = input_801_axes_0, beta = model_layers_15_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_15_norm_feed_forward1_weight_to_fp16, x = input_799_cast_fp16)[name = tensor("input_801_cast_fp16")]; - tensor model_layers_15_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371113152))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375307520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_15_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371082432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375276800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_15_feed_forward1_linear1_weight_to_fp16_quantized, x = input_801_cast_fp16)[name = tensor("linear_136_cast_fp16")]; tensor input_805_cast_fp16 = silu(x = linear_136_cast_fp16)[name = tensor("input_805_cast_fp16")]; - tensor model_layers_15_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375315776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379510144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375285056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379479424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_137_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_15_feed_forward1_linear2_weight_to_fp16_quantized, x = input_805_cast_fp16)[name = tensor("linear_137_cast_fp16")]; tensor var_2672_to_fp16 = const()[name = tensor("op_2672_to_fp16"), val = tensor(0x1p-1)]; tensor var_2673_cast_fp16 = mul(x = linear_137_cast_fp16, y = var_2672_to_fp16)[name = tensor("op_2673_cast_fp16")]; tensor input_811_cast_fp16 = add(x = input_799_cast_fp16, y = var_2673_cast_fp16)[name = tensor("input_811_cast_fp16")]; tensor query_31_axes_0 = const()[name = tensor("query_31_axes_0"), val = tensor([-1])]; - tensor model_layers_15_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379512256)))]; - tensor model_layers_15_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379514368)))]; + tensor model_layers_15_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379481536)))]; + tensor model_layers_15_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379483648)))]; tensor query_31_cast_fp16 = layer_norm(axes = query_31_axes_0, beta = model_layers_15_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_15_norm_self_att_weight_to_fp16, x = input_811_cast_fp16)[name = tensor("query_31_cast_fp16")]; - tensor model_layers_15_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379516480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380565120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379485760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380534400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_138_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_15_self_attn_linear_q_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_138_cast_fp16")]; tensor var_2689 = const()[name = tensor("op_2689"), val = tensor([1, -1, 8, 128])]; tensor q_91_cast_fp16 = reshape(shape = var_2689, x = linear_138_cast_fp16)[name = tensor("q_91_cast_fp16")]; - tensor model_layers_15_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380567232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381615872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380536512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381585152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_139_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_15_self_attn_linear_k_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_139_cast_fp16")]; tensor var_2693 = const()[name = tensor("op_2693"), val = tensor([1, -1, 8, 128])]; tensor k_61_cast_fp16 = reshape(shape = var_2693, x = linear_139_cast_fp16)[name = tensor("k_61_cast_fp16")]; - tensor model_layers_15_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381617984))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382666624))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381587264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382635904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_140_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_15_self_attn_linear_v_weight_to_fp16_quantized, x = query_31_cast_fp16)[name = tensor("linear_140_cast_fp16")]; tensor var_2697 = const()[name = tensor("op_2697"), val = tensor([1, -1, 8, 128])]; tensor v_31_cast_fp16 = reshape(shape = var_2697, x = linear_140_cast_fp16)[name = tensor("v_31_cast_fp16")]; tensor value_31_perm_0 = const()[name = tensor("value_31_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_15_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_15_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382668736)))]; - tensor var_2709_cast_fp16 = add(x = q_91_cast_fp16, y = model_layers_15_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2709_cast_fp16")]; - tensor model_layers_15_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_15_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382670848)))]; - tensor var_2711_cast_fp16 = add(x = q_91_cast_fp16, y = model_layers_15_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2711_cast_fp16")]; + tensor model_layers_15_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382638016))), scale = tensor([0x1.32p-8, 0x1.724p-8, 0x1.874p-8, 0x1.ad4p-8, 0x1.298p-8, 0x1.968p-8, 0x1.9a4p-8, 0x1.c4cp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2709_cast_fp16 = add(x = q_91_cast_fp16, y = model_layers_15_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_2709_cast_fp16")]; + tensor model_layers_15_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382639104))), scale = tensor([0x1.d04p-9, 0x1.c94p-8, 0x1.ca4p-8, 0x1.1fcp-7, 0x1.6dcp-8, 0x1.2fp-8, 0x1.a74p-8, 0x1.0a8p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2711_cast_fp16 = add(x = q_91_cast_fp16, y = model_layers_15_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_2711_cast_fp16")]; tensor q_with_bias_v_31_perm_0 = const()[name = tensor("q_with_bias_v_31_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_337_transpose_x_0 = const()[name = tensor("x_337_transpose_x_0"), val = tensor(false)]; tensor x_337_transpose_y_0 = const()[name = tensor("x_337_transpose_y_0"), val = tensor(false)]; - tensor op_2713_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2713_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382672960))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382930048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_2713_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2713_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382640192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382897280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_31_cast_fp16 = transpose(perm = q_with_bias_v_31_perm_0, x = var_2711_cast_fp16)[name = tensor("transpose_206")]; tensor x_337_cast_fp16 = matmul(transpose_x = x_337_transpose_x_0, transpose_y = x_337_transpose_y_0, x = q_with_bias_v_31_cast_fp16, y = op_2713_to_fp16_quantized)[name = tensor("x_337_cast_fp16")]; tensor x_339_pad_0 = const()[name = tensor("x_339_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2239,12 +2239,12 @@ program(1.0) tensor var_2746 = const()[name = tensor("op_2746"), val = tensor([1, -1, 1024])]; tensor var_2745_cast_fp16 = transpose(perm = var_2745_perm_0, x = x_343_cast_fp16)[name = tensor("transpose_202")]; tensor input_815_cast_fp16 = reshape(shape = var_2746, x = var_2745_cast_fp16)[name = tensor("input_815_cast_fp16")]; - tensor model_layers_15_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382930624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383979264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(382897856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383946496))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_142_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_15_self_attn_linear_out_weight_to_fp16_quantized, x = input_815_cast_fp16)[name = tensor("linear_142_cast_fp16")]; tensor input_819_cast_fp16 = add(x = input_811_cast_fp16, y = linear_142_cast_fp16)[name = tensor("input_819_cast_fp16")]; tensor x_347_axes_0 = const()[name = tensor("x_347_axes_0"), val = tensor([-1])]; - tensor model_layers_15_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383981376)))]; - tensor model_layers_15_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383983488)))]; + tensor model_layers_15_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383948608)))]; + tensor model_layers_15_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383950720)))]; tensor x_347_cast_fp16 = layer_norm(axes = x_347_axes_0, beta = model_layers_15_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_15_norm_conv_weight_to_fp16, x = input_819_cast_fp16)[name = tensor("x_347_cast_fp16")]; tensor input_821_perm_0 = const()[name = tensor("input_821_perm_0"), val = tensor([0, 2, 1])]; tensor input_823_pad_type_0 = const()[name = tensor("input_823_pad_type_0"), val = tensor("valid")]; @@ -2252,7 +2252,7 @@ program(1.0) tensor input_823_pad_0 = const()[name = tensor("input_823_pad_0"), val = tensor([0, 0])]; tensor input_823_dilations_0 = const()[name = tensor("input_823_dilations_0"), val = tensor([1])]; tensor input_823_groups_0 = const()[name = tensor("input_823_groups_0"), val = tensor(1)]; - tensor model_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383985600))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386082816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383952832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386050048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_821_cast_fp16 = transpose(perm = input_821_perm_0, x = x_347_cast_fp16)[name = tensor("transpose_201")]; tensor input_823_cast_fp16 = conv(dilations = input_823_dilations_0, groups = input_823_groups_0, pad = input_823_pad_0, pad_type = input_823_pad_type_0, strides = input_823_strides_0, weight = model_layers_15_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_821_cast_fp16)[name = tensor("input_823_cast_fp16")]; tensor x_349_split_num_splits_0 = const()[name = tensor("x_349_split_num_splits_0"), val = tensor(2)]; @@ -2270,8 +2270,8 @@ program(1.0) tensor input_829_strides_0 = const()[name = tensor("input_829_strides_0"), val = tensor([1])]; tensor input_829_pad_0 = const()[name = tensor("input_829_pad_0"), val = tensor([0, 0])]; tensor input_829_dilations_0 = const()[name = tensor("input_829_dilations_0"), val = tensor([1])]; - tensor const_278_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_278_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386086976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386096256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_279_to_fp16 = const()[name = tensor("const_279_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386098368)))]; + tensor const_278_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_278_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386054208))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386063488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_279_to_fp16 = const()[name = tensor("const_279_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386065600)))]; tensor input_831_cast_fp16 = conv(bias = const_279_to_fp16, dilations = input_829_dilations_0, groups = input_829_groups_0, pad = input_829_pad_0, pad_type = input_829_pad_type_0, strides = input_829_strides_0, weight = const_278_to_fp16_quantized, x = input_827_cast_fp16)[name = tensor("input_831_cast_fp16")]; tensor input_833_cast_fp16 = silu(x = input_831_cast_fp16)[name = tensor("input_833_cast_fp16")]; tensor x_351_pad_type_0 = const()[name = tensor("x_351_pad_type_0"), val = tensor("valid")]; @@ -2279,64 +2279,64 @@ program(1.0) tensor x_351_pad_0 = const()[name = tensor("x_351_pad_0"), val = tensor([0, 0])]; tensor x_351_dilations_0 = const()[name = tensor("x_351_dilations_0"), val = tensor([1])]; tensor x_351_groups_0 = const()[name = tensor("x_351_groups_0"), val = tensor(1)]; - tensor model_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386100480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387149120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386067712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387116352))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_351_cast_fp16 = conv(dilations = x_351_dilations_0, groups = x_351_groups_0, pad = x_351_pad_0, pad_type = x_351_pad_type_0, strides = x_351_strides_0, weight = model_layers_15_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_833_cast_fp16)[name = tensor("x_351_cast_fp16")]; tensor input_835_perm_0 = const()[name = tensor("input_835_perm_0"), val = tensor([0, 2, 1])]; tensor input_835_cast_fp16 = transpose(perm = input_835_perm_0, x = x_351_cast_fp16)[name = tensor("transpose_200")]; tensor input_837_cast_fp16 = add(x = input_819_cast_fp16, y = input_835_cast_fp16)[name = tensor("input_837_cast_fp16")]; tensor input_839_axes_0 = const()[name = tensor("input_839_axes_0"), val = tensor([-1])]; - tensor model_layers_15_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387151232)))]; - tensor model_layers_15_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387153344)))]; + tensor model_layers_15_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387118464)))]; + tensor model_layers_15_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387120576)))]; tensor input_839_cast_fp16 = layer_norm(axes = input_839_axes_0, beta = model_layers_15_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_15_norm_feed_forward2_weight_to_fp16, x = input_837_cast_fp16)[name = tensor("input_839_cast_fp16")]; - tensor model_layers_15_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387155456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391349824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_15_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387122688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391317056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_15_feed_forward2_linear1_weight_to_fp16_quantized, x = input_839_cast_fp16)[name = tensor("linear_143_cast_fp16")]; tensor input_843_cast_fp16 = silu(x = linear_143_cast_fp16)[name = tensor("input_843_cast_fp16")]; - tensor model_layers_15_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391358080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395552448))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_15_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_15_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391325312))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395519680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_144_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_15_feed_forward2_linear2_weight_to_fp16_quantized, x = input_843_cast_fp16)[name = tensor("linear_144_cast_fp16")]; tensor var_2806_to_fp16 = const()[name = tensor("op_2806_to_fp16"), val = tensor(0x1p-1)]; tensor var_2807_cast_fp16 = mul(x = linear_144_cast_fp16, y = var_2806_to_fp16)[name = tensor("op_2807_cast_fp16")]; tensor input_849_cast_fp16 = add(x = input_837_cast_fp16, y = var_2807_cast_fp16)[name = tensor("input_849_cast_fp16")]; tensor input_851_axes_0 = const()[name = tensor("input_851_axes_0"), val = tensor([-1])]; - tensor model_layers_15_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395554560)))]; - tensor model_layers_15_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395556672)))]; + tensor model_layers_15_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_15_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395521792)))]; + tensor model_layers_15_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_15_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395523904)))]; tensor input_851_cast_fp16 = layer_norm(axes = input_851_axes_0, beta = model_layers_15_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_15_norm_out_weight_to_fp16, x = input_849_cast_fp16)[name = tensor("input_851_cast_fp16")]; tensor input_853_axes_0 = const()[name = tensor("input_853_axes_0"), val = tensor([-1])]; - tensor model_layers_16_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395558784)))]; - tensor model_layers_16_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395560896)))]; + tensor model_layers_16_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395526016)))]; + tensor model_layers_16_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395528128)))]; tensor input_853_cast_fp16 = layer_norm(axes = input_853_axes_0, beta = model_layers_16_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_16_norm_feed_forward1_weight_to_fp16, x = input_851_cast_fp16)[name = tensor("input_853_cast_fp16")]; - tensor model_layers_16_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395563008))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399757376))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_16_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395530240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399724608))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_145_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_16_feed_forward1_linear1_weight_to_fp16_quantized, x = input_853_cast_fp16)[name = tensor("linear_145_cast_fp16")]; tensor input_857_cast_fp16 = silu(x = linear_145_cast_fp16)[name = tensor("input_857_cast_fp16")]; - tensor model_layers_16_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399765632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403960000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399732864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403927232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_146_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_16_feed_forward1_linear2_weight_to_fp16_quantized, x = input_857_cast_fp16)[name = tensor("linear_146_cast_fp16")]; tensor var_2835_to_fp16 = const()[name = tensor("op_2835_to_fp16"), val = tensor(0x1p-1)]; tensor var_2836_cast_fp16 = mul(x = linear_146_cast_fp16, y = var_2835_to_fp16)[name = tensor("op_2836_cast_fp16")]; tensor input_863_cast_fp16 = add(x = input_851_cast_fp16, y = var_2836_cast_fp16)[name = tensor("input_863_cast_fp16")]; tensor query_33_axes_0 = const()[name = tensor("query_33_axes_0"), val = tensor([-1])]; - tensor model_layers_16_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403962112)))]; - tensor model_layers_16_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403964224)))]; + tensor model_layers_16_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403929344)))]; + tensor model_layers_16_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403931456)))]; tensor query_33_cast_fp16 = layer_norm(axes = query_33_axes_0, beta = model_layers_16_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_16_norm_self_att_weight_to_fp16, x = input_863_cast_fp16)[name = tensor("query_33_cast_fp16")]; - tensor model_layers_16_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403966336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405014976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(403933568))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404982208))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_147_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_16_self_attn_linear_q_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_147_cast_fp16")]; tensor var_2852 = const()[name = tensor("op_2852"), val = tensor([1, -1, 8, 128])]; tensor q_97_cast_fp16 = reshape(shape = var_2852, x = linear_147_cast_fp16)[name = tensor("q_97_cast_fp16")]; - tensor model_layers_16_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405017088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406065728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404984320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406032960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_148_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_16_self_attn_linear_k_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_148_cast_fp16")]; tensor var_2856 = const()[name = tensor("op_2856"), val = tensor([1, -1, 8, 128])]; tensor k_65_cast_fp16 = reshape(shape = var_2856, x = linear_148_cast_fp16)[name = tensor("k_65_cast_fp16")]; - tensor model_layers_16_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406067840))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407116480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(406035072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407083712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_149_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_16_self_attn_linear_v_weight_to_fp16_quantized, x = query_33_cast_fp16)[name = tensor("linear_149_cast_fp16")]; tensor var_2860 = const()[name = tensor("op_2860"), val = tensor([1, -1, 8, 128])]; tensor v_33_cast_fp16 = reshape(shape = var_2860, x = linear_149_cast_fp16)[name = tensor("v_33_cast_fp16")]; tensor value_33_perm_0 = const()[name = tensor("value_33_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_16_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_16_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407118592)))]; - tensor var_2872_cast_fp16 = add(x = q_97_cast_fp16, y = model_layers_16_self_attn_pos_bias_u_to_fp16)[name = tensor("op_2872_cast_fp16")]; - tensor model_layers_16_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_16_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407120704)))]; - tensor var_2874_cast_fp16 = add(x = q_97_cast_fp16, y = model_layers_16_self_attn_pos_bias_v_to_fp16)[name = tensor("op_2874_cast_fp16")]; + tensor model_layers_16_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407085824))), scale = tensor([0x1.74cp-7, 0x1.5b4p-7, 0x1.0e8p-8, 0x1.f6p-8, 0x1.3a4p-7, 0x1.868p-7, 0x1.04p-7, 0x1.644p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2872_cast_fp16 = add(x = q_97_cast_fp16, y = model_layers_16_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_2872_cast_fp16")]; + tensor model_layers_16_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407086912))), scale = tensor([0x1.184p-8, 0x1.8c8p-9, 0x1.5d8p-8, 0x1.2ccp-7, 0x1.ffcp-8, 0x1.384p-7, 0x1.6acp-8, 0x1.35cp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_2874_cast_fp16 = add(x = q_97_cast_fp16, y = model_layers_16_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_2874_cast_fp16")]; tensor q_with_bias_v_33_perm_0 = const()[name = tensor("q_with_bias_v_33_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_359_transpose_x_0 = const()[name = tensor("x_359_transpose_x_0"), val = tensor(false)]; tensor x_359_transpose_y_0 = const()[name = tensor("x_359_transpose_y_0"), val = tensor(false)]; - tensor op_2876_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2876_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407122816))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407379904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_2876_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_2876_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407088000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407345088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_33_cast_fp16 = transpose(perm = q_with_bias_v_33_perm_0, x = var_2874_cast_fp16)[name = tensor("transpose_199")]; tensor x_359_cast_fp16 = matmul(transpose_x = x_359_transpose_x_0, transpose_y = x_359_transpose_y_0, x = q_with_bias_v_33_cast_fp16, y = op_2876_to_fp16_quantized)[name = tensor("x_359_cast_fp16")]; tensor x_361_pad_0 = const()[name = tensor("x_361_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2376,12 +2376,12 @@ program(1.0) tensor var_2909 = const()[name = tensor("op_2909"), val = tensor([1, -1, 1024])]; tensor var_2908_cast_fp16 = transpose(perm = var_2908_perm_0, x = x_365_cast_fp16)[name = tensor("transpose_195")]; tensor input_867_cast_fp16 = reshape(shape = var_2909, x = var_2908_cast_fp16)[name = tensor("input_867_cast_fp16")]; - tensor model_layers_16_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407380480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408429120))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407345664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408394304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_151_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_16_self_attn_linear_out_weight_to_fp16_quantized, x = input_867_cast_fp16)[name = tensor("linear_151_cast_fp16")]; tensor input_871_cast_fp16 = add(x = input_863_cast_fp16, y = linear_151_cast_fp16)[name = tensor("input_871_cast_fp16")]; tensor x_369_axes_0 = const()[name = tensor("x_369_axes_0"), val = tensor([-1])]; - tensor model_layers_16_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408431232)))]; - tensor model_layers_16_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408433344)))]; + tensor model_layers_16_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408396416)))]; + tensor model_layers_16_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408398528)))]; tensor x_369_cast_fp16 = layer_norm(axes = x_369_axes_0, beta = model_layers_16_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_16_norm_conv_weight_to_fp16, x = input_871_cast_fp16)[name = tensor("x_369_cast_fp16")]; tensor input_873_perm_0 = const()[name = tensor("input_873_perm_0"), val = tensor([0, 2, 1])]; tensor input_875_pad_type_0 = const()[name = tensor("input_875_pad_type_0"), val = tensor("valid")]; @@ -2389,7 +2389,7 @@ program(1.0) tensor input_875_pad_0 = const()[name = tensor("input_875_pad_0"), val = tensor([0, 0])]; tensor input_875_dilations_0 = const()[name = tensor("input_875_dilations_0"), val = tensor([1])]; tensor input_875_groups_0 = const()[name = tensor("input_875_groups_0"), val = tensor(1)]; - tensor model_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408435456))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410532672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408400640))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410497856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_873_cast_fp16 = transpose(perm = input_873_perm_0, x = x_369_cast_fp16)[name = tensor("transpose_194")]; tensor input_875_cast_fp16 = conv(dilations = input_875_dilations_0, groups = input_875_groups_0, pad = input_875_pad_0, pad_type = input_875_pad_type_0, strides = input_875_strides_0, weight = model_layers_16_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_873_cast_fp16)[name = tensor("input_875_cast_fp16")]; tensor x_371_split_num_splits_0 = const()[name = tensor("x_371_split_num_splits_0"), val = tensor(2)]; @@ -2407,8 +2407,8 @@ program(1.0) tensor input_881_strides_0 = const()[name = tensor("input_881_strides_0"), val = tensor([1])]; tensor input_881_pad_0 = const()[name = tensor("input_881_pad_0"), val = tensor([0, 0])]; tensor input_881_dilations_0 = const()[name = tensor("input_881_dilations_0"), val = tensor([1])]; - tensor const_280_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_280_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410536832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410546112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_281_to_fp16 = const()[name = tensor("const_281_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410548224)))]; + tensor const_280_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_280_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410502016))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410511296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_281_to_fp16 = const()[name = tensor("const_281_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410513408)))]; tensor input_883_cast_fp16 = conv(bias = const_281_to_fp16, dilations = input_881_dilations_0, groups = input_881_groups_0, pad = input_881_pad_0, pad_type = input_881_pad_type_0, strides = input_881_strides_0, weight = const_280_to_fp16_quantized, x = input_879_cast_fp16)[name = tensor("input_883_cast_fp16")]; tensor input_885_cast_fp16 = silu(x = input_883_cast_fp16)[name = tensor("input_885_cast_fp16")]; tensor x_373_pad_type_0 = const()[name = tensor("x_373_pad_type_0"), val = tensor("valid")]; @@ -2416,64 +2416,64 @@ program(1.0) tensor x_373_pad_0 = const()[name = tensor("x_373_pad_0"), val = tensor([0, 0])]; tensor x_373_dilations_0 = const()[name = tensor("x_373_dilations_0"), val = tensor([1])]; tensor x_373_groups_0 = const()[name = tensor("x_373_groups_0"), val = tensor(1)]; - tensor model_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410550336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411598976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410515520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411564160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_373_cast_fp16 = conv(dilations = x_373_dilations_0, groups = x_373_groups_0, pad = x_373_pad_0, pad_type = x_373_pad_type_0, strides = x_373_strides_0, weight = model_layers_16_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_885_cast_fp16)[name = tensor("x_373_cast_fp16")]; tensor input_887_perm_0 = const()[name = tensor("input_887_perm_0"), val = tensor([0, 2, 1])]; tensor input_887_cast_fp16 = transpose(perm = input_887_perm_0, x = x_373_cast_fp16)[name = tensor("transpose_193")]; tensor input_889_cast_fp16 = add(x = input_871_cast_fp16, y = input_887_cast_fp16)[name = tensor("input_889_cast_fp16")]; tensor input_891_axes_0 = const()[name = tensor("input_891_axes_0"), val = tensor([-1])]; - tensor model_layers_16_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411601088)))]; - tensor model_layers_16_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411603200)))]; + tensor model_layers_16_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411566272)))]; + tensor model_layers_16_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411568384)))]; tensor input_891_cast_fp16 = layer_norm(axes = input_891_axes_0, beta = model_layers_16_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_16_norm_feed_forward2_weight_to_fp16, x = input_889_cast_fp16)[name = tensor("input_891_cast_fp16")]; - tensor model_layers_16_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411605312))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415799680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_16_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411570496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415764864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_152_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_16_feed_forward2_linear1_weight_to_fp16_quantized, x = input_891_cast_fp16)[name = tensor("linear_152_cast_fp16")]; tensor input_895_cast_fp16 = silu(x = linear_152_cast_fp16)[name = tensor("input_895_cast_fp16")]; - tensor model_layers_16_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415807936))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420002304))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_16_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_16_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(415773120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419967488))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_153_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_16_feed_forward2_linear2_weight_to_fp16_quantized, x = input_895_cast_fp16)[name = tensor("linear_153_cast_fp16")]; tensor var_2969_to_fp16 = const()[name = tensor("op_2969_to_fp16"), val = tensor(0x1p-1)]; tensor var_2970_cast_fp16 = mul(x = linear_153_cast_fp16, y = var_2969_to_fp16)[name = tensor("op_2970_cast_fp16")]; tensor input_901_cast_fp16 = add(x = input_889_cast_fp16, y = var_2970_cast_fp16)[name = tensor("input_901_cast_fp16")]; tensor input_903_axes_0 = const()[name = tensor("input_903_axes_0"), val = tensor([-1])]; - tensor model_layers_16_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420004416)))]; - tensor model_layers_16_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420006528)))]; + tensor model_layers_16_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_16_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419969600)))]; + tensor model_layers_16_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_16_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419971712)))]; tensor input_903_cast_fp16 = layer_norm(axes = input_903_axes_0, beta = model_layers_16_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_16_norm_out_weight_to_fp16, x = input_901_cast_fp16)[name = tensor("input_903_cast_fp16")]; tensor input_905_axes_0 = const()[name = tensor("input_905_axes_0"), val = tensor([-1])]; - tensor model_layers_17_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420008640)))]; - tensor model_layers_17_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420010752)))]; + tensor model_layers_17_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419973824)))]; + tensor model_layers_17_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419975936)))]; tensor input_905_cast_fp16 = layer_norm(axes = input_905_axes_0, beta = model_layers_17_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_17_norm_feed_forward1_weight_to_fp16, x = input_903_cast_fp16)[name = tensor("input_905_cast_fp16")]; - tensor model_layers_17_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420012864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424207232))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_17_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(419978048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424172416))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_154_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_17_feed_forward1_linear1_weight_to_fp16_quantized, x = input_905_cast_fp16)[name = tensor("linear_154_cast_fp16")]; tensor input_909_cast_fp16 = silu(x = linear_154_cast_fp16)[name = tensor("input_909_cast_fp16")]; - tensor model_layers_17_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424215488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428409856))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(424180672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428375040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_155_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_17_feed_forward1_linear2_weight_to_fp16_quantized, x = input_909_cast_fp16)[name = tensor("linear_155_cast_fp16")]; tensor var_2998_to_fp16 = const()[name = tensor("op_2998_to_fp16"), val = tensor(0x1p-1)]; tensor var_2999_cast_fp16 = mul(x = linear_155_cast_fp16, y = var_2998_to_fp16)[name = tensor("op_2999_cast_fp16")]; tensor input_915_cast_fp16 = add(x = input_903_cast_fp16, y = var_2999_cast_fp16)[name = tensor("input_915_cast_fp16")]; tensor query_35_axes_0 = const()[name = tensor("query_35_axes_0"), val = tensor([-1])]; - tensor model_layers_17_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428411968)))]; - tensor model_layers_17_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428414080)))]; + tensor model_layers_17_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428377152)))]; + tensor model_layers_17_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428379264)))]; tensor query_35_cast_fp16 = layer_norm(axes = query_35_axes_0, beta = model_layers_17_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_17_norm_self_att_weight_to_fp16, x = input_915_cast_fp16)[name = tensor("query_35_cast_fp16")]; - tensor model_layers_17_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428416192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429464832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(428381376))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429430016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_156_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_17_self_attn_linear_q_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_156_cast_fp16")]; tensor var_3015 = const()[name = tensor("op_3015"), val = tensor([1, -1, 8, 128])]; tensor q_103_cast_fp16 = reshape(shape = var_3015, x = linear_156_cast_fp16)[name = tensor("q_103_cast_fp16")]; - tensor model_layers_17_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429466944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430515584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429432128))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430480768))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_157_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_17_self_attn_linear_k_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_157_cast_fp16")]; tensor var_3019 = const()[name = tensor("op_3019"), val = tensor([1, -1, 8, 128])]; tensor k_69_cast_fp16 = reshape(shape = var_3019, x = linear_157_cast_fp16)[name = tensor("k_69_cast_fp16")]; - tensor model_layers_17_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430517696))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431566336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(430482880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431531520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_158_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_17_self_attn_linear_v_weight_to_fp16_quantized, x = query_35_cast_fp16)[name = tensor("linear_158_cast_fp16")]; tensor var_3023 = const()[name = tensor("op_3023"), val = tensor([1, -1, 8, 128])]; tensor v_35_cast_fp16 = reshape(shape = var_3023, x = linear_158_cast_fp16)[name = tensor("v_35_cast_fp16")]; tensor value_35_perm_0 = const()[name = tensor("value_35_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_17_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_17_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431568448)))]; - tensor var_3035_cast_fp16 = add(x = q_103_cast_fp16, y = model_layers_17_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3035_cast_fp16")]; - tensor model_layers_17_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_17_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431570560)))]; - tensor var_3037_cast_fp16 = add(x = q_103_cast_fp16, y = model_layers_17_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3037_cast_fp16")]; + tensor model_layers_17_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431533632))), scale = tensor([0x1.cep-8, 0x1.444p-8, 0x1.424p-8, 0x1.54p-9, 0x1.4a8p-8, 0x1.6e4p-8, 0x1.034p-8, 0x1.62p-9]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3035_cast_fp16 = add(x = q_103_cast_fp16, y = model_layers_17_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_3035_cast_fp16")]; + tensor model_layers_17_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431534720))), scale = tensor([0x1.448p-7, 0x1.37cp-7, 0x1.4f8p-8, 0x1.2b4p-7, 0x1.644p-7, 0x1.b48p-10, 0x1.898p-9, 0x1.864p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3037_cast_fp16 = add(x = q_103_cast_fp16, y = model_layers_17_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_3037_cast_fp16")]; tensor q_with_bias_v_35_perm_0 = const()[name = tensor("q_with_bias_v_35_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_381_transpose_x_0 = const()[name = tensor("x_381_transpose_x_0"), val = tensor(false)]; tensor x_381_transpose_y_0 = const()[name = tensor("x_381_transpose_y_0"), val = tensor(false)]; - tensor op_3039_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3039_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431572672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431829760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_3039_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3039_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431535808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431792896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_35_cast_fp16 = transpose(perm = q_with_bias_v_35_perm_0, x = var_3037_cast_fp16)[name = tensor("transpose_192")]; tensor x_381_cast_fp16 = matmul(transpose_x = x_381_transpose_x_0, transpose_y = x_381_transpose_y_0, x = q_with_bias_v_35_cast_fp16, y = op_3039_to_fp16_quantized)[name = tensor("x_381_cast_fp16")]; tensor x_383_pad_0 = const()[name = tensor("x_383_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2513,12 +2513,12 @@ program(1.0) tensor var_3072 = const()[name = tensor("op_3072"), val = tensor([1, -1, 1024])]; tensor var_3071_cast_fp16 = transpose(perm = var_3071_perm_0, x = x_387_cast_fp16)[name = tensor("transpose_188")]; tensor input_919_cast_fp16 = reshape(shape = var_3072, x = var_3071_cast_fp16)[name = tensor("input_919_cast_fp16")]; - tensor model_layers_17_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431830336))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432878976))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431793472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432842112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_160_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_17_self_attn_linear_out_weight_to_fp16_quantized, x = input_919_cast_fp16)[name = tensor("linear_160_cast_fp16")]; tensor input_923_cast_fp16 = add(x = input_915_cast_fp16, y = linear_160_cast_fp16)[name = tensor("input_923_cast_fp16")]; tensor x_391_axes_0 = const()[name = tensor("x_391_axes_0"), val = tensor([-1])]; - tensor model_layers_17_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432881088)))]; - tensor model_layers_17_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432883200)))]; + tensor model_layers_17_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432844224)))]; + tensor model_layers_17_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432846336)))]; tensor x_391_cast_fp16 = layer_norm(axes = x_391_axes_0, beta = model_layers_17_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_17_norm_conv_weight_to_fp16, x = input_923_cast_fp16)[name = tensor("x_391_cast_fp16")]; tensor input_925_perm_0 = const()[name = tensor("input_925_perm_0"), val = tensor([0, 2, 1])]; tensor input_927_pad_type_0 = const()[name = tensor("input_927_pad_type_0"), val = tensor("valid")]; @@ -2526,7 +2526,7 @@ program(1.0) tensor input_927_pad_0 = const()[name = tensor("input_927_pad_0"), val = tensor([0, 0])]; tensor input_927_dilations_0 = const()[name = tensor("input_927_dilations_0"), val = tensor([1])]; tensor input_927_groups_0 = const()[name = tensor("input_927_groups_0"), val = tensor(1)]; - tensor model_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432885312))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434982528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(432848448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434945664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_925_cast_fp16 = transpose(perm = input_925_perm_0, x = x_391_cast_fp16)[name = tensor("transpose_187")]; tensor input_927_cast_fp16 = conv(dilations = input_927_dilations_0, groups = input_927_groups_0, pad = input_927_pad_0, pad_type = input_927_pad_type_0, strides = input_927_strides_0, weight = model_layers_17_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_925_cast_fp16)[name = tensor("input_927_cast_fp16")]; tensor x_393_split_num_splits_0 = const()[name = tensor("x_393_split_num_splits_0"), val = tensor(2)]; @@ -2544,8 +2544,8 @@ program(1.0) tensor input_933_strides_0 = const()[name = tensor("input_933_strides_0"), val = tensor([1])]; tensor input_933_pad_0 = const()[name = tensor("input_933_pad_0"), val = tensor([0, 0])]; tensor input_933_dilations_0 = const()[name = tensor("input_933_dilations_0"), val = tensor([1])]; - tensor const_282_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_282_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434986688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434995968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_283_to_fp16 = const()[name = tensor("const_283_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434998080)))]; + tensor const_282_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_282_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434949824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434959104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_283_to_fp16 = const()[name = tensor("const_283_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434961216)))]; tensor input_935_cast_fp16 = conv(bias = const_283_to_fp16, dilations = input_933_dilations_0, groups = input_933_groups_0, pad = input_933_pad_0, pad_type = input_933_pad_type_0, strides = input_933_strides_0, weight = const_282_to_fp16_quantized, x = input_931_cast_fp16)[name = tensor("input_935_cast_fp16")]; tensor input_937_cast_fp16 = silu(x = input_935_cast_fp16)[name = tensor("input_937_cast_fp16")]; tensor x_395_pad_type_0 = const()[name = tensor("x_395_pad_type_0"), val = tensor("valid")]; @@ -2553,64 +2553,64 @@ program(1.0) tensor x_395_pad_0 = const()[name = tensor("x_395_pad_0"), val = tensor([0, 0])]; tensor x_395_dilations_0 = const()[name = tensor("x_395_dilations_0"), val = tensor([1])]; tensor x_395_groups_0 = const()[name = tensor("x_395_groups_0"), val = tensor(1)]; - tensor model_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(435000192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436048832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434963328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436011968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_395_cast_fp16 = conv(dilations = x_395_dilations_0, groups = x_395_groups_0, pad = x_395_pad_0, pad_type = x_395_pad_type_0, strides = x_395_strides_0, weight = model_layers_17_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_937_cast_fp16)[name = tensor("x_395_cast_fp16")]; tensor input_939_perm_0 = const()[name = tensor("input_939_perm_0"), val = tensor([0, 2, 1])]; tensor input_939_cast_fp16 = transpose(perm = input_939_perm_0, x = x_395_cast_fp16)[name = tensor("transpose_186")]; tensor input_941_cast_fp16 = add(x = input_923_cast_fp16, y = input_939_cast_fp16)[name = tensor("input_941_cast_fp16")]; tensor input_943_axes_0 = const()[name = tensor("input_943_axes_0"), val = tensor([-1])]; - tensor model_layers_17_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436050944)))]; - tensor model_layers_17_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436053056)))]; + tensor model_layers_17_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436014080)))]; + tensor model_layers_17_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436016192)))]; tensor input_943_cast_fp16 = layer_norm(axes = input_943_axes_0, beta = model_layers_17_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_17_norm_feed_forward2_weight_to_fp16, x = input_941_cast_fp16)[name = tensor("input_943_cast_fp16")]; - tensor model_layers_17_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436055168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(440249536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_17_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(436018304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(440212672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_161_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_17_feed_forward2_linear1_weight_to_fp16_quantized, x = input_943_cast_fp16)[name = tensor("linear_161_cast_fp16")]; tensor input_947_cast_fp16 = silu(x = linear_161_cast_fp16)[name = tensor("input_947_cast_fp16")]; - tensor model_layers_17_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(440257792))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444452160))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_17_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_17_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(440220928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444415296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_162_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_17_feed_forward2_linear2_weight_to_fp16_quantized, x = input_947_cast_fp16)[name = tensor("linear_162_cast_fp16")]; tensor var_3132_to_fp16 = const()[name = tensor("op_3132_to_fp16"), val = tensor(0x1p-1)]; tensor var_3133_cast_fp16 = mul(x = linear_162_cast_fp16, y = var_3132_to_fp16)[name = tensor("op_3133_cast_fp16")]; tensor input_953_cast_fp16 = add(x = input_941_cast_fp16, y = var_3133_cast_fp16)[name = tensor("input_953_cast_fp16")]; tensor input_955_axes_0 = const()[name = tensor("input_955_axes_0"), val = tensor([-1])]; - tensor model_layers_17_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444454272)))]; - tensor model_layers_17_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444456384)))]; + tensor model_layers_17_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_17_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444417408)))]; + tensor model_layers_17_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_17_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444419520)))]; tensor input_955_cast_fp16 = layer_norm(axes = input_955_axes_0, beta = model_layers_17_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_17_norm_out_weight_to_fp16, x = input_953_cast_fp16)[name = tensor("input_955_cast_fp16")]; tensor input_957_axes_0 = const()[name = tensor("input_957_axes_0"), val = tensor([-1])]; - tensor model_layers_18_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444458496)))]; - tensor model_layers_18_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444460608)))]; + tensor model_layers_18_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444421632)))]; + tensor model_layers_18_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444423744)))]; tensor input_957_cast_fp16 = layer_norm(axes = input_957_axes_0, beta = model_layers_18_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_18_norm_feed_forward1_weight_to_fp16, x = input_955_cast_fp16)[name = tensor("input_957_cast_fp16")]; - tensor model_layers_18_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444462720))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(448657088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_18_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(444425856))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(448620224))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_18_feed_forward1_linear1_weight_to_fp16_quantized, x = input_957_cast_fp16)[name = tensor("linear_163_cast_fp16")]; tensor input_961_cast_fp16 = silu(x = linear_163_cast_fp16)[name = tensor("input_961_cast_fp16")]; - tensor model_layers_18_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(448665344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452859712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(448628480))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452822848))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_164_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_18_feed_forward1_linear2_weight_to_fp16_quantized, x = input_961_cast_fp16)[name = tensor("linear_164_cast_fp16")]; tensor var_3161_to_fp16 = const()[name = tensor("op_3161_to_fp16"), val = tensor(0x1p-1)]; tensor var_3162_cast_fp16 = mul(x = linear_164_cast_fp16, y = var_3161_to_fp16)[name = tensor("op_3162_cast_fp16")]; tensor input_967_cast_fp16 = add(x = input_955_cast_fp16, y = var_3162_cast_fp16)[name = tensor("input_967_cast_fp16")]; tensor query_37_axes_0 = const()[name = tensor("query_37_axes_0"), val = tensor([-1])]; - tensor model_layers_18_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452861824)))]; - tensor model_layers_18_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452863936)))]; + tensor model_layers_18_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452824960)))]; + tensor model_layers_18_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452827072)))]; tensor query_37_cast_fp16 = layer_norm(axes = query_37_axes_0, beta = model_layers_18_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_18_norm_self_att_weight_to_fp16, x = input_967_cast_fp16)[name = tensor("query_37_cast_fp16")]; - tensor model_layers_18_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452866048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453914688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(452829184))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453877824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_165_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_18_self_attn_linear_q_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_165_cast_fp16")]; tensor var_3178 = const()[name = tensor("op_3178"), val = tensor([1, -1, 8, 128])]; tensor q_109_cast_fp16 = reshape(shape = var_3178, x = linear_165_cast_fp16)[name = tensor("q_109_cast_fp16")]; - tensor model_layers_18_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453916800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(454965440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453879936))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(454928576))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_166_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_18_self_attn_linear_k_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_166_cast_fp16")]; tensor var_3182 = const()[name = tensor("op_3182"), val = tensor([1, -1, 8, 128])]; tensor k_73_cast_fp16 = reshape(shape = var_3182, x = linear_166_cast_fp16)[name = tensor("k_73_cast_fp16")]; - tensor model_layers_18_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(454967552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456016192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(454930688))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455979328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_167_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_18_self_attn_linear_v_weight_to_fp16_quantized, x = query_37_cast_fp16)[name = tensor("linear_167_cast_fp16")]; tensor var_3186 = const()[name = tensor("op_3186"), val = tensor([1, -1, 8, 128])]; tensor v_37_cast_fp16 = reshape(shape = var_3186, x = linear_167_cast_fp16)[name = tensor("v_37_cast_fp16")]; tensor value_37_perm_0 = const()[name = tensor("value_37_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_18_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_18_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456018304)))]; - tensor var_3198_cast_fp16 = add(x = q_109_cast_fp16, y = model_layers_18_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3198_cast_fp16")]; - tensor model_layers_18_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_18_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456020416)))]; - tensor var_3200_cast_fp16 = add(x = q_109_cast_fp16, y = model_layers_18_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3200_cast_fp16")]; + tensor model_layers_18_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455981440))), scale = tensor([0x1.8ep-8, 0x1.86p-9, 0x1.d8cp-9, 0x1.808p-8, 0x1.76cp-8, 0x1.eb4p-9, 0x1.76p-8, 0x1.2bp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3198_cast_fp16 = add(x = q_109_cast_fp16, y = model_layers_18_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_3198_cast_fp16")]; + tensor model_layers_18_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455982528))), scale = tensor([0x1.a78p-9, 0x1.a44p-9, 0x1.3a8p-8, 0x1.194p-8, 0x1.7cp-9, 0x1.31p-7, 0x1.464p-7, 0x1.404p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3200_cast_fp16 = add(x = q_109_cast_fp16, y = model_layers_18_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_3200_cast_fp16")]; tensor q_with_bias_v_37_perm_0 = const()[name = tensor("q_with_bias_v_37_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_403_transpose_x_0 = const()[name = tensor("x_403_transpose_x_0"), val = tensor(false)]; tensor x_403_transpose_y_0 = const()[name = tensor("x_403_transpose_y_0"), val = tensor(false)]; - tensor op_3202_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3202_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456022528))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456279616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_3202_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3202_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(455983616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456240704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_37_cast_fp16 = transpose(perm = q_with_bias_v_37_perm_0, x = var_3200_cast_fp16)[name = tensor("transpose_185")]; tensor x_403_cast_fp16 = matmul(transpose_x = x_403_transpose_x_0, transpose_y = x_403_transpose_y_0, x = q_with_bias_v_37_cast_fp16, y = op_3202_to_fp16_quantized)[name = tensor("x_403_cast_fp16")]; tensor x_405_pad_0 = const()[name = tensor("x_405_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2650,12 +2650,12 @@ program(1.0) tensor var_3235 = const()[name = tensor("op_3235"), val = tensor([1, -1, 1024])]; tensor var_3234_cast_fp16 = transpose(perm = var_3234_perm_0, x = x_409_cast_fp16)[name = tensor("transpose_181")]; tensor input_971_cast_fp16 = reshape(shape = var_3235, x = var_3234_cast_fp16)[name = tensor("input_971_cast_fp16")]; - tensor model_layers_18_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456280192))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457328832))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456241280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457289920))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_169_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_18_self_attn_linear_out_weight_to_fp16_quantized, x = input_971_cast_fp16)[name = tensor("linear_169_cast_fp16")]; tensor input_975_cast_fp16 = add(x = input_967_cast_fp16, y = linear_169_cast_fp16)[name = tensor("input_975_cast_fp16")]; tensor x_413_axes_0 = const()[name = tensor("x_413_axes_0"), val = tensor([-1])]; - tensor model_layers_18_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457330944)))]; - tensor model_layers_18_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457333056)))]; + tensor model_layers_18_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457292032)))]; + tensor model_layers_18_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457294144)))]; tensor x_413_cast_fp16 = layer_norm(axes = x_413_axes_0, beta = model_layers_18_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_18_norm_conv_weight_to_fp16, x = input_975_cast_fp16)[name = tensor("x_413_cast_fp16")]; tensor input_977_perm_0 = const()[name = tensor("input_977_perm_0"), val = tensor([0, 2, 1])]; tensor input_979_pad_type_0 = const()[name = tensor("input_979_pad_type_0"), val = tensor("valid")]; @@ -2663,7 +2663,7 @@ program(1.0) tensor input_979_pad_0 = const()[name = tensor("input_979_pad_0"), val = tensor([0, 0])]; tensor input_979_dilations_0 = const()[name = tensor("input_979_dilations_0"), val = tensor([1])]; tensor input_979_groups_0 = const()[name = tensor("input_979_groups_0"), val = tensor(1)]; - tensor model_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457335168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459432384))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457296256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459393472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_977_cast_fp16 = transpose(perm = input_977_perm_0, x = x_413_cast_fp16)[name = tensor("transpose_180")]; tensor input_979_cast_fp16 = conv(dilations = input_979_dilations_0, groups = input_979_groups_0, pad = input_979_pad_0, pad_type = input_979_pad_type_0, strides = input_979_strides_0, weight = model_layers_18_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_977_cast_fp16)[name = tensor("input_979_cast_fp16")]; tensor x_415_split_num_splits_0 = const()[name = tensor("x_415_split_num_splits_0"), val = tensor(2)]; @@ -2681,8 +2681,8 @@ program(1.0) tensor input_985_strides_0 = const()[name = tensor("input_985_strides_0"), val = tensor([1])]; tensor input_985_pad_0 = const()[name = tensor("input_985_pad_0"), val = tensor([0, 0])]; tensor input_985_dilations_0 = const()[name = tensor("input_985_dilations_0"), val = tensor([1])]; - tensor const_284_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_284_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459436544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459445824))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_285_to_fp16 = const()[name = tensor("const_285_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459447936)))]; + tensor const_284_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_284_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459397632))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459406912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_285_to_fp16 = const()[name = tensor("const_285_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459409024)))]; tensor input_987_cast_fp16 = conv(bias = const_285_to_fp16, dilations = input_985_dilations_0, groups = input_985_groups_0, pad = input_985_pad_0, pad_type = input_985_pad_type_0, strides = input_985_strides_0, weight = const_284_to_fp16_quantized, x = input_983_cast_fp16)[name = tensor("input_987_cast_fp16")]; tensor input_989_cast_fp16 = silu(x = input_987_cast_fp16)[name = tensor("input_989_cast_fp16")]; tensor x_417_pad_type_0 = const()[name = tensor("x_417_pad_type_0"), val = tensor("valid")]; @@ -2690,64 +2690,64 @@ program(1.0) tensor x_417_pad_0 = const()[name = tensor("x_417_pad_0"), val = tensor([0, 0])]; tensor x_417_dilations_0 = const()[name = tensor("x_417_dilations_0"), val = tensor([1])]; tensor x_417_groups_0 = const()[name = tensor("x_417_groups_0"), val = tensor(1)]; - tensor model_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459450048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460498688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(459411136))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460459776))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_417_cast_fp16 = conv(dilations = x_417_dilations_0, groups = x_417_groups_0, pad = x_417_pad_0, pad_type = x_417_pad_type_0, strides = x_417_strides_0, weight = model_layers_18_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_989_cast_fp16)[name = tensor("x_417_cast_fp16")]; tensor input_991_perm_0 = const()[name = tensor("input_991_perm_0"), val = tensor([0, 2, 1])]; tensor input_991_cast_fp16 = transpose(perm = input_991_perm_0, x = x_417_cast_fp16)[name = tensor("transpose_179")]; tensor input_993_cast_fp16 = add(x = input_975_cast_fp16, y = input_991_cast_fp16)[name = tensor("input_993_cast_fp16")]; tensor input_995_axes_0 = const()[name = tensor("input_995_axes_0"), val = tensor([-1])]; - tensor model_layers_18_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460500800)))]; - tensor model_layers_18_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460502912)))]; + tensor model_layers_18_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460461888)))]; + tensor model_layers_18_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460464000)))]; tensor input_995_cast_fp16 = layer_norm(axes = input_995_axes_0, beta = model_layers_18_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_18_norm_feed_forward2_weight_to_fp16, x = input_993_cast_fp16)[name = tensor("input_995_cast_fp16")]; - tensor model_layers_18_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460505024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(464699392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_18_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460466112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(464660480))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_18_feed_forward2_linear1_weight_to_fp16_quantized, x = input_995_cast_fp16)[name = tensor("linear_170_cast_fp16")]; tensor input_999_cast_fp16 = silu(x = linear_170_cast_fp16)[name = tensor("input_999_cast_fp16")]; - tensor model_layers_18_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(464707648))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468902016))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_18_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_18_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(464668736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468863104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_171_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_18_feed_forward2_linear2_weight_to_fp16_quantized, x = input_999_cast_fp16)[name = tensor("linear_171_cast_fp16")]; tensor var_3295_to_fp16 = const()[name = tensor("op_3295_to_fp16"), val = tensor(0x1p-1)]; tensor var_3296_cast_fp16 = mul(x = linear_171_cast_fp16, y = var_3295_to_fp16)[name = tensor("op_3296_cast_fp16")]; tensor input_1005_cast_fp16 = add(x = input_993_cast_fp16, y = var_3296_cast_fp16)[name = tensor("input_1005_cast_fp16")]; tensor input_1007_axes_0 = const()[name = tensor("input_1007_axes_0"), val = tensor([-1])]; - tensor model_layers_18_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468904128)))]; - tensor model_layers_18_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468906240)))]; + tensor model_layers_18_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_18_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468865216)))]; + tensor model_layers_18_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_18_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468867328)))]; tensor input_1007_cast_fp16 = layer_norm(axes = input_1007_axes_0, beta = model_layers_18_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_18_norm_out_weight_to_fp16, x = input_1005_cast_fp16)[name = tensor("input_1007_cast_fp16")]; tensor input_1009_axes_0 = const()[name = tensor("input_1009_axes_0"), val = tensor([-1])]; - tensor model_layers_19_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468908352)))]; - tensor model_layers_19_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468910464)))]; + tensor model_layers_19_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468869440)))]; + tensor model_layers_19_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468871552)))]; tensor input_1009_cast_fp16 = layer_norm(axes = input_1009_axes_0, beta = model_layers_19_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_19_norm_feed_forward1_weight_to_fp16, x = input_1007_cast_fp16)[name = tensor("input_1009_cast_fp16")]; - tensor model_layers_19_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468912576))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473106944))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_19_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(468873664))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473068032))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_172_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_19_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1009_cast_fp16)[name = tensor("linear_172_cast_fp16")]; tensor input_1013_cast_fp16 = silu(x = linear_172_cast_fp16)[name = tensor("input_1013_cast_fp16")]; - tensor model_layers_19_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473115200))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477309568))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473076288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477270656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_173_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_19_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1013_cast_fp16)[name = tensor("linear_173_cast_fp16")]; tensor var_3324_to_fp16 = const()[name = tensor("op_3324_to_fp16"), val = tensor(0x1p-1)]; tensor var_3325_cast_fp16 = mul(x = linear_173_cast_fp16, y = var_3324_to_fp16)[name = tensor("op_3325_cast_fp16")]; tensor input_1019_cast_fp16 = add(x = input_1007_cast_fp16, y = var_3325_cast_fp16)[name = tensor("input_1019_cast_fp16")]; tensor query_39_axes_0 = const()[name = tensor("query_39_axes_0"), val = tensor([-1])]; - tensor model_layers_19_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477311680)))]; - tensor model_layers_19_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477313792)))]; + tensor model_layers_19_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477272768)))]; + tensor model_layers_19_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477274880)))]; tensor query_39_cast_fp16 = layer_norm(axes = query_39_axes_0, beta = model_layers_19_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_19_norm_self_att_weight_to_fp16, x = input_1019_cast_fp16)[name = tensor("query_39_cast_fp16")]; - tensor model_layers_19_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477315904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478364544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(477276992))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478325632))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_174_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_19_self_attn_linear_q_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_174_cast_fp16")]; tensor var_3341 = const()[name = tensor("op_3341"), val = tensor([1, -1, 8, 128])]; tensor q_115_cast_fp16 = reshape(shape = var_3341, x = linear_174_cast_fp16)[name = tensor("q_115_cast_fp16")]; - tensor model_layers_19_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478366656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479415296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478327744))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479376384))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_175_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_19_self_attn_linear_k_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_175_cast_fp16")]; tensor var_3345 = const()[name = tensor("op_3345"), val = tensor([1, -1, 8, 128])]; tensor k_77_cast_fp16 = reshape(shape = var_3345, x = linear_175_cast_fp16)[name = tensor("k_77_cast_fp16")]; - tensor model_layers_19_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479417408))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480466048))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(479378496))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480427136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_176_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_19_self_attn_linear_v_weight_to_fp16_quantized, x = query_39_cast_fp16)[name = tensor("linear_176_cast_fp16")]; tensor var_3349 = const()[name = tensor("op_3349"), val = tensor([1, -1, 8, 128])]; tensor v_39_cast_fp16 = reshape(shape = var_3349, x = linear_176_cast_fp16)[name = tensor("v_39_cast_fp16")]; tensor value_39_perm_0 = const()[name = tensor("value_39_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_19_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_19_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480468160)))]; - tensor var_3361_cast_fp16 = add(x = q_115_cast_fp16, y = model_layers_19_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3361_cast_fp16")]; - tensor model_layers_19_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_19_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480470272)))]; - tensor var_3363_cast_fp16 = add(x = q_115_cast_fp16, y = model_layers_19_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3363_cast_fp16")]; + tensor model_layers_19_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480429248))), scale = tensor([0x1.0f8p-7, 0x1.a58p-9, 0x1.acp-9, 0x1.9c8p-8, 0x1.1dp-8, 0x1.f9cp-9, 0x1.bbp-8, 0x1.954p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3361_cast_fp16 = add(x = q_115_cast_fp16, y = model_layers_19_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_3361_cast_fp16")]; + tensor model_layers_19_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480430336))), scale = tensor([0x1.754p-9, 0x1.2f8p-7, 0x1.3c8p-7, 0x1.074p-7, 0x1.3ap-7, 0x1.8b4p-8, 0x1.d74p-8, 0x1.ec8p-9]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3363_cast_fp16 = add(x = q_115_cast_fp16, y = model_layers_19_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_3363_cast_fp16")]; tensor q_with_bias_v_39_perm_0 = const()[name = tensor("q_with_bias_v_39_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_425_transpose_x_0 = const()[name = tensor("x_425_transpose_x_0"), val = tensor(false)]; tensor x_425_transpose_y_0 = const()[name = tensor("x_425_transpose_y_0"), val = tensor(false)]; - tensor op_3365_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3365_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480472384))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480729472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_3365_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3365_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480431424))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480688512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_39_cast_fp16 = transpose(perm = q_with_bias_v_39_perm_0, x = var_3363_cast_fp16)[name = tensor("transpose_178")]; tensor x_425_cast_fp16 = matmul(transpose_x = x_425_transpose_x_0, transpose_y = x_425_transpose_y_0, x = q_with_bias_v_39_cast_fp16, y = op_3365_to_fp16_quantized)[name = tensor("x_425_cast_fp16")]; tensor x_427_pad_0 = const()[name = tensor("x_427_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2787,12 +2787,12 @@ program(1.0) tensor var_3398 = const()[name = tensor("op_3398"), val = tensor([1, -1, 1024])]; tensor var_3397_cast_fp16 = transpose(perm = var_3397_perm_0, x = x_431_cast_fp16)[name = tensor("transpose_174")]; tensor input_1023_cast_fp16 = reshape(shape = var_3398, x = var_3397_cast_fp16)[name = tensor("input_1023_cast_fp16")]; - tensor model_layers_19_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480730048))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481778688))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(480689088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481737728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_178_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_19_self_attn_linear_out_weight_to_fp16_quantized, x = input_1023_cast_fp16)[name = tensor("linear_178_cast_fp16")]; tensor input_1027_cast_fp16 = add(x = input_1019_cast_fp16, y = linear_178_cast_fp16)[name = tensor("input_1027_cast_fp16")]; tensor x_435_axes_0 = const()[name = tensor("x_435_axes_0"), val = tensor([-1])]; - tensor model_layers_19_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481780800)))]; - tensor model_layers_19_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481782912)))]; + tensor model_layers_19_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481739840)))]; + tensor model_layers_19_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481741952)))]; tensor x_435_cast_fp16 = layer_norm(axes = x_435_axes_0, beta = model_layers_19_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_19_norm_conv_weight_to_fp16, x = input_1027_cast_fp16)[name = tensor("x_435_cast_fp16")]; tensor input_1029_perm_0 = const()[name = tensor("input_1029_perm_0"), val = tensor([0, 2, 1])]; tensor input_1031_pad_type_0 = const()[name = tensor("input_1031_pad_type_0"), val = tensor("valid")]; @@ -2800,7 +2800,7 @@ program(1.0) tensor input_1031_pad_0 = const()[name = tensor("input_1031_pad_0"), val = tensor([0, 0])]; tensor input_1031_dilations_0 = const()[name = tensor("input_1031_dilations_0"), val = tensor([1])]; tensor input_1031_groups_0 = const()[name = tensor("input_1031_groups_0"), val = tensor(1)]; - tensor model_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481785024))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483882240))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(481744064))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483841280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_1029_cast_fp16 = transpose(perm = input_1029_perm_0, x = x_435_cast_fp16)[name = tensor("transpose_173")]; tensor input_1031_cast_fp16 = conv(dilations = input_1031_dilations_0, groups = input_1031_groups_0, pad = input_1031_pad_0, pad_type = input_1031_pad_type_0, strides = input_1031_strides_0, weight = model_layers_19_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1029_cast_fp16)[name = tensor("input_1031_cast_fp16")]; tensor x_437_split_num_splits_0 = const()[name = tensor("x_437_split_num_splits_0"), val = tensor(2)]; @@ -2818,8 +2818,8 @@ program(1.0) tensor input_1037_strides_0 = const()[name = tensor("input_1037_strides_0"), val = tensor([1])]; tensor input_1037_pad_0 = const()[name = tensor("input_1037_pad_0"), val = tensor([0, 0])]; tensor input_1037_dilations_0 = const()[name = tensor("input_1037_dilations_0"), val = tensor([1])]; - tensor const_286_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_286_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483886400))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483895680))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_287_to_fp16 = const()[name = tensor("const_287_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483897792)))]; + tensor const_286_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_286_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483845440))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483854720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_287_to_fp16 = const()[name = tensor("const_287_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483856832)))]; tensor input_1039_cast_fp16 = conv(bias = const_287_to_fp16, dilations = input_1037_dilations_0, groups = input_1037_groups_0, pad = input_1037_pad_0, pad_type = input_1037_pad_type_0, strides = input_1037_strides_0, weight = const_286_to_fp16_quantized, x = input_1035_cast_fp16)[name = tensor("input_1039_cast_fp16")]; tensor input_1041_cast_fp16 = silu(x = input_1039_cast_fp16)[name = tensor("input_1041_cast_fp16")]; tensor x_439_pad_type_0 = const()[name = tensor("x_439_pad_type_0"), val = tensor("valid")]; @@ -2827,64 +2827,64 @@ program(1.0) tensor x_439_pad_0 = const()[name = tensor("x_439_pad_0"), val = tensor([0, 0])]; tensor x_439_dilations_0 = const()[name = tensor("x_439_dilations_0"), val = tensor([1])]; tensor x_439_groups_0 = const()[name = tensor("x_439_groups_0"), val = tensor(1)]; - tensor model_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483899904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484948544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483858944))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484907584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_439_cast_fp16 = conv(dilations = x_439_dilations_0, groups = x_439_groups_0, pad = x_439_pad_0, pad_type = x_439_pad_type_0, strides = x_439_strides_0, weight = model_layers_19_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1041_cast_fp16)[name = tensor("x_439_cast_fp16")]; tensor input_1043_perm_0 = const()[name = tensor("input_1043_perm_0"), val = tensor([0, 2, 1])]; tensor input_1043_cast_fp16 = transpose(perm = input_1043_perm_0, x = x_439_cast_fp16)[name = tensor("transpose_172")]; tensor input_1045_cast_fp16 = add(x = input_1027_cast_fp16, y = input_1043_cast_fp16)[name = tensor("input_1045_cast_fp16")]; tensor input_1047_axes_0 = const()[name = tensor("input_1047_axes_0"), val = tensor([-1])]; - tensor model_layers_19_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484950656)))]; - tensor model_layers_19_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484952768)))]; + tensor model_layers_19_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484909696)))]; + tensor model_layers_19_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484911808)))]; tensor input_1047_cast_fp16 = layer_norm(axes = input_1047_axes_0, beta = model_layers_19_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_19_norm_feed_forward2_weight_to_fp16, x = input_1045_cast_fp16)[name = tensor("input_1047_cast_fp16")]; - tensor model_layers_19_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484954880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489149248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_19_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(484913920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489108288))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_179_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_19_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1047_cast_fp16)[name = tensor("linear_179_cast_fp16")]; tensor input_1051_cast_fp16 = silu(x = linear_179_cast_fp16)[name = tensor("input_1051_cast_fp16")]; - tensor model_layers_19_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489157504))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493351872))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_19_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_19_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489116544))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493310912))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_180_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_19_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1051_cast_fp16)[name = tensor("linear_180_cast_fp16")]; tensor var_3458_to_fp16 = const()[name = tensor("op_3458_to_fp16"), val = tensor(0x1p-1)]; tensor var_3459_cast_fp16 = mul(x = linear_180_cast_fp16, y = var_3458_to_fp16)[name = tensor("op_3459_cast_fp16")]; tensor input_1057_cast_fp16 = add(x = input_1045_cast_fp16, y = var_3459_cast_fp16)[name = tensor("input_1057_cast_fp16")]; tensor input_1059_axes_0 = const()[name = tensor("input_1059_axes_0"), val = tensor([-1])]; - tensor model_layers_19_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493353984)))]; - tensor model_layers_19_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493356096)))]; + tensor model_layers_19_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_19_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493313024)))]; + tensor model_layers_19_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_19_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493315136)))]; tensor input_1059_cast_fp16 = layer_norm(axes = input_1059_axes_0, beta = model_layers_19_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_19_norm_out_weight_to_fp16, x = input_1057_cast_fp16)[name = tensor("input_1059_cast_fp16")]; tensor input_1061_axes_0 = const()[name = tensor("input_1061_axes_0"), val = tensor([-1])]; - tensor model_layers_20_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493358208)))]; - tensor model_layers_20_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493360320)))]; + tensor model_layers_20_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493317248)))]; + tensor model_layers_20_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493319360)))]; tensor input_1061_cast_fp16 = layer_norm(axes = input_1061_axes_0, beta = model_layers_20_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_20_norm_feed_forward1_weight_to_fp16, x = input_1059_cast_fp16)[name = tensor("input_1061_cast_fp16")]; - tensor model_layers_20_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493362432))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497556800))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_20_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493321472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497515840))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_20_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1061_cast_fp16)[name = tensor("linear_181_cast_fp16")]; tensor input_1065_cast_fp16 = silu(x = linear_181_cast_fp16)[name = tensor("input_1065_cast_fp16")]; - tensor model_layers_20_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497565056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501759424))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497524096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501718464))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_182_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_20_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1065_cast_fp16)[name = tensor("linear_182_cast_fp16")]; tensor var_3487_to_fp16 = const()[name = tensor("op_3487_to_fp16"), val = tensor(0x1p-1)]; tensor var_3488_cast_fp16 = mul(x = linear_182_cast_fp16, y = var_3487_to_fp16)[name = tensor("op_3488_cast_fp16")]; tensor input_1071_cast_fp16 = add(x = input_1059_cast_fp16, y = var_3488_cast_fp16)[name = tensor("input_1071_cast_fp16")]; tensor query_41_axes_0 = const()[name = tensor("query_41_axes_0"), val = tensor([-1])]; - tensor model_layers_20_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501761536)))]; - tensor model_layers_20_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501763648)))]; + tensor model_layers_20_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501720576)))]; + tensor model_layers_20_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501722688)))]; tensor query_41_cast_fp16 = layer_norm(axes = query_41_axes_0, beta = model_layers_20_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_20_norm_self_att_weight_to_fp16, x = input_1071_cast_fp16)[name = tensor("query_41_cast_fp16")]; - tensor model_layers_20_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501765760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(502814400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(501724800))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(502773440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_183_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_20_self_attn_linear_q_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_183_cast_fp16")]; tensor var_3504 = const()[name = tensor("op_3504"), val = tensor([1, -1, 8, 128])]; tensor q_121_cast_fp16 = reshape(shape = var_3504, x = linear_183_cast_fp16)[name = tensor("q_121_cast_fp16")]; - tensor model_layers_20_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(502816512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503865152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(502775552))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503824192))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_184_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_20_self_attn_linear_k_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_184_cast_fp16")]; tensor var_3508 = const()[name = tensor("op_3508"), val = tensor([1, -1, 8, 128])]; tensor k_81_cast_fp16 = reshape(shape = var_3508, x = linear_184_cast_fp16)[name = tensor("k_81_cast_fp16")]; - tensor model_layers_20_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503867264))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504915904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503826304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504874944))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_185_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_20_self_attn_linear_v_weight_to_fp16_quantized, x = query_41_cast_fp16)[name = tensor("linear_185_cast_fp16")]; tensor var_3512 = const()[name = tensor("op_3512"), val = tensor([1, -1, 8, 128])]; tensor v_41_cast_fp16 = reshape(shape = var_3512, x = linear_185_cast_fp16)[name = tensor("v_41_cast_fp16")]; tensor value_41_perm_0 = const()[name = tensor("value_41_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_20_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_20_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504918016)))]; - tensor var_3524_cast_fp16 = add(x = q_121_cast_fp16, y = model_layers_20_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3524_cast_fp16")]; - tensor model_layers_20_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_20_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504920128)))]; - tensor var_3526_cast_fp16 = add(x = q_121_cast_fp16, y = model_layers_20_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3526_cast_fp16")]; + tensor model_layers_20_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504877056))), scale = tensor([0x1.f6cp-9, 0x1.6f4p-8, 0x1.14p-8, 0x1.abp-8, 0x1.194p-7, 0x1.f5p-8, 0x1.074p-8, 0x1.33p-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3524_cast_fp16 = add(x = q_121_cast_fp16, y = model_layers_20_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_3524_cast_fp16")]; + tensor model_layers_20_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504878144))), scale = tensor([0x1.5bp-7, 0x1.4bp-7, 0x1.1e8p-7, 0x1.1bp-8, 0x1.d98p-10, 0x1.41cp-7, 0x1.7b8p-8, 0x1.52cp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3526_cast_fp16 = add(x = q_121_cast_fp16, y = model_layers_20_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_3526_cast_fp16")]; tensor q_with_bias_v_41_perm_0 = const()[name = tensor("q_with_bias_v_41_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_447_transpose_x_0 = const()[name = tensor("x_447_transpose_x_0"), val = tensor(false)]; tensor x_447_transpose_y_0 = const()[name = tensor("x_447_transpose_y_0"), val = tensor(false)]; - tensor op_3528_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3528_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504922240))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505179328))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_3528_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3528_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(504879232))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505136320))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_41_cast_fp16 = transpose(perm = q_with_bias_v_41_perm_0, x = var_3526_cast_fp16)[name = tensor("transpose_171")]; tensor x_447_cast_fp16 = matmul(transpose_x = x_447_transpose_x_0, transpose_y = x_447_transpose_y_0, x = q_with_bias_v_41_cast_fp16, y = op_3528_to_fp16_quantized)[name = tensor("x_447_cast_fp16")]; tensor x_449_pad_0 = const()[name = tensor("x_449_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -2924,12 +2924,12 @@ program(1.0) tensor var_3561 = const()[name = tensor("op_3561"), val = tensor([1, -1, 1024])]; tensor var_3560_cast_fp16 = transpose(perm = var_3560_perm_0, x = x_453_cast_fp16)[name = tensor("transpose_167")]; tensor input_1075_cast_fp16 = reshape(shape = var_3561, x = var_3560_cast_fp16)[name = tensor("input_1075_cast_fp16")]; - tensor model_layers_20_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505179904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506228544))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505136896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506185536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_187_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_20_self_attn_linear_out_weight_to_fp16_quantized, x = input_1075_cast_fp16)[name = tensor("linear_187_cast_fp16")]; tensor input_1079_cast_fp16 = add(x = input_1071_cast_fp16, y = linear_187_cast_fp16)[name = tensor("input_1079_cast_fp16")]; tensor x_457_axes_0 = const()[name = tensor("x_457_axes_0"), val = tensor([-1])]; - tensor model_layers_20_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506230656)))]; - tensor model_layers_20_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506232768)))]; + tensor model_layers_20_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506187648)))]; + tensor model_layers_20_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506189760)))]; tensor x_457_cast_fp16 = layer_norm(axes = x_457_axes_0, beta = model_layers_20_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_20_norm_conv_weight_to_fp16, x = input_1079_cast_fp16)[name = tensor("x_457_cast_fp16")]; tensor input_1081_perm_0 = const()[name = tensor("input_1081_perm_0"), val = tensor([0, 2, 1])]; tensor input_1083_pad_type_0 = const()[name = tensor("input_1083_pad_type_0"), val = tensor("valid")]; @@ -2937,7 +2937,7 @@ program(1.0) tensor input_1083_pad_0 = const()[name = tensor("input_1083_pad_0"), val = tensor([0, 0])]; tensor input_1083_dilations_0 = const()[name = tensor("input_1083_dilations_0"), val = tensor([1])]; tensor input_1083_groups_0 = const()[name = tensor("input_1083_groups_0"), val = tensor(1)]; - tensor model_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506234880))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508332096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(506191872))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508289088))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_1081_cast_fp16 = transpose(perm = input_1081_perm_0, x = x_457_cast_fp16)[name = tensor("transpose_166")]; tensor input_1083_cast_fp16 = conv(dilations = input_1083_dilations_0, groups = input_1083_groups_0, pad = input_1083_pad_0, pad_type = input_1083_pad_type_0, strides = input_1083_strides_0, weight = model_layers_20_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1081_cast_fp16)[name = tensor("input_1083_cast_fp16")]; tensor x_459_split_num_splits_0 = const()[name = tensor("x_459_split_num_splits_0"), val = tensor(2)]; @@ -2955,8 +2955,8 @@ program(1.0) tensor input_1089_strides_0 = const()[name = tensor("input_1089_strides_0"), val = tensor([1])]; tensor input_1089_pad_0 = const()[name = tensor("input_1089_pad_0"), val = tensor([0, 0])]; tensor input_1089_dilations_0 = const()[name = tensor("input_1089_dilations_0"), val = tensor([1])]; - tensor const_288_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_288_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508336256))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508345536))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_289_to_fp16 = const()[name = tensor("const_289_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508347648)))]; + tensor const_288_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_288_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508293248))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508302528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_289_to_fp16 = const()[name = tensor("const_289_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508304640)))]; tensor input_1091_cast_fp16 = conv(bias = const_289_to_fp16, dilations = input_1089_dilations_0, groups = input_1089_groups_0, pad = input_1089_pad_0, pad_type = input_1089_pad_type_0, strides = input_1089_strides_0, weight = const_288_to_fp16_quantized, x = input_1087_cast_fp16)[name = tensor("input_1091_cast_fp16")]; tensor input_1093_cast_fp16 = silu(x = input_1091_cast_fp16)[name = tensor("input_1093_cast_fp16")]; tensor x_461_pad_type_0 = const()[name = tensor("x_461_pad_type_0"), val = tensor("valid")]; @@ -2964,64 +2964,64 @@ program(1.0) tensor x_461_pad_0 = const()[name = tensor("x_461_pad_0"), val = tensor([0, 0])]; tensor x_461_dilations_0 = const()[name = tensor("x_461_dilations_0"), val = tensor([1])]; tensor x_461_groups_0 = const()[name = tensor("x_461_groups_0"), val = tensor(1)]; - tensor model_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508349760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509398400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(508306752))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509355392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_461_cast_fp16 = conv(dilations = x_461_dilations_0, groups = x_461_groups_0, pad = x_461_pad_0, pad_type = x_461_pad_type_0, strides = x_461_strides_0, weight = model_layers_20_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1093_cast_fp16)[name = tensor("x_461_cast_fp16")]; tensor input_1095_perm_0 = const()[name = tensor("input_1095_perm_0"), val = tensor([0, 2, 1])]; tensor input_1095_cast_fp16 = transpose(perm = input_1095_perm_0, x = x_461_cast_fp16)[name = tensor("transpose_165")]; tensor input_1097_cast_fp16 = add(x = input_1079_cast_fp16, y = input_1095_cast_fp16)[name = tensor("input_1097_cast_fp16")]; tensor input_1099_axes_0 = const()[name = tensor("input_1099_axes_0"), val = tensor([-1])]; - tensor model_layers_20_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509400512)))]; - tensor model_layers_20_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509402624)))]; + tensor model_layers_20_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509357504)))]; + tensor model_layers_20_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509359616)))]; tensor input_1099_cast_fp16 = layer_norm(axes = input_1099_axes_0, beta = model_layers_20_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_20_norm_feed_forward2_weight_to_fp16, x = input_1097_cast_fp16)[name = tensor("input_1099_cast_fp16")]; - tensor model_layers_20_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509404736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513599104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_20_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(509361728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513556096))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_20_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1099_cast_fp16)[name = tensor("linear_188_cast_fp16")]; tensor input_1103_cast_fp16 = silu(x = linear_188_cast_fp16)[name = tensor("input_1103_cast_fp16")]; - tensor model_layers_20_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513607360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517801728))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_20_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_20_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(513564352))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517758720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_189_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_20_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1103_cast_fp16)[name = tensor("linear_189_cast_fp16")]; tensor var_3621_to_fp16 = const()[name = tensor("op_3621_to_fp16"), val = tensor(0x1p-1)]; tensor var_3622_cast_fp16 = mul(x = linear_189_cast_fp16, y = var_3621_to_fp16)[name = tensor("op_3622_cast_fp16")]; tensor input_1109_cast_fp16 = add(x = input_1097_cast_fp16, y = var_3622_cast_fp16)[name = tensor("input_1109_cast_fp16")]; tensor input_1111_axes_0 = const()[name = tensor("input_1111_axes_0"), val = tensor([-1])]; - tensor model_layers_20_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517803840)))]; - tensor model_layers_20_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517805952)))]; + tensor model_layers_20_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_20_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517760832)))]; + tensor model_layers_20_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_20_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517762944)))]; tensor input_1111_cast_fp16 = layer_norm(axes = input_1111_axes_0, beta = model_layers_20_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_20_norm_out_weight_to_fp16, x = input_1109_cast_fp16)[name = tensor("input_1111_cast_fp16")]; tensor input_1113_axes_0 = const()[name = tensor("input_1113_axes_0"), val = tensor([-1])]; - tensor model_layers_21_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517808064)))]; - tensor model_layers_21_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517810176)))]; + tensor model_layers_21_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517765056)))]; + tensor model_layers_21_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517767168)))]; tensor input_1113_cast_fp16 = layer_norm(axes = input_1113_axes_0, beta = model_layers_21_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_21_norm_feed_forward1_weight_to_fp16, x = input_1111_cast_fp16)[name = tensor("input_1113_cast_fp16")]; - tensor model_layers_21_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517812288))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522006656))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_21_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517769280))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521963648))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_21_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1113_cast_fp16)[name = tensor("linear_190_cast_fp16")]; tensor input_1117_cast_fp16 = silu(x = linear_190_cast_fp16)[name = tensor("input_1117_cast_fp16")]; - tensor model_layers_21_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(522014912))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526209280))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(521971904))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526166272))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_191_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_21_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1117_cast_fp16)[name = tensor("linear_191_cast_fp16")]; tensor var_3650_to_fp16 = const()[name = tensor("op_3650_to_fp16"), val = tensor(0x1p-1)]; tensor var_3651_cast_fp16 = mul(x = linear_191_cast_fp16, y = var_3650_to_fp16)[name = tensor("op_3651_cast_fp16")]; tensor input_1123_cast_fp16 = add(x = input_1111_cast_fp16, y = var_3651_cast_fp16)[name = tensor("input_1123_cast_fp16")]; tensor query_43_axes_0 = const()[name = tensor("query_43_axes_0"), val = tensor([-1])]; - tensor model_layers_21_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526211392)))]; - tensor model_layers_21_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526213504)))]; + tensor model_layers_21_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526168384)))]; + tensor model_layers_21_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526170496)))]; tensor query_43_cast_fp16 = layer_norm(axes = query_43_axes_0, beta = model_layers_21_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_21_norm_self_att_weight_to_fp16, x = input_1123_cast_fp16)[name = tensor("query_43_cast_fp16")]; - tensor model_layers_21_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526215616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527264256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(526172608))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527221248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_192_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_21_self_attn_linear_q_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_192_cast_fp16")]; tensor var_3667 = const()[name = tensor("op_3667"), val = tensor([1, -1, 8, 128])]; tensor q_127_cast_fp16 = reshape(shape = var_3667, x = linear_192_cast_fp16)[name = tensor("q_127_cast_fp16")]; - tensor model_layers_21_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527266368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528315008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(527223360))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528272000))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_193_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_21_self_attn_linear_k_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_193_cast_fp16")]; tensor var_3671 = const()[name = tensor("op_3671"), val = tensor([1, -1, 8, 128])]; tensor k_85_cast_fp16 = reshape(shape = var_3671, x = linear_193_cast_fp16)[name = tensor("k_85_cast_fp16")]; - tensor model_layers_21_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528317120))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529365760))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(528274112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529322752))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_194_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_21_self_attn_linear_v_weight_to_fp16_quantized, x = query_43_cast_fp16)[name = tensor("linear_194_cast_fp16")]; tensor var_3675 = const()[name = tensor("op_3675"), val = tensor([1, -1, 8, 128])]; tensor v_43_cast_fp16 = reshape(shape = var_3675, x = linear_194_cast_fp16)[name = tensor("v_43_cast_fp16")]; tensor value_43_perm_0 = const()[name = tensor("value_43_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_21_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_21_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529367872)))]; - tensor var_3687_cast_fp16 = add(x = q_127_cast_fp16, y = model_layers_21_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3687_cast_fp16")]; - tensor model_layers_21_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_21_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529369984)))]; - tensor var_3689_cast_fp16 = add(x = q_127_cast_fp16, y = model_layers_21_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3689_cast_fp16")]; + tensor model_layers_21_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529324864))), scale = tensor([0x1.cdp-8, 0x1.1bcp-7, 0x1.194p-8, 0x1.018p-8, 0x1.15cp-7, 0x1.3bcp-8, 0x1.c4cp-8, 0x1.68cp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3687_cast_fp16 = add(x = q_127_cast_fp16, y = model_layers_21_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_3687_cast_fp16")]; + tensor model_layers_21_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529325952))), scale = tensor([0x1.09cp-7, 0x1.c14p-8, 0x1.15cp-7, 0x1.404p-7, 0x1.edcp-8, 0x1.a24p-8, 0x1.66p-7, 0x1.20cp-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3689_cast_fp16 = add(x = q_127_cast_fp16, y = model_layers_21_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_3689_cast_fp16")]; tensor q_with_bias_v_43_perm_0 = const()[name = tensor("q_with_bias_v_43_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_469_transpose_x_0 = const()[name = tensor("x_469_transpose_x_0"), val = tensor(false)]; tensor x_469_transpose_y_0 = const()[name = tensor("x_469_transpose_y_0"), val = tensor(false)]; - tensor op_3691_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3691_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529372096))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529629184))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_3691_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3691_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529327040))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529584128))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_43_cast_fp16 = transpose(perm = q_with_bias_v_43_perm_0, x = var_3689_cast_fp16)[name = tensor("transpose_164")]; tensor x_469_cast_fp16 = matmul(transpose_x = x_469_transpose_x_0, transpose_y = x_469_transpose_y_0, x = q_with_bias_v_43_cast_fp16, y = op_3691_to_fp16_quantized)[name = tensor("x_469_cast_fp16")]; tensor x_471_pad_0 = const()[name = tensor("x_471_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -3061,12 +3061,12 @@ program(1.0) tensor var_3724 = const()[name = tensor("op_3724"), val = tensor([1, -1, 1024])]; tensor var_3723_cast_fp16 = transpose(perm = var_3723_perm_0, x = x_475_cast_fp16)[name = tensor("transpose_160")]; tensor input_1127_cast_fp16 = reshape(shape = var_3724, x = var_3723_cast_fp16)[name = tensor("input_1127_cast_fp16")]; - tensor model_layers_21_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529629760))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530678400))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529584704))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530633344))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_196_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_21_self_attn_linear_out_weight_to_fp16_quantized, x = input_1127_cast_fp16)[name = tensor("linear_196_cast_fp16")]; tensor input_1131_cast_fp16 = add(x = input_1123_cast_fp16, y = linear_196_cast_fp16)[name = tensor("input_1131_cast_fp16")]; tensor x_479_axes_0 = const()[name = tensor("x_479_axes_0"), val = tensor([-1])]; - tensor model_layers_21_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530680512)))]; - tensor model_layers_21_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530682624)))]; + tensor model_layers_21_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530635456)))]; + tensor model_layers_21_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530637568)))]; tensor x_479_cast_fp16 = layer_norm(axes = x_479_axes_0, beta = model_layers_21_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_21_norm_conv_weight_to_fp16, x = input_1131_cast_fp16)[name = tensor("x_479_cast_fp16")]; tensor input_1133_perm_0 = const()[name = tensor("input_1133_perm_0"), val = tensor([0, 2, 1])]; tensor input_1135_pad_type_0 = const()[name = tensor("input_1135_pad_type_0"), val = tensor("valid")]; @@ -3074,7 +3074,7 @@ program(1.0) tensor input_1135_pad_0 = const()[name = tensor("input_1135_pad_0"), val = tensor([0, 0])]; tensor input_1135_dilations_0 = const()[name = tensor("input_1135_dilations_0"), val = tensor([1])]; tensor input_1135_groups_0 = const()[name = tensor("input_1135_groups_0"), val = tensor(1)]; - tensor model_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530684736))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532781952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(530639680))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532736896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_1133_cast_fp16 = transpose(perm = input_1133_perm_0, x = x_479_cast_fp16)[name = tensor("transpose_159")]; tensor input_1135_cast_fp16 = conv(dilations = input_1135_dilations_0, groups = input_1135_groups_0, pad = input_1135_pad_0, pad_type = input_1135_pad_type_0, strides = input_1135_strides_0, weight = model_layers_21_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1133_cast_fp16)[name = tensor("input_1135_cast_fp16")]; tensor x_481_split_num_splits_0 = const()[name = tensor("x_481_split_num_splits_0"), val = tensor(2)]; @@ -3092,8 +3092,8 @@ program(1.0) tensor input_1141_strides_0 = const()[name = tensor("input_1141_strides_0"), val = tensor([1])]; tensor input_1141_pad_0 = const()[name = tensor("input_1141_pad_0"), val = tensor([0, 0])]; tensor input_1141_dilations_0 = const()[name = tensor("input_1141_dilations_0"), val = tensor([1])]; - tensor const_290_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_290_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532786112))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532795392))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_291_to_fp16 = const()[name = tensor("const_291_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532797504)))]; + tensor const_290_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_290_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532741056))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532750336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_291_to_fp16 = const()[name = tensor("const_291_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532752448)))]; tensor input_1143_cast_fp16 = conv(bias = const_291_to_fp16, dilations = input_1141_dilations_0, groups = input_1141_groups_0, pad = input_1141_pad_0, pad_type = input_1141_pad_type_0, strides = input_1141_strides_0, weight = const_290_to_fp16_quantized, x = input_1139_cast_fp16)[name = tensor("input_1143_cast_fp16")]; tensor input_1145_cast_fp16 = silu(x = input_1143_cast_fp16)[name = tensor("input_1145_cast_fp16")]; tensor x_483_pad_type_0 = const()[name = tensor("x_483_pad_type_0"), val = tensor("valid")]; @@ -3101,64 +3101,64 @@ program(1.0) tensor x_483_pad_0 = const()[name = tensor("x_483_pad_0"), val = tensor([0, 0])]; tensor x_483_dilations_0 = const()[name = tensor("x_483_dilations_0"), val = tensor([1])]; tensor x_483_groups_0 = const()[name = tensor("x_483_groups_0"), val = tensor(1)]; - tensor model_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532799616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533848256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532754560))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533803200))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_483_cast_fp16 = conv(dilations = x_483_dilations_0, groups = x_483_groups_0, pad = x_483_pad_0, pad_type = x_483_pad_type_0, strides = x_483_strides_0, weight = model_layers_21_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1145_cast_fp16)[name = tensor("x_483_cast_fp16")]; tensor input_1147_perm_0 = const()[name = tensor("input_1147_perm_0"), val = tensor([0, 2, 1])]; tensor input_1147_cast_fp16 = transpose(perm = input_1147_perm_0, x = x_483_cast_fp16)[name = tensor("transpose_158")]; tensor input_1149_cast_fp16 = add(x = input_1131_cast_fp16, y = input_1147_cast_fp16)[name = tensor("input_1149_cast_fp16")]; tensor input_1151_axes_0 = const()[name = tensor("input_1151_axes_0"), val = tensor([-1])]; - tensor model_layers_21_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533850368)))]; - tensor model_layers_21_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533852480)))]; + tensor model_layers_21_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533805312)))]; + tensor model_layers_21_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533807424)))]; tensor input_1151_cast_fp16 = layer_norm(axes = input_1151_axes_0, beta = model_layers_21_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_21_norm_feed_forward2_weight_to_fp16, x = input_1149_cast_fp16)[name = tensor("input_1151_cast_fp16")]; - tensor model_layers_21_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533854592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538048960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_21_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(533809536))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538003904))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_21_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1151_cast_fp16)[name = tensor("linear_197_cast_fp16")]; tensor input_1155_cast_fp16 = silu(x = linear_197_cast_fp16)[name = tensor("input_1155_cast_fp16")]; - tensor model_layers_21_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538057216))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542251584))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_21_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_21_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(538012160))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542206528))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_198_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_21_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1155_cast_fp16)[name = tensor("linear_198_cast_fp16")]; tensor var_3784_to_fp16 = const()[name = tensor("op_3784_to_fp16"), val = tensor(0x1p-1)]; tensor var_3785_cast_fp16 = mul(x = linear_198_cast_fp16, y = var_3784_to_fp16)[name = tensor("op_3785_cast_fp16")]; tensor input_1161_cast_fp16 = add(x = input_1149_cast_fp16, y = var_3785_cast_fp16)[name = tensor("input_1161_cast_fp16")]; tensor input_1163_axes_0 = const()[name = tensor("input_1163_axes_0"), val = tensor([-1])]; - tensor model_layers_21_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542253696)))]; - tensor model_layers_21_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542255808)))]; + tensor model_layers_21_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_21_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542208640)))]; + tensor model_layers_21_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_21_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542210752)))]; tensor input_1163_cast_fp16 = layer_norm(axes = input_1163_axes_0, beta = model_layers_21_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_21_norm_out_weight_to_fp16, x = input_1161_cast_fp16)[name = tensor("input_1163_cast_fp16")]; tensor input_1165_axes_0 = const()[name = tensor("input_1165_axes_0"), val = tensor([-1])]; - tensor model_layers_22_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542257920)))]; - tensor model_layers_22_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542260032)))]; + tensor model_layers_22_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542212864)))]; + tensor model_layers_22_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542214976)))]; tensor input_1165_cast_fp16 = layer_norm(axes = input_1165_axes_0, beta = model_layers_22_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_22_norm_feed_forward1_weight_to_fp16, x = input_1163_cast_fp16)[name = tensor("input_1165_cast_fp16")]; - tensor model_layers_22_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542262144))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(546456512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_22_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(542217088))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(546411456))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_199_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_22_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1165_cast_fp16)[name = tensor("linear_199_cast_fp16")]; tensor input_1169_cast_fp16 = silu(x = linear_199_cast_fp16)[name = tensor("input_1169_cast_fp16")]; - tensor model_layers_22_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(546464768))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550659136))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(546419712))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550614080))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_200_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_22_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1169_cast_fp16)[name = tensor("linear_200_cast_fp16")]; tensor var_3813_to_fp16 = const()[name = tensor("op_3813_to_fp16"), val = tensor(0x1p-1)]; tensor var_3814_cast_fp16 = mul(x = linear_200_cast_fp16, y = var_3813_to_fp16)[name = tensor("op_3814_cast_fp16")]; tensor input_1175_cast_fp16 = add(x = input_1163_cast_fp16, y = var_3814_cast_fp16)[name = tensor("input_1175_cast_fp16")]; tensor query_45_axes_0 = const()[name = tensor("query_45_axes_0"), val = tensor([-1])]; - tensor model_layers_22_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550661248)))]; - tensor model_layers_22_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550663360)))]; + tensor model_layers_22_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550616192)))]; + tensor model_layers_22_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550618304)))]; tensor query_45_cast_fp16 = layer_norm(axes = query_45_axes_0, beta = model_layers_22_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_22_norm_self_att_weight_to_fp16, x = input_1175_cast_fp16)[name = tensor("query_45_cast_fp16")]; - tensor model_layers_22_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550665472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(551714112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550620416))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(551669056))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_201_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_22_self_attn_linear_q_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_201_cast_fp16")]; tensor var_3830 = const()[name = tensor("op_3830"), val = tensor([1, -1, 8, 128])]; tensor q_133_cast_fp16 = reshape(shape = var_3830, x = linear_201_cast_fp16)[name = tensor("q_133_cast_fp16")]; - tensor model_layers_22_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(551716224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552764864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(551671168))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552719808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_202_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_22_self_attn_linear_k_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_202_cast_fp16")]; tensor var_3834 = const()[name = tensor("op_3834"), val = tensor([1, -1, 8, 128])]; tensor k_89_cast_fp16 = reshape(shape = var_3834, x = linear_202_cast_fp16)[name = tensor("k_89_cast_fp16")]; - tensor model_layers_22_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552766976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553815616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552721920))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553770560))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_203_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_22_self_attn_linear_v_weight_to_fp16_quantized, x = query_45_cast_fp16)[name = tensor("linear_203_cast_fp16")]; tensor var_3838 = const()[name = tensor("op_3838"), val = tensor([1, -1, 8, 128])]; tensor v_45_cast_fp16 = reshape(shape = var_3838, x = linear_203_cast_fp16)[name = tensor("v_45_cast_fp16")]; tensor value_45_perm_0 = const()[name = tensor("value_45_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_22_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_22_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553817728)))]; - tensor var_3850_cast_fp16 = add(x = q_133_cast_fp16, y = model_layers_22_self_attn_pos_bias_u_to_fp16)[name = tensor("op_3850_cast_fp16")]; - tensor model_layers_22_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_22_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553819840)))]; - tensor var_3852_cast_fp16 = add(x = q_133_cast_fp16, y = model_layers_22_self_attn_pos_bias_v_to_fp16)[name = tensor("op_3852_cast_fp16")]; + tensor model_layers_22_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553772672))), scale = tensor([0x1.184p-8, 0x1.3e8p-7, 0x1.584p-7, 0x1.becp-8, 0x1.ce8p-7, 0x1.48cp-8, 0x1.57p-8, 0x1.c8p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3850_cast_fp16 = add(x = q_133_cast_fp16, y = model_layers_22_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_3850_cast_fp16")]; + tensor model_layers_22_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553773760))), scale = tensor([0x1.794p-8, 0x1.d48p-9, 0x1.2c4p-8, 0x1.e9cp-8, 0x1.01cp-7, 0x1.a14p-7, 0x1.06p-7, 0x1.81cp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_3852_cast_fp16 = add(x = q_133_cast_fp16, y = model_layers_22_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_3852_cast_fp16")]; tensor q_with_bias_v_45_perm_0 = const()[name = tensor("q_with_bias_v_45_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_491_transpose_x_0 = const()[name = tensor("x_491_transpose_x_0"), val = tensor(false)]; tensor x_491_transpose_y_0 = const()[name = tensor("x_491_transpose_y_0"), val = tensor(false)]; - tensor op_3854_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3854_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553821952))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554079040))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_3854_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_3854_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(553774848))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554031936))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_45_cast_fp16 = transpose(perm = q_with_bias_v_45_perm_0, x = var_3852_cast_fp16)[name = tensor("transpose_157")]; tensor x_491_cast_fp16 = matmul(transpose_x = x_491_transpose_x_0, transpose_y = x_491_transpose_y_0, x = q_with_bias_v_45_cast_fp16, y = op_3854_to_fp16_quantized)[name = tensor("x_491_cast_fp16")]; tensor x_493_pad_0 = const()[name = tensor("x_493_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -3198,12 +3198,12 @@ program(1.0) tensor var_3887 = const()[name = tensor("op_3887"), val = tensor([1, -1, 1024])]; tensor var_3886_cast_fp16 = transpose(perm = var_3886_perm_0, x = x_497_cast_fp16)[name = tensor("transpose_153")]; tensor input_1179_cast_fp16 = reshape(shape = var_3887, x = var_3886_cast_fp16)[name = tensor("input_1179_cast_fp16")]; - tensor model_layers_22_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554079616))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555128256))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(554032512))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555081152))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_205_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_22_self_attn_linear_out_weight_to_fp16_quantized, x = input_1179_cast_fp16)[name = tensor("linear_205_cast_fp16")]; tensor input_1183_cast_fp16 = add(x = input_1175_cast_fp16, y = linear_205_cast_fp16)[name = tensor("input_1183_cast_fp16")]; tensor x_501_axes_0 = const()[name = tensor("x_501_axes_0"), val = tensor([-1])]; - tensor model_layers_22_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555130368)))]; - tensor model_layers_22_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555132480)))]; + tensor model_layers_22_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555083264)))]; + tensor model_layers_22_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555085376)))]; tensor x_501_cast_fp16 = layer_norm(axes = x_501_axes_0, beta = model_layers_22_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_22_norm_conv_weight_to_fp16, x = input_1183_cast_fp16)[name = tensor("x_501_cast_fp16")]; tensor input_1185_perm_0 = const()[name = tensor("input_1185_perm_0"), val = tensor([0, 2, 1])]; tensor input_1187_pad_type_0 = const()[name = tensor("input_1187_pad_type_0"), val = tensor("valid")]; @@ -3211,7 +3211,7 @@ program(1.0) tensor input_1187_pad_0 = const()[name = tensor("input_1187_pad_0"), val = tensor([0, 0])]; tensor input_1187_dilations_0 = const()[name = tensor("input_1187_dilations_0"), val = tensor([1])]; tensor input_1187_groups_0 = const()[name = tensor("input_1187_groups_0"), val = tensor(1)]; - tensor model_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555134592))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557231808))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(555087488))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557184704))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_1185_cast_fp16 = transpose(perm = input_1185_perm_0, x = x_501_cast_fp16)[name = tensor("transpose_152")]; tensor input_1187_cast_fp16 = conv(dilations = input_1187_dilations_0, groups = input_1187_groups_0, pad = input_1187_pad_0, pad_type = input_1187_pad_type_0, strides = input_1187_strides_0, weight = model_layers_22_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1185_cast_fp16)[name = tensor("input_1187_cast_fp16")]; tensor x_503_split_num_splits_0 = const()[name = tensor("x_503_split_num_splits_0"), val = tensor(2)]; @@ -3229,8 +3229,8 @@ program(1.0) tensor input_1193_strides_0 = const()[name = tensor("input_1193_strides_0"), val = tensor([1])]; tensor input_1193_pad_0 = const()[name = tensor("input_1193_pad_0"), val = tensor([0, 0])]; tensor input_1193_dilations_0 = const()[name = tensor("input_1193_dilations_0"), val = tensor([1])]; - tensor const_292_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_292_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557235968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557245248))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_293_to_fp16 = const()[name = tensor("const_293_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557247360)))]; + tensor const_292_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_292_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557188864))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557198144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_293_to_fp16 = const()[name = tensor("const_293_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557200256)))]; tensor input_1195_cast_fp16 = conv(bias = const_293_to_fp16, dilations = input_1193_dilations_0, groups = input_1193_groups_0, pad = input_1193_pad_0, pad_type = input_1193_pad_type_0, strides = input_1193_strides_0, weight = const_292_to_fp16_quantized, x = input_1191_cast_fp16)[name = tensor("input_1195_cast_fp16")]; tensor input_1197_cast_fp16 = silu(x = input_1195_cast_fp16)[name = tensor("input_1197_cast_fp16")]; tensor x_505_pad_type_0 = const()[name = tensor("x_505_pad_type_0"), val = tensor("valid")]; @@ -3238,64 +3238,64 @@ program(1.0) tensor x_505_pad_0 = const()[name = tensor("x_505_pad_0"), val = tensor([0, 0])]; tensor x_505_dilations_0 = const()[name = tensor("x_505_dilations_0"), val = tensor([1])]; tensor x_505_groups_0 = const()[name = tensor("x_505_groups_0"), val = tensor(1)]; - tensor model_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557249472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558298112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(557202368))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558251008))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_505_cast_fp16 = conv(dilations = x_505_dilations_0, groups = x_505_groups_0, pad = x_505_pad_0, pad_type = x_505_pad_type_0, strides = x_505_strides_0, weight = model_layers_22_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1197_cast_fp16)[name = tensor("x_505_cast_fp16")]; tensor input_1199_perm_0 = const()[name = tensor("input_1199_perm_0"), val = tensor([0, 2, 1])]; tensor input_1199_cast_fp16 = transpose(perm = input_1199_perm_0, x = x_505_cast_fp16)[name = tensor("transpose_151")]; tensor input_1201_cast_fp16 = add(x = input_1183_cast_fp16, y = input_1199_cast_fp16)[name = tensor("input_1201_cast_fp16")]; tensor input_1203_axes_0 = const()[name = tensor("input_1203_axes_0"), val = tensor([-1])]; - tensor model_layers_22_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558300224)))]; - tensor model_layers_22_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558302336)))]; + tensor model_layers_22_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558253120)))]; + tensor model_layers_22_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558255232)))]; tensor input_1203_cast_fp16 = layer_norm(axes = input_1203_axes_0, beta = model_layers_22_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_22_norm_feed_forward2_weight_to_fp16, x = input_1201_cast_fp16)[name = tensor("input_1203_cast_fp16")]; - tensor model_layers_22_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558304448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562498816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_22_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(558257344))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562451712))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_206_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_22_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1203_cast_fp16)[name = tensor("linear_206_cast_fp16")]; tensor input_1207_cast_fp16 = silu(x = linear_206_cast_fp16)[name = tensor("input_1207_cast_fp16")]; - tensor model_layers_22_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562507072))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566701440))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_22_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_22_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(562459968))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566654336))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_207_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_22_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1207_cast_fp16)[name = tensor("linear_207_cast_fp16")]; tensor var_3947_to_fp16 = const()[name = tensor("op_3947_to_fp16"), val = tensor(0x1p-1)]; tensor var_3948_cast_fp16 = mul(x = linear_207_cast_fp16, y = var_3947_to_fp16)[name = tensor("op_3948_cast_fp16")]; tensor input_1213_cast_fp16 = add(x = input_1201_cast_fp16, y = var_3948_cast_fp16)[name = tensor("input_1213_cast_fp16")]; tensor input_1215_axes_0 = const()[name = tensor("input_1215_axes_0"), val = tensor([-1])]; - tensor model_layers_22_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566703552)))]; - tensor model_layers_22_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566705664)))]; + tensor model_layers_22_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_22_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566656448)))]; + tensor model_layers_22_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_22_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566658560)))]; tensor input_1215_cast_fp16 = layer_norm(axes = input_1215_axes_0, beta = model_layers_22_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_22_norm_out_weight_to_fp16, x = input_1213_cast_fp16)[name = tensor("input_1215_cast_fp16")]; tensor input_1217_axes_0 = const()[name = tensor("input_1217_axes_0"), val = tensor([-1])]; - tensor model_layers_23_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566707776)))]; - tensor model_layers_23_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566709888)))]; + tensor model_layers_23_norm_feed_forward1_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566660672)))]; + tensor model_layers_23_norm_feed_forward1_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566662784)))]; tensor input_1217_cast_fp16 = layer_norm(axes = input_1217_axes_0, beta = model_layers_23_norm_feed_forward1_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_23_norm_feed_forward1_weight_to_fp16, x = input_1215_cast_fp16)[name = tensor("input_1217_cast_fp16")]; - tensor model_layers_23_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566712000))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570906368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_23_feed_forward1_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward1_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(566664896))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570859264))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_208_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_23_feed_forward1_linear1_weight_to_fp16_quantized, x = input_1217_cast_fp16)[name = tensor("linear_208_cast_fp16")]; tensor input_1221_cast_fp16 = silu(x = linear_208_cast_fp16)[name = tensor("input_1221_cast_fp16")]; - tensor model_layers_23_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570914624))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575108992))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_feed_forward1_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward1_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(570867520))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575061888))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_209_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_23_feed_forward1_linear2_weight_to_fp16_quantized, x = input_1221_cast_fp16)[name = tensor("linear_209_cast_fp16")]; tensor var_3976_to_fp16 = const()[name = tensor("op_3976_to_fp16"), val = tensor(0x1p-1)]; tensor var_3977_cast_fp16 = mul(x = linear_209_cast_fp16, y = var_3976_to_fp16)[name = tensor("op_3977_cast_fp16")]; tensor input_1227_cast_fp16 = add(x = input_1215_cast_fp16, y = var_3977_cast_fp16)[name = tensor("input_1227_cast_fp16")]; tensor query_axes_0 = const()[name = tensor("query_axes_0"), val = tensor([-1])]; - tensor model_layers_23_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575111104)))]; - tensor model_layers_23_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575113216)))]; + tensor model_layers_23_norm_self_att_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_self_att_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575064000)))]; + tensor model_layers_23_norm_self_att_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_self_att_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575066112)))]; tensor query_cast_fp16 = layer_norm(axes = query_axes_0, beta = model_layers_23_norm_self_att_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_23_norm_self_att_weight_to_fp16, x = input_1227_cast_fp16)[name = tensor("query_cast_fp16")]; - tensor model_layers_23_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575115328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(576163968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_self_attn_linear_q_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_q_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575068224))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(576116864))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_210_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_23_self_attn_linear_q_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_210_cast_fp16")]; tensor var_3993 = const()[name = tensor("op_3993"), val = tensor([1, -1, 8, 128])]; tensor q_139_cast_fp16 = reshape(shape = var_3993, x = linear_210_cast_fp16)[name = tensor("q_139_cast_fp16")]; - tensor model_layers_23_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(576166080))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577214720))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_self_attn_linear_k_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_k_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(576118976))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577167616))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_211_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_23_self_attn_linear_k_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_211_cast_fp16")]; tensor var_3997 = const()[name = tensor("op_3997"), val = tensor([1, -1, 8, 128])]; tensor k_93_cast_fp16 = reshape(shape = var_3997, x = linear_211_cast_fp16)[name = tensor("k_93_cast_fp16")]; - tensor model_layers_23_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577216832))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578265472))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_self_attn_linear_v_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_v_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(577169728))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578218368))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_212_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_23_self_attn_linear_v_weight_to_fp16_quantized, x = query_cast_fp16)[name = tensor("linear_212_cast_fp16")]; tensor var_4001 = const()[name = tensor("op_4001"), val = tensor([1, -1, 8, 128])]; tensor v_cast_fp16 = reshape(shape = var_4001, x = linear_212_cast_fp16)[name = tensor("v_cast_fp16")]; tensor value_perm_0 = const()[name = tensor("value_perm_0"), val = tensor([0, 2, -3, -1])]; - tensor model_layers_23_self_attn_pos_bias_u_to_fp16 = const()[name = tensor("model_layers_23_self_attn_pos_bias_u_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578267584)))]; - tensor var_4013_cast_fp16 = add(x = q_139_cast_fp16, y = model_layers_23_self_attn_pos_bias_u_to_fp16)[name = tensor("op_4013_cast_fp16")]; - tensor model_layers_23_self_attn_pos_bias_v_to_fp16 = const()[name = tensor("model_layers_23_self_attn_pos_bias_v_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578269696)))]; - tensor var_4015_cast_fp16 = add(x = q_139_cast_fp16, y = model_layers_23_self_attn_pos_bias_v_to_fp16)[name = tensor("op_4015_cast_fp16")]; + tensor model_layers_23_self_attn_pos_bias_u_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_pos_bias_u_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578220480))), scale = tensor([0x1.21p-7, 0x1.8p-8, 0x1.544p-8, 0x1.a88p-8, 0x1.01p-7, 0x1.95p-8, 0x1.2dcp-8, 0x1.39p-7]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_4013_cast_fp16 = add(x = q_139_cast_fp16, y = model_layers_23_self_attn_pos_bias_u_to_fp16_quantized)[name = tensor("op_4013_cast_fp16")]; + tensor model_layers_23_self_attn_pos_bias_v_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_pos_bias_v_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578221568))), scale = tensor([0x1.42cp-7, 0x1.bfcp-8, 0x1.864p-8, 0x1.0f4p-7, 0x1.7d8p-9, 0x1.c1cp-8, 0x1.ce8p-9, 0x1.81cp-8]), zero_point = tensor([0, 0, 0, 0, 0, 0, 0, 0])]; + tensor var_4015_cast_fp16 = add(x = q_139_cast_fp16, y = model_layers_23_self_attn_pos_bias_v_to_fp16_quantized)[name = tensor("op_4015_cast_fp16")]; tensor q_with_bias_v_perm_0 = const()[name = tensor("q_with_bias_v_perm_0"), val = tensor([0, 2, -3, -1])]; tensor x_513_transpose_x_0 = const()[name = tensor("x_513_transpose_x_0"), val = tensor(false)]; tensor x_513_transpose_y_0 = const()[name = tensor("x_513_transpose_y_0"), val = tensor(false)]; - tensor op_4017_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_4017_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578271808))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578528896))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16179776)))]; + tensor op_4017_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(3), name = tensor("op_4017_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578222656))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578479744))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16177728)))]; tensor q_with_bias_v_cast_fp16 = transpose(perm = q_with_bias_v_perm_0, x = var_4015_cast_fp16)[name = tensor("transpose_150")]; tensor x_513_cast_fp16 = matmul(transpose_x = x_513_transpose_x_0, transpose_y = x_513_transpose_y_0, x = q_with_bias_v_cast_fp16, y = op_4017_to_fp16_quantized)[name = tensor("x_513_cast_fp16")]; tensor x_515_pad_0 = const()[name = tensor("x_515_pad_0"), val = tensor([0, 0, 0, 0, 0, 0, 1, 0])]; @@ -3335,12 +3335,12 @@ program(1.0) tensor var_4050 = const()[name = tensor("op_4050"), val = tensor([1, -1, 1024])]; tensor var_4049_cast_fp16 = transpose(perm = var_4049_perm_0, x = x_519_cast_fp16)[name = tensor("transpose_146")]; tensor input_1231_cast_fp16 = reshape(shape = var_4050, x = var_4049_cast_fp16)[name = tensor("input_1231_cast_fp16")]; - tensor model_layers_23_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578529472))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579578112))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_self_attn_linear_out_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_self_attn_linear_out_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578480320))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579528960))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_214_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_23_self_attn_linear_out_weight_to_fp16_quantized, x = input_1231_cast_fp16)[name = tensor("linear_214_cast_fp16")]; tensor input_1235_cast_fp16 = add(x = input_1227_cast_fp16, y = linear_214_cast_fp16)[name = tensor("input_1235_cast_fp16")]; tensor x_523_axes_0 = const()[name = tensor("x_523_axes_0"), val = tensor([-1])]; - tensor model_layers_23_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579580224)))]; - tensor model_layers_23_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579582336)))]; + tensor model_layers_23_norm_conv_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579531072)))]; + tensor model_layers_23_norm_conv_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579533184)))]; tensor x_523_cast_fp16 = layer_norm(axes = x_523_axes_0, beta = model_layers_23_norm_conv_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_23_norm_conv_weight_to_fp16, x = input_1235_cast_fp16)[name = tensor("x_523_cast_fp16")]; tensor input_1237_perm_0 = const()[name = tensor("input_1237_perm_0"), val = tensor([0, 2, 1])]; tensor input_1239_pad_type_0 = const()[name = tensor("input_1239_pad_type_0"), val = tensor("valid")]; @@ -3348,7 +3348,7 @@ program(1.0) tensor input_1239_pad_0 = const()[name = tensor("input_1239_pad_0"), val = tensor([0, 0])]; tensor input_1239_dilations_0 = const()[name = tensor("input_1239_dilations_0"), val = tensor([1])]; tensor input_1239_groups_0 = const()[name = tensor("input_1239_groups_0"), val = tensor(1)]; - tensor model_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579584448))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581681664))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19332864)))]; + tensor model_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(579535296))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581632512))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19330816)))]; tensor input_1237_cast_fp16 = transpose(perm = input_1237_perm_0, x = x_523_cast_fp16)[name = tensor("transpose_145")]; tensor input_1239_cast_fp16 = conv(dilations = input_1239_dilations_0, groups = input_1239_groups_0, pad = input_1239_pad_0, pad_type = input_1239_pad_type_0, strides = input_1239_strides_0, weight = model_layers_23_conv_pointwise_conv1_weight_to_fp16_quantized, x = input_1237_cast_fp16)[name = tensor("input_1239_cast_fp16")]; tensor x_525_split_num_splits_0 = const()[name = tensor("x_525_split_num_splits_0"), val = tensor(2)]; @@ -3366,8 +3366,8 @@ program(1.0) tensor input_1245_strides_0 = const()[name = tensor("input_1245_strides_0"), val = tensor([1])]; tensor input_1245_pad_0 = const()[name = tensor("input_1245_pad_0"), val = tensor([0, 0])]; tensor input_1245_dilations_0 = const()[name = tensor("input_1245_dilations_0"), val = tensor([1])]; - tensor const_294_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_294_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581685824))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581695104))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; - tensor const_295_to_fp16 = const()[name = tensor("const_295_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581697216)))]; + tensor const_294_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("const_294_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581636672))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581645952))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor const_295_to_fp16 = const()[name = tensor("const_295_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581648064)))]; tensor input_1247_cast_fp16 = conv(bias = const_295_to_fp16, dilations = input_1245_dilations_0, groups = input_1245_groups_0, pad = input_1245_pad_0, pad_type = input_1245_pad_type_0, strides = input_1245_strides_0, weight = const_294_to_fp16_quantized, x = input_1243_cast_fp16)[name = tensor("input_1247_cast_fp16")]; tensor input_1249_cast_fp16 = silu(x = input_1247_cast_fp16)[name = tensor("input_1249_cast_fp16")]; tensor x_527_pad_type_0 = const()[name = tensor("x_527_pad_type_0"), val = tensor("valid")]; @@ -3375,26 +3375,26 @@ program(1.0) tensor x_527_pad_0 = const()[name = tensor("x_527_pad_0"), val = tensor([0, 0])]; tensor x_527_dilations_0 = const()[name = tensor("x_527_dilations_0"), val = tensor([1])]; tensor x_527_groups_0 = const()[name = tensor("x_527_groups_0"), val = tensor(1)]; - tensor model_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581699328))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582747968))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(581650176))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582698816))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor x_527_cast_fp16 = conv(dilations = x_527_dilations_0, groups = x_527_groups_0, pad = x_527_pad_0, pad_type = x_527_pad_type_0, strides = x_527_strides_0, weight = model_layers_23_conv_pointwise_conv2_weight_to_fp16_quantized, x = input_1249_cast_fp16)[name = tensor("x_527_cast_fp16")]; tensor input_1251_perm_0 = const()[name = tensor("input_1251_perm_0"), val = tensor([0, 2, 1])]; tensor input_1251_cast_fp16 = transpose(perm = input_1251_perm_0, x = x_527_cast_fp16)[name = tensor("transpose_144")]; tensor input_1253_cast_fp16 = add(x = input_1235_cast_fp16, y = input_1251_cast_fp16)[name = tensor("input_1253_cast_fp16")]; tensor input_1255_axes_0 = const()[name = tensor("input_1255_axes_0"), val = tensor([-1])]; - tensor model_layers_23_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582750080)))]; - tensor model_layers_23_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582752192)))]; + tensor model_layers_23_norm_feed_forward2_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582700928)))]; + tensor model_layers_23_norm_feed_forward2_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_feed_forward2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582703040)))]; tensor input_1255_cast_fp16 = layer_norm(axes = input_1255_axes_0, beta = model_layers_23_norm_feed_forward2_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_23_norm_feed_forward2_weight_to_fp16, x = input_1253_cast_fp16)[name = tensor("input_1255_cast_fp16")]; - tensor model_layers_23_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582754304))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586948672))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; + tensor model_layers_23_feed_forward2_linear1_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward2_linear1_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582705152))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586899520))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8542720)))]; tensor linear_215_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_layers_23_feed_forward2_linear1_weight_to_fp16_quantized, x = input_1255_cast_fp16)[name = tensor("linear_215_cast_fp16")]; tensor input_1259_cast_fp16 = silu(x = linear_215_cast_fp16)[name = tensor("input_1259_cast_fp16")]; - tensor model_layers_23_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586956928))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591151296))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; + tensor model_layers_23_feed_forward2_linear2_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor(0), name = tensor("model_layers_23_feed_forward2_linear2_weight_to_fp16_quantized"), quantized_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(586907776))), scale = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591102144))), zero_point = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4338816)))]; tensor linear_216_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = model_layers_23_feed_forward2_linear2_weight_to_fp16_quantized, x = input_1259_cast_fp16)[name = tensor("linear_216_cast_fp16")]; tensor var_4110_to_fp16 = const()[name = tensor("op_4110_to_fp16"), val = tensor(0x1p-1)]; tensor var_4111_cast_fp16 = mul(x = linear_216_cast_fp16, y = var_4110_to_fp16)[name = tensor("op_4111_cast_fp16")]; tensor input_cast_fp16 = add(x = input_1253_cast_fp16, y = var_4111_cast_fp16)[name = tensor("input_cast_fp16")]; tensor audio_signal_axes_0 = const()[name = tensor("audio_signal_axes_0"), val = tensor([-1])]; - tensor model_layers_23_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591153408)))]; - tensor model_layers_23_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591155520)))]; + tensor model_layers_23_norm_out_weight_to_fp16 = const()[name = tensor("model_layers_23_norm_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591104256)))]; + tensor model_layers_23_norm_out_bias_to_fp16 = const()[name = tensor("model_layers_23_norm_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591106368)))]; tensor var_4124_cast_fp16 = layer_norm(axes = audio_signal_axes_0, beta = model_layers_23_norm_out_bias_to_fp16, epsilon = var_4_to_fp16, gamma = model_layers_23_norm_out_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_4124_cast_fp16")]; tensor var_4124_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_4124_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor encoder_output = cast(dtype = var_4124_cast_fp16_to_fp32_dtype_0, x = var_4124_cast_fp16)[name = tensor("cast_0")];