From 903bf0147e49ab9d370926627f384df533d49603 Mon Sep 17 00:00:00 2001
From: Zihao Mu
Date: Tue, 1 Nov 2022 00:06:31 +0800
Subject: [PATCH] Merge pull request #22666 from zihaomu:support_onnx_qdq_model

DNN: let Quant and Dequant of ONNX_importer support the Constant input.

* let Quant and Dequant support the Constant input.

* fix negative value of axis.
---
 .../dnn/include/opencv2/dnn/all_layers.hpp    |   8 +-
 .../dnn/src/int8layers/quantization_utils.cpp | 161 ++++++++++++++++--
 modules/dnn/src/onnx/onnx_importer.cpp        | 133 ++++++++++-----
 modules/dnn/test/test_onnx_importer.cpp       |  19 ++-
 4 files changed, 261 insertions(+), 60 deletions(-)

diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index 46c5f338af..d74566e57c 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -422,16 +422,16 @@ CV__DNN_INLINE_NS_BEGIN
     class CV_EXPORTS QuantizeLayer : public Layer
     {
     public:
-        float scale;
-        int zeropoint;
+        std::vector<float> scales;
+        std::vector<int> zeropoints;
         static Ptr<QuantizeLayer> create(const LayerParams &params);
     };

     class CV_EXPORTS DequantizeLayer : public Layer
     {
     public:
-        float scale;
-        int zeropoint;
+        std::vector<float> scales;
+        std::vector<int> zeropoints;
         static Ptr<DequantizeLayer> create(const LayerParams &params);
     };

diff --git a/modules/dnn/src/int8layers/quantization_utils.cpp b/modules/dnn/src/int8layers/quantization_utils.cpp
index 6e2f0bb61c..a4a822efdd 100644
--- a/modules/dnn/src/int8layers/quantization_utils.cpp
+++ b/modules/dnn/src/int8layers/quantization_utils.cpp
@@ -11,14 +11,88 @@ namespace cv
 namespace dnn
 {

+static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int axis)
+{
+    // The data is the 1-D scales or zeropoints.
+    CV_Assert(axis >= 0 && targetShape.size() > axis && data.total() == targetShape[axis]);
+    std::vector<int> broadcast_axes;
+    for (int i = 0; i < targetShape.size(); i++)
+    {
+        if (i != axis)
+            broadcast_axes.push_back(i);
+    }
+
+    MatShape subTargetShape = shape(data);
+
+    // convert std::vector to 1D Mat.
+    for (auto broadcast_axis : broadcast_axes)
+    {
+        subTargetShape[broadcast_axis] = targetShape[broadcast_axis];
+        data = data.reshape(0, total(data, 0, broadcast_axis));
+        Mat tmp = cv::repeat(data, 1, subTargetShape[broadcast_axis]);
+        data = tmp.reshape(0, subTargetShape);
+    }
+}
+
+static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector<float>& scales,
+                                       const std::vector<int>& zeropoints, const MatShape& targetShape, int axis)
+{
+    // Broadcast the scales and zeropoints to the input shape.
+    MatShape subTargetShape(targetShape.size(), 1);
+    subTargetShape[axis] = scales.size();
+
+    zeropointsMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
+    scalesMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
+
+    const int len = scales.size();
+    // Deep copy the scales and zeropoint data and prevent the original data from being changed.
+
+    float* scalePtr = scalesMat.ptr<float>(0);
+    for (int i = 0; i < len; i++)
+        scalePtr[i] = scales[i];
+
+    float* zpPtr = zeropointsMat.ptr<float>(0);
+    for (int i = 0; i < len; i++)
+        zpPtr[i] = (float)zeropoints[i];
+
+    broadcast1D2TargetMat(scalesMat, targetShape, axis);
+    broadcast1D2TargetMat(zeropointsMat, targetShape, axis);
+}
+
 // Quantize FP32/FP16 Inputs to INT8
 class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
 {
 public:
+    int axis;
+    bool is1D;
+    Mat scalesMat, zeropointsMat; // Saving the broadcast scales data.
+
     QuantizeLayerImpl(const LayerParams& params)
     {
-        scale = params.get<float>("scales", 1.0f);
-        zeropoint = params.get<int>("zeropoints", 0);
+        is1D = params.get<bool>("is1D", false);
+        axis = params.get<int>("axis", 1);
+        if (!is1D)
+        {
+            scales.push_back(params.get<float>("scales", 1.0f));
+            zeropoints.push_back(params.get<int>("zeropoints", 0));
+        }
+        else
+        {
+            DictValue paramScales = params.get("scales");
+            int i, n = paramScales.size();
+
+            CV_Assert(n > 0);
+            scales.resize(n, 0.);
+            for (i = 0; i < n; i++)
+                scales[i] = paramScales.get<float>(i);
+
+            zeropoints.resize(n, 0);
+            DictValue paramZp = params.get("zeropoints");
+            n = paramZp.size();
+
+            for (i = 0; i < n; i++)
+                zeropoints[i] = paramZp.get<int>(i);
+        }
         setParamsFrom(params);
     }

@@ -42,6 +116,14 @@ public:
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
+
+        axis = normalize_axis(axis, shape(inputs[0]).size());
+
+        if (is1D)
+        {
+            MatShape inputShape = shape(inputs[0]);
+            broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
+        }
     }

 #ifdef HAVE_OPENCL
@@ -58,7 +140,7 @@ public:
             inputs[0] = inputFp32;  // replace
         }

-        inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint);
+        inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
         return true;
     }
 #endif
@@ -68,14 +150,26 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !is1D,
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))

         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

-        inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint);
+        if (outputs[0].depth() != CV_8S)
+            outputs[0].convertTo(outputs[0], CV_8S);
+
+        if (is1D)
+        {
+            Mat inputTmp;
+            divide(inputs[0], scalesMat, inputTmp);
+            subtract(inputTmp, zeropointsMat, inputTmp);
+
+            inputTmp.convertTo(outputs[0], CV_8S);
+        }
+        else
+            inputs[0].convertTo(outputs[0], CV_8S, 1.f/scales[0], zeropoints[0]);
     }
 };

@@ -83,10 +177,38 @@ public:
 class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
 {
 public:
+    int axis;
+    bool is1D;
+    Mat scalesMat, zeropointsMat; // Saving the broadcast scales data.
+
     DequantizeLayerImpl(const LayerParams& params)
     {
-        scale = params.get<float>("scales", 1.0f);
-        zeropoint = params.get<int>("zeropoints", 0);
+        is1D = params.get<bool>("is1D", false);
+        axis = params.get<int>("axis", 1);
+
+        if (!is1D)
+        {
+            scales.push_back(params.get<float>("scales", 1.0f));
+            zeropoints.push_back(params.get<int>("zeropoints", 0));
+        }
+        else
+        {
+            DictValue paramScales = params.get("scales");
+            int i, n = paramScales.size();
+
+            CV_Assert(n > 0);
+            scales.resize(n);
+            for (i = 0; i < n; i++)
+                scales[i] = paramScales.get<float>(i);
+
+            zeropoints.resize(n, 0);
+            DictValue paramZp = params.get("zeropoints");
+            n = paramZp.size();
+
+            for (i = 0; i < n; i++)
+                zeropoints[i] = paramZp.get<int>(i);
+        }
+
         setParamsFrom(params);
     }

@@ -110,6 +232,14 @@ public:
         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
+
+        axis = normalize_axis(axis, shape(inputs[0]).size());
+
+        if (is1D)
+        {
+            MatShape inputShape = shape(inputs[0]);
+            broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
+        }
     }

 #ifdef HAVE_OPENCL
@@ -120,7 +250,7 @@ public:
         outputs_.getUMatVector(outputs);

         UMat outputFp32;
-        inputs[0].convertTo(outputFp32, CV_32F, scale, -(scale*zeropoint));
+        inputs[0].convertTo(outputFp32, CV_32F, scales[0], -(scales[0]*zeropoints[0]));

         if (outputs_.depth() == CV_16S)
             convertFp16(outputFp32, outputs[0]);
@@ -135,14 +265,25 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
+        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && !is1D,
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))

         std::vector<Mat> inputs, outputs;
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);

-        inputs[0].convertTo(outputs[0], CV_32F, scale, -(scale*zeropoint));
+        if (outputs[0].depth() != CV_32F)
+            outputs[0].convertTo(outputs[0], CV_32F);
+
+        if (is1D)
+        {
+            Mat inputTmp;
+            inputs[0].convertTo(inputTmp, CV_32F);
+            subtract(inputTmp, zeropointsMat, inputTmp);
+            multiply(inputTmp, scalesMat, outputs[0]);
+        }
+        else
+            inputs[0].convertTo(outputs[0], CV_32F, scales[0], -(scales[0]*zeropoints[0]));
     }
 };

diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 9901df0dad..c420f3b5d1 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -53,7 +53,7 @@ extern bool DNN_DIAGNOSTICS_RUN;
 class ONNXLayerHandler;

 template <typename T>
-static T getScaleFromMat(Mat m)
+static T getScalarFromMat(Mat m)
 {
     CV_Assert(m.total() == 1);
     return m.at<T>(0);
@@ -380,7 +380,10 @@ void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
         inpShapes[i] = shape(inputs[i]);
         if (i > 0 && ddepth != inputs[i].depth())
             CV_Error(Error::StsNotImplemented, "Mixed input data types.");
-        ddepth = inputs[i].depth();
+
+        // Quantize and Dequantize layers have a different output type than their input.
+        if (params.type != "Quantize" && params.type != "Dequantize")
+            ddepth = inputs[i].depth();
     }

     std::vector<MatShape> outShapes, internalShapes;
@@ -3240,21 +3243,67 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx
 {
     CV_Assert(node_proto.input_size() == 2 || node_proto.input_size() == 3);
     layerParams.type = (node_proto.op_type() == "QuantizeLinear") ? "Quantize" : "Dequantize";
+    int axis = layerParams.get<int>("axis", 1);
+
+    // For QuantizeLinear and DequantizeLinear, the scale and zeropoint can be a Scalar (per-tensor quantized)
+    // or a 1-D tensor (per-channel quantized).
+    bool is1D = false;
+
+    Mat scaleMat = getBlob(node_proto, 1);
+    if (scaleMat.total() > 1) is1D = true;

-    float scale = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int zeropoint = 0;
+    Mat zpMat;
     if (node_proto.input_size() == 3)
-        zeropoint = (int)getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+    {
+        zpMat = getBlob(node_proto, 2);
+        CV_Assert(zpMat.total() == scaleMat.total()); // zero point should have the same shape as scale.
+    }
+
+    if (is1D)
+    {
+        const int num = scaleMat.total();

-    layerParams.set("scales", scale);
-    layerParams.set("zeropoints", zeropoint);
+        std::vector<int> zeropoints(num, 0);
+        std::vector<float> scales(num, 0);
+
+        for (int i = 0; i < num; i++)
+        {
+            scales[i] = scaleMat.at<float>(i);
+            if (!zpMat.empty())
+                zeropoints[i] = zpMat.depth() == CV_32S ?
+                                zpMat.at<int>(i) : (int)zpMat.at<int8_t>(i);
+        }
+
+        layerParams.set("is1D", true);
+        layerParams.set("axis", axis);
+        layerParams.set("scales", DictValue::arrayReal(scales.data(), scales.size()));
+        layerParams.set("zeropoints", DictValue::arrayInt(zeropoints.data(), zeropoints.size()));
+    }
+    else
+    {
+        int zeropoint = zpMat.empty() ? 0 : zpMat.depth() == CV_32S ?
+                        getScalarFromMat<int>(zpMat) : (int)getScalarFromMat<int8_t>(zpMat);
+        float scale = getScalarFromMat<float>(scaleMat);
+
+        layerParams.set("is1D", false);
+        layerParams.set("scales", scale);
+        layerParams.set("zeropoints", zeropoint);
+    }

     if (layerParams.type == "Quantize")
         layerParams.set("depth", CV_8S);
     else // Dequantize
         layerParams.set("depth", CV_32F);

-    addLayer(layerParams, node_proto);
+    if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Constant input.
+    {
+        std::vector<Mat> inputs, outputs;
+        inputs.push_back(getBlob(node_proto, 0));
+
+        runLayer(layerParams, inputs, outputs);
+        addConstant(node_proto.output(0), outputs[0]);
+    }
+    else
+        addLayer(layerParams, node_proto);
 }

 void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_)
@@ -3263,8 +3312,8 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP
     int ninputs = node_proto.input_size();
     CV_Assert(ninputs == 8 || ninputs == 9);

-    float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int inp_zp = (int)getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+    float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int inp_zp = (int)getScalarFromMat<int8_t>(getBlob(node_proto, 2));

     if (layerParams.has("pad"))
     {
@@ -3312,8 +3361,8 @@ void ONNXImporter::parseQConv(LayerParams& layerParams, const opencv_onnx::NodeP
     bool per_channel = w_scale.total() == outCn;
     Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));

-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
-    int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
+    int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));

     Mat bias = (ninputs == 9) ? getBlob(node_proto, 8) : Mat::zeros(1, outCn, CV_32S);

@@ -3349,8 +3398,8 @@ void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::Nod

     int firstInpDims = outShapes[node_proto.input(0)].size();

-    float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+    float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));

     Mat weights = getBlob(node_proto, 3).t();
     int outCn = weights.size[0];
@@ -3361,8 +3410,8 @@ void ONNXImporter::parseQMatMul(LayerParams& layerParams, const opencv_onnx::Nod
     bool per_channel = w_scale.total() == outCn ? true : false;
     Mat wt_sc = (w_scale.total() == outCn) ? w_scale : Mat(1, outCn, CV_32F, Scalar(w_scale.at<float>(0)));

-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
-    int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
+    int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));

     Mat bias(1, outCn, CV_32S);
     Mat outputMultiplier(1, outCn, CV_32F);
@@ -3411,8 +3460,8 @@ void ONNXImporter::parseQGemm(LayerParams& layerParams, const opencv_onnx::NodeP

     int firstInpDims = outShapes[node_proto.input(0)].size();

-    float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+    float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));

     int outCn = weights.size[0];
     int secondInpDims = weights.dims;
@@ -3431,8 +3480,8 @@ void ONNXImporter::parseQGemm(LayerParams& layerParams, const opencv_onnx::NodeP
         CV_Error(Error::StsUnsupportedFormat, "The zero-point non-zero case of W is not supported!");
     }

-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 7));
-    int8_t out_zp = ninputs == 9 ? getScaleFromMat<int8_t>(getBlob(node_proto, 8)) : 0;
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 7));
+    int8_t out_zp = ninputs == 9 ? getScalarFromMat<int8_t>(getBlob(node_proto, 8)) : 0;

     Mat bias;
     if (constBlobs.find(node_proto.input(6)) != constBlobs.end())
@@ -3475,11 +3524,11 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No
             constId = i;
     }

-    float inp_0_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int8_t inp_0_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
+    float inp_0_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int8_t inp_0_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));

-    float inp_1_sc = getScaleFromMat<float>(getBlob(node_proto, 4));
-    int8_t inp_1_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 5));
+    float inp_1_sc = getScalarFromMat<float>(getBlob(node_proto, 4));
+    int8_t inp_1_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 5));

     // Set 2nd input as the const input
     if (constId == 0)
@@ -3488,11 +3537,11 @@ void ONNXImporter::parseQEltwise(LayerParams& layerParams, const opencv_onnx::No
         cv::swap(inp_0_zp, inp_1_zp);
     }

-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 6));
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 6));
     int8_t out_zp = 0;
     if (node_proto.input_size() == 8)
-        out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 7));
+        out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 7));

     std::vector<float> inp_scales = {inp_0_sc, inp_1_sc};
     std::vector<int8_t> inp_zps = {inp_0_zp, inp_1_zp};

@@ -3608,10 +3657,10 @@ void ONNXImporter::parseQLeakyRelu(LayerParams& layerParams, const opencv_onnx::
     CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);

     float slope = layerParams.get<float>("alpha");
-    float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
-    int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
+    float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
+    int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));

     Mat lookUpTable(1, 256, CV_8S);
     int8_t* table = lookUpTable.ptr<int8_t>();
@@ -3637,10 +3686,10 @@ void ONNXImporter::parseQSigmoid(LayerParams& layerParams, const opencv_onnx::No
 {
     CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);

-    float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
-    int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
+    float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
+    int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));

     Mat lookUpTable(1, 256, CV_8S);
     int8_t* table = lookUpTable.ptr<int8_t>();
@@ -3665,10 +3714,10 @@ void ONNXImporter::parseQAvgPool(LayerParams& layerParams, const opencv_onnx::No
 {
     CV_Assert(node_proto.input_size() == 4 || node_proto.input_size() == 5);

-    float inp_sc = getScaleFromMat<float>(getBlob(node_proto, 1));
-    int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 2));
-    float out_sc = getScaleFromMat<float>(getBlob(node_proto, 3));
-    int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScaleFromMat<int8_t>(getBlob(node_proto, 4));
+    float inp_sc = getScalarFromMat<float>(getBlob(node_proto, 1));
+    int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 2));
+    float out_sc = getScalarFromMat<float>(getBlob(node_proto, 3));
+    int8_t out_zp = node_proto.input_size() == 4 ? 0 : getScalarFromMat<int8_t>(getBlob(node_proto, 4));

     layerParams.type = "PoolingInt8";
     layerParams.set("pool", "ave");
@@ -3687,13 +3736,13 @@ void ONNXImporter::parseQConcat(LayerParams& layerParams, const opencv_onnx::Nod
     layerParams.type = "ConcatInt8";
     int num_inputs = node_proto.input_size();

-    float out_scale = getScaleFromMat<float>(getBlob(node_proto, 0));
-    int8_t out_zp = getScaleFromMat<int8_t>(getBlob(node_proto, 1));
+    float out_scale = getScalarFromMat<float>(getBlob(node_proto, 0));
+    int8_t out_zp = getScalarFromMat<int8_t>(getBlob(node_proto, 1));

     for (int i = 2; i < num_inputs; i += 3)
     {
-        float inp_scale = getScaleFromMat<float>(getBlob(node_proto, i + 1));
-        int8_t inp_zp = getScaleFromMat<int8_t>(getBlob(node_proto, i + 2));
+        float inp_scale = getScalarFromMat<float>(getBlob(node_proto, i + 1));
+        int8_t inp_zp = getScalarFromMat<int8_t>(getBlob(node_proto, i + 2));

         if (inp_scale != out_scale || inp_zp != out_zp)
         {
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index c90195783c..b310dce808 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -1824,11 +1824,22 @@ TEST_P(Test_ONNX_layers, Gemm)

 TEST_P(Test_ONNX_layers, Quantized_Convolution)
 {
-    testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
-    testONNXModels("quantized_conv_int8_weights", npy, 0.03, 0.5);
-    testONNXModels("quantized_conv_per_channel_weights", npy, 0.06, 0.4);
+    // The difference between the QOperator and QDQ formats:
+    // https://onnxruntime.ai/docs/performance/quantization.html#onnx-quantization-representation-format
+    {
+        SCOPED_TRACE("QOperator quantized model.");
+        testONNXModels("quantized_conv_uint8_weights", npy, 0.004, 0.02);
+        testONNXModels("quantized_conv_int8_weights", npy, 0.03, 0.5);
+        testONNXModels("quantized_conv_per_channel_weights", npy, 0.06, 0.4);
+        testONNXModels("quantized_conv_asymmetric_pads_int8_weights");
+    }

-    testONNXModels("quantized_conv_asymmetric_pads_int8_weights");
+    {
+        SCOPED_TRACE("QDQ quantized model.");
+        testONNXModels("quantized_conv_uint8_weights_qdq", npy, 0.004, 0.02);
+        testONNXModels("quantized_conv_int8_weights_qdq", npy, 0.03, 0.5);
+        testONNXModels("quantized_conv_per_channel_weights_qdq", npy, 0.06, 0.4);
+    }
 }

 TEST_P(Test_ONNX_layers, Quantized_MatMul)
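
Beyond the unit tests above, the importer change can be exercised end to end by loading any ONNX model quantized in QDQ format (QuantizeLinear/DequantizeLinear nodes with per-tensor or per-channel scales, possibly fed by constant inputs). The minimal sketch below is not part of the patch; the file name "model_qdq.onnx" and the 224x224 input size are placeholder assumptions.

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>

int main()
{
    // "model_qdq.onnx" stands for any QDQ-format quantized ONNX model;
    // its QuantizeLinear/DequantizeLinear nodes go through parseQuantDequant above.
    cv::dnn::Net net = cv::dnn::readNetFromONNX("model_qdq.onnx");

    // Dummy 224x224 3-channel input; a real application would preprocess an image here.
    cv::Mat img(224, 224, CV_32FC3, cv::Scalar(0.5, 0.5, 0.5));
    cv::Mat blob = cv::dnn::blobFromImage(img);

    net.setInput(blob);
    cv::Mat out = net.forward();
    return 0;
}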