From 7eaec9dd227e13944c82d62c8da4e875d7aa89d6 Mon Sep 17 00:00:00 2001
From: Zihao Mu
Date: Fri, 26 Aug 2022 10:04:44 +0800
Subject: [PATCH] load fp16 as fp32 and align fp16 and double in
 onnx_graph_simplifier
---
 .../dnn/src/onnx/onnx_graph_simplifier.cpp | 66 ++++++++++++++++++-
 modules/dnn/test/test_onnx_importer.cpp    |  5 ++
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
index c6e54d6a92..c787c0a321 100644
--- a/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
+++ b/modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -18,6 +18,17 @@ CV__DNN_INLINE_NS_BEGIN
 
 extern bool DNN_DIAGNOSTICS_RUN;
 
+static int isLittleEndianCPU()
+{
+    int x = 7;
+    char *ptr = (char *)&x;
+
+    if (ptr[0] == 0)
+        return 0;
+    else
+        return 1;
+}
+
 // This wrapper can behave differently for fake input nodes and real graph nodes.
 class ONNXNodeWrapper : public ImportNodeWrapper
 {
@@ -767,11 +778,64 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
             Mat(sizes, CV_32FC1, val).copyTo(blob);
         }
     }
+    else if (datatype == opencv_onnx::TensorProto_DataType_FLOAT16)
+    {
+        // FIXME: for now, FP16 tensors are loaded as FP32 Mats; full FP16 support is left for the future.
+        CV_LOG_ONCE_WARNING(NULL, "DNN: FP16 model is loaded as FP32, which takes twice the RAM of the FP16 weights.");
+
+        // ONNX stores float16 data in one of two formats: int32_data and raw_data.
+        // Link: https://github.com/onnx/onnx/issues/4460#issuecomment-1224373746
+        if (!tensor_proto.int32_data().empty())
+        {
+            const int offset = isLittleEndianCPU() ? 0 : 1;
+            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();
+
+            AutoBuffer<float16_t> aligned_val;
+            size_t sz = tensor_proto.int32_data().size();
+            aligned_val.allocate(sz);
+            float16_t* bufPtr = aligned_val.data();
+
+            float16_t* fp16Ptr = (float16_t*)field.data();
+            for (size_t i = 0; i < sz; i++)
+            {
+                bufPtr[i] = fp16Ptr[i * 2 + offset];
+            }
+            Mat(sizes, CV_16FC1, bufPtr).convertTo(blob, CV_32FC1);
+        }
+        else
+        {
+            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
+#if CV_STRONG_ALIGNMENT
+            // Aligned pointer is required.
+            AutoBuffer<float16_t> aligned_val;
+            if (!isAligned<sizeof(float16_t)>(val))
+            {
+                size_t sz = tensor_proto.raw_data().size();
+                aligned_val.allocate(divUp(sz, sizeof(float16_t)));
+                memcpy(aligned_val.data(), val, sz);
+                val = (char*)aligned_val.data();
+            }
+#endif
+            Mat(sizes, CV_16FC1, val).convertTo(blob, CV_32FC1);
+        }
+    }
     else if (datatype == opencv_onnx::TensorProto_DataType_DOUBLE)
     {
         const ::google::protobuf::RepeatedField<double> field = tensor_proto.double_data();
         CV_Assert(!field.empty());
-        Mat(sizes, CV_64FC1, (void*)field.data()).convertTo(blob, CV_32FC1);
+        char* val = (char*)field.data();
+#if CV_STRONG_ALIGNMENT
+        // Aligned pointer is required.
+        AutoBuffer<double> aligned_val;
+        if (!isAligned<sizeof(double)>(val))
+        {
+            size_t sz = field.size() * sizeof(double); // size comes from double_data, not raw_data
+            aligned_val.allocate(divUp(sz, sizeof(double)));
+            memcpy(aligned_val.data(), val, sz);
+            val = (char*)aligned_val.data();
+        }
+#endif
+        Mat(sizes, CV_64FC1, val).convertTo(blob, CV_32FC1);
     }
     else if (datatype == opencv_onnx::TensorProto_DataType_INT32)
     {

diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index 578e0442b2..eb1db3396b 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -2098,6 +2098,11 @@ TEST_P(Test_ONNX_nets, MobileNet_v2)
     testONNXModels("mobilenetv2", pb, default_l1, default_lInf, true);
 }
 
+TEST_P(Test_ONNX_nets, MobileNet_v2_FP16)
+{
+    testONNXModels("mobilenetv2_fp16", npy, default_l1, default_lInf, true);
+}
+
 TEST_P(Test_ONNX_nets, LResNet100E_IR)
 {
     applyTestTag(
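
Reviewer note on the i * 2 + offset read above: ONNX's int32_data field stores one FP16 value per int32 element, bit-cast into its low 16 bits (see the linked onnx issue). Reinterpreting that buffer as float16_t therefore yields two half-words per element, and only one of them carries the payload: the low half on little-endian hosts (offset 0), the high half on big-endian ones (offset 1). Below is a minimal standalone sketch of the same extraction, using plain uint16_t in place of cv::float16_t so it builds without OpenCV; the values and names are illustrative only, not part of the patch.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Same endianness probe as the patch's isLittleEndianCPU().
    static bool isLittleEndian()
    {
        int x = 7;
        return ((const char*)&x)[0] != 0;
    }

    int main()
    {
        // One FP16 payload in the low 16 bits of each int32 element:
        // 0x3C00 is 1.0 and 0xC000 is -2.0 in IEEE 754 half precision.
        std::vector<int32_t> int32_data = { 0x3C00, 0xC000 };

        const int offset = isLittleEndian() ? 0 : 1;
        const uint16_t* fp16Ptr = (const uint16_t*)int32_data.data();

        // Keep every other half-word, skipping the zero-filled half of each int32.
        std::vector<uint16_t> halfs(int32_data.size());
        for (size_t i = 0; i < halfs.size(); i++)
            halfs[i] = fp16Ptr[i * 2 + offset];

        printf("0x%04X 0x%04X\n", (unsigned)halfs[0], (unsigned)halfs[1]); // expected: 0x3C00 0xC000
        return 0;
    }

The patch then wraps the compacted FP16 buffer in a CV_16FC1 Mat and converts it to CV_32FC1, which is exactly the doubled memory cost that the CV_LOG_ONCE_WARNING message calls out.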