@@ -18,6 +18,17 @@ CV__DNN_INLINE_NS_BEGIN

extern bool DNN_DIAGNOSTICS_RUN;
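
// Runtime endianness probe: on a little-endian CPU the least-significant byte of
// the int value 7 is stored first, so ptr[0] == 7; on a big-endian CPU ptr[0] == 0.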
static int isLittleEndianCPU()
{
    int x = 7;
    char *ptr = (char *)&x;

    if (ptr[0] == 0)
        return 0;
    else
        return 1;
}
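
// getMatFromTensor() below uses this probe to pick which 16-bit half of each
// int32_data element carries the fp16 payload.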

// This wrapper can behave differently for fake input nodes and real graph nodes.
class ONNXNodeWrapper : public ImportNodeWrapper
{
@@ -767,11 +778,64 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
            Mat(sizes, CV_32FC1, val).copyTo(blob);
        }
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_FLOAT16)
    {
        // FIXME: for now we only load FP16 tensors as FP32 Mats; full FP16 support is left for the future.
        CV_LOG_ONCE_WARNING(NULL, "DNN: FP16 model is loaded as FP32 model, which takes twice the FP16 RAM requirement.");

        // ONNX saves float16 data in two formats: int32_data and raw_data.
        // Link: https://github.com/onnx/onnx/issues/4460#issuecomment-1224373746
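        // For example, the fp16 value 1.0 (bit pattern 0x3C00) arrives either as the
        // int32_data element 0x00003C00 or as the two raw_data bytes 0x00 0x3C.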
        if (!tensor_proto.int32_data().empty())
        {
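            // Each fp16 payload sits in the low 16 bits of an int32 element. Viewed as
            // an array of 16-bit half-words, that low half is at even indices on
            // little-endian CPUs and at odd indices on big-endian ones, hence the offset.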
            const int offset = isLittleEndianCPU() ? 0 : 1;
            const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();

            AutoBuffer<float16_t, 16> aligned_val;
            size_t sz = tensor_proto.int32_data().size();
            aligned_val.allocate(sz);
            float16_t* bufPtr = aligned_val.data();

            // Gather every other half-word so the fp16 payloads become contiguous.
            float16_t *fp16Ptr = (float16_t *)field.data();
            for (size_t i = 0; i < sz; i++)
            {
                bufPtr[i] = fp16Ptr[i * 2 + offset];
            }
            Mat(sizes, CV_16FC1, bufPtr).convertTo(blob, CV_32FC1);
        }
        else
        {
            char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
#if CV_STRONG_ALIGNMENT
            // Aligned pointer is required.
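            // raw_data is a plain byte string, so on CPUs that fault on unaligned
            // 16-bit loads the buffer is first copied into an aligned AutoBuffer.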
            AutoBuffer<float16_t, 16> aligned_val;
            if (!isAligned<sizeof(float16_t)>(val))
            {
                size_t sz = tensor_proto.raw_data().size();
                aligned_val.allocate(divUp(sz, sizeof(float16_t)));
                memcpy(aligned_val.data(), val, sz);
                val = (char*)aligned_val.data();
            }
#endif
            Mat(sizes, CV_16FC1, val).convertTo(blob, CV_32FC1);
        }
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_DOUBLE)
    {
        const ::google::protobuf::RepeatedField<double> field = tensor_proto.double_data();
        CV_Assert(!field.empty());
        char* val = (char *)field.data();
#if CV_STRONG_ALIGNMENT
        // Aligned pointer is required.
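        // Same pattern as the FP16 raw_data path: copy into an aligned buffer
        // before reinterpreting the bytes as doubles.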
        AutoBuffer<double, 16> aligned_val;
        if (!isAligned<sizeof(double)>(val))
        {
            size_t sz = field.size() * sizeof(double);
            aligned_val.allocate(divUp(sz, sizeof(double)));
            memcpy(aligned_val.data(), val, sz);
            val = (char*)aligned_val.data();
        }
#endif
        Mat(sizes, CV_64FC1, val).convertTo(blob, CV_32FC1);
    }
    else if (datatype == opencv_onnx::TensorProto_DataType_INT32)
    {