|
|
|
@ -710,67 +710,78 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) |
|
|
|
|
result; |
|
|
|
|
result.d = 0; |
|
|
|
|
NAryMatIterator it(arrays, ptrs); |
|
|
|
|
int j, total = (int)it.size, blockSize = total; |
|
|
|
|
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) || |
|
|
|
|
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S); |
|
|
|
|
int isum = 0; |
|
|
|
|
int *ibuf = &result.i; |
|
|
|
|
AutoBuffer<float> fltbuf_; |
|
|
|
|
float* fltbuf = 0; |
|
|
|
|
size_t esz = 0; |
|
|
|
|
|
|
|
|
|
if( blockSum ) |
|
|
|
|
{ |
|
|
|
|
esz = src.elemSize(); |
|
|
|
|
CV_CheckLT((size_t)it.size, (size_t)INT_MAX, ""); |
|
|
|
|
|
|
|
|
|
if( depth == CV_16F ) |
|
|
|
|
{ |
|
|
|
|
blockSize = std::min(blockSize, 1024); |
|
|
|
|
fltbuf_.allocate(blockSize); |
|
|
|
|
fltbuf = fltbuf_.data(); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
if ((normType == NORM_L1 && depth <= CV_16S) || |
|
|
|
|
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S)) |
|
|
|
|
{ |
|
|
|
|
// special case to handle "integer" overflow in accumulator
|
|
|
|
|
const size_t esz = src.elemSize(); |
|
|
|
|
const int total = (int)it.size; |
|
|
|
|
const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn; |
|
|
|
|
const int blockSize = std::min(total, intSumBlockSize); |
|
|
|
|
int isum = 0; |
|
|
|
|
int count = 0; |
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it) |
|
|
|
|
{ |
|
|
|
|
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn; |
|
|
|
|
blockSize = std::min(blockSize, intSumBlockSize); |
|
|
|
|
ibuf = &isum; |
|
|
|
|
for (int j = 0; j < total; j += blockSize) |
|
|
|
|
{ |
|
|
|
|
int bsz = std::min(total - j, blockSize); |
|
|
|
|
func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn); |
|
|
|
|
count += bsz; |
|
|
|
|
if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) |
|
|
|
|
{ |
|
|
|
|
result.d += isum; |
|
|
|
|
isum = 0; |
|
|
|
|
count = 0; |
|
|
|
|
} |
|
|
|
|
ptrs[0] += bsz*esz; |
|
|
|
|
if (ptrs[1]) |
|
|
|
|
ptrs[1] += bsz; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it ) |
|
|
|
|
else if (depth == CV_16F) |
|
|
|
|
{ |
|
|
|
|
for( j = 0; j < total; j += blockSize ) |
|
|
|
|
const size_t esz = src.elemSize(); |
|
|
|
|
const int total = (int)it.size; |
|
|
|
|
const int blockSize = std::min(total, divUp(1024, cn)); |
|
|
|
|
AutoBuffer<float, 1024> fltbuf(blockSize); |
|
|
|
|
float* data0 = fltbuf.data(); |
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it) |
|
|
|
|
{ |
|
|
|
|
int bsz = std::min(total - j, blockSize); |
|
|
|
|
const uchar* data = ptrs[0]; |
|
|
|
|
if( depth == CV_16F ) |
|
|
|
|
{ |
|
|
|
|
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz); |
|
|
|
|
data = (const uchar*)fltbuf; |
|
|
|
|
} |
|
|
|
|
func( data, ptrs[1], (uchar*)ibuf, bsz, cn ); |
|
|
|
|
if( blockSum && depth != CV_16F ) |
|
|
|
|
for (int j = 0; j < total; j += blockSize) |
|
|
|
|
{ |
|
|
|
|
result.d += isum; |
|
|
|
|
isum = 0; |
|
|
|
|
int bsz = std::min(total - j, blockSize); |
|
|
|
|
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn); |
|
|
|
|
func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn); |
|
|
|
|
ptrs[0] += bsz*esz; |
|
|
|
|
if (ptrs[1]) |
|
|
|
|
ptrs[1] += bsz; |
|
|
|
|
} |
|
|
|
|
ptrs[0] += bsz*esz; |
|
|
|
|
if( ptrs[1] ) |
|
|
|
|
ptrs[1] += bsz; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
// generic implementation
|
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it) |
|
|
|
|
{ |
|
|
|
|
func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if( normType == NORM_INF ) |
|
|
|
|
{ |
|
|
|
|
if( depth == CV_64F ) |
|
|
|
|
; |
|
|
|
|
else if( depth == CV_32F ) |
|
|
|
|
result.d = result.f; |
|
|
|
|
if(depth == CV_64F || depth == CV_16F) |
|
|
|
|
return result.d; |
|
|
|
|
else if (depth == CV_32F) |
|
|
|
|
return result.f; |
|
|
|
|
else |
|
|
|
|
result.d = result.i; |
|
|
|
|
return result.i; |
|
|
|
|
} |
|
|
|
|
else if( normType == NORM_L2 ) |
|
|
|
|
result.d = std::sqrt(result.d); |
|
|
|
|
return std::sqrt(result.d); |
|
|
|
|
|
|
|
|
|
return result.d; |
|
|
|
|
} |
|
|
|
@ -1186,70 +1197,82 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m |
|
|
|
|
result; |
|
|
|
|
result.d = 0; |
|
|
|
|
NAryMatIterator it(arrays, ptrs); |
|
|
|
|
int j, total = (int)it.size, blockSize = total; |
|
|
|
|
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) || |
|
|
|
|
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S); |
|
|
|
|
unsigned isum = 0; |
|
|
|
|
unsigned *ibuf = &result.u; |
|
|
|
|
AutoBuffer<float> fltbuf_; |
|
|
|
|
float* fltbuf = 0; |
|
|
|
|
size_t esz = 0; |
|
|
|
|
|
|
|
|
|
if( blockSum ) |
|
|
|
|
{ |
|
|
|
|
esz = src1.elemSize(); |
|
|
|
|
CV_CheckLT((size_t)it.size, (size_t)INT_MAX, ""); |
|
|
|
|
|
|
|
|
|
if( depth == CV_16F ) |
|
|
|
|
{ |
|
|
|
|
blockSize = std::min(blockSize, 1024); |
|
|
|
|
fltbuf_.allocate(blockSize*2); |
|
|
|
|
fltbuf = fltbuf_.data(); |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
if ((normType == NORM_L1 && depth <= CV_16S) || |
|
|
|
|
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S)) |
|
|
|
|
{ |
|
|
|
|
// special case to handle "integer" overflow in accumulator
|
|
|
|
|
const size_t esz = src1.elemSize(); |
|
|
|
|
const int total = (int)it.size; |
|
|
|
|
const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15); |
|
|
|
|
const int blockSize = std::min(total, intSumBlockSize); |
|
|
|
|
int isum = 0; |
|
|
|
|
int count = 0; |
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it) |
|
|
|
|
{ |
|
|
|
|
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn; |
|
|
|
|
blockSize = std::min(blockSize, intSumBlockSize); |
|
|
|
|
ibuf = &isum; |
|
|
|
|
for (int j = 0; j < total; j += blockSize) |
|
|
|
|
{ |
|
|
|
|
int bsz = std::min(total - j, blockSize); |
|
|
|
|
func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn); |
|
|
|
|
count += bsz; |
|
|
|
|
if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) |
|
|
|
|
{ |
|
|
|
|
result.d += isum; |
|
|
|
|
isum = 0; |
|
|
|
|
count = 0; |
|
|
|
|
} |
|
|
|
|
ptrs[0] += bsz*esz; |
|
|
|
|
ptrs[1] += bsz*esz; |
|
|
|
|
if (ptrs[2]) |
|
|
|
|
ptrs[2] += bsz; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for( size_t i = 0; i < it.nplanes; i++, ++it ) |
|
|
|
|
else if (depth == CV_16F) |
|
|
|
|
{ |
|
|
|
|
for( j = 0; j < total; j += blockSize ) |
|
|
|
|
const size_t esz = src1.elemSize(); |
|
|
|
|
const int total = (int)it.size; |
|
|
|
|
const int blockSize = std::min(total, divUp(512, cn)); |
|
|
|
|
AutoBuffer<float, 1024> fltbuf(blockSize * 2); |
|
|
|
|
float* data0 = fltbuf.data(); |
|
|
|
|
float* data1 = fltbuf.data() + blockSize * cn; |
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it) |
|
|
|
|
{ |
|
|
|
|
int bsz = std::min(total - j, blockSize); |
|
|
|
|
const uchar *data0 = ptrs[0], *data1 = ptrs[1]; |
|
|
|
|
if( depth == CV_16F ) |
|
|
|
|
{ |
|
|
|
|
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz); |
|
|
|
|
hal::cvt16f32f((const float16_t*)ptrs[1], fltbuf + bsz, bsz); |
|
|
|
|
data0 = (const uchar*)fltbuf; |
|
|
|
|
data1 = (const uchar*)(fltbuf + bsz); |
|
|
|
|
} |
|
|
|
|
func( data0, data1, ptrs[2], (uchar*)ibuf, bsz, cn ); |
|
|
|
|
if( blockSum && depth != CV_16F ) |
|
|
|
|
for (int j = 0; j < total; j += blockSize) |
|
|
|
|
{ |
|
|
|
|
result.d += isum; |
|
|
|
|
isum = 0; |
|
|
|
|
int bsz = std::min(total - j, blockSize); |
|
|
|
|
hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn); |
|
|
|
|
hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn); |
|
|
|
|
func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn); |
|
|
|
|
ptrs[0] += bsz*esz; |
|
|
|
|
ptrs[1] += bsz*esz; |
|
|
|
|
if (ptrs[2]) |
|
|
|
|
ptrs[2] += bsz; |
|
|
|
|
} |
|
|
|
|
ptrs[0] += bsz*esz; |
|
|
|
|
ptrs[1] += bsz*esz; |
|
|
|
|
if( ptrs[2] ) |
|
|
|
|
ptrs[2] += bsz; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
// generic implementation
|
|
|
|
|
for (size_t i = 0; i < it.nplanes; i++, ++it) |
|
|
|
|
{ |
|
|
|
|
func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if( normType == NORM_INF ) |
|
|
|
|
{ |
|
|
|
|
if( depth == CV_64F ) |
|
|
|
|
; |
|
|
|
|
else if( depth == CV_32F ) |
|
|
|
|
result.d = result.f; |
|
|
|
|
if (depth == CV_64F || depth == CV_16F) |
|
|
|
|
return result.d; |
|
|
|
|
else if (depth == CV_32F) |
|
|
|
|
return result.f; |
|
|
|
|
else |
|
|
|
|
result.d = result.u; |
|
|
|
|
return result.u; |
|
|
|
|
} |
|
|
|
|
else if( normType == NORM_L2 ) |
|
|
|
|
result.d = std::sqrt(result.d); |
|
|
|
|
return std::sqrt(result.d); |
|
|
|
|
|
|
|
|
|
return result.d; |
|
|
|
|
} |
|
|
|
|