|
|
|
@ -306,14 +306,14 @@ static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int le |
|
|
|
|
if( !mask ) |
|
|
|
|
{ |
|
|
|
|
SumSqr_SIMD<T, ST, SQT> vop; |
|
|
|
|
int i = vop(src0, mask, sum, sqsum, len, cn), k = cn % 4; |
|
|
|
|
src += i * cn; |
|
|
|
|
int x = vop(src0, mask, sum, sqsum, len, cn), k = cn % 4; |
|
|
|
|
src = src0 + x * cn; |
|
|
|
|
|
|
|
|
|
if( k == 1 ) |
|
|
|
|
{ |
|
|
|
|
ST s0 = sum[0]; |
|
|
|
|
SQT sq0 = sqsum[0]; |
|
|
|
|
for( ; i < len; i++, src += cn ) |
|
|
|
|
for(int i = x; i < len; i++, src += cn ) |
|
|
|
|
{ |
|
|
|
|
T v = src[0]; |
|
|
|
|
s0 += v; sq0 += (SQT)v*v; |
|
|
|
@ -325,7 +325,7 @@ static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int le |
|
|
|
|
{ |
|
|
|
|
ST s0 = sum[0], s1 = sum[1]; |
|
|
|
|
SQT sq0 = sqsum[0], sq1 = sqsum[1]; |
|
|
|
|
for( ; i < len; i++, src += cn ) |
|
|
|
|
for(int i = x; i < len; i++, src += cn ) |
|
|
|
|
{ |
|
|
|
|
T v0 = src[0], v1 = src[1]; |
|
|
|
|
s0 += v0; sq0 += (SQT)v0*v0; |
|
|
|
@ -338,7 +338,7 @@ static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int le |
|
|
|
|
{ |
|
|
|
|
ST s0 = sum[0], s1 = sum[1], s2 = sum[2]; |
|
|
|
|
SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2]; |
|
|
|
|
for( ; i < len; i++, src += cn ) |
|
|
|
|
for(int i = x; i < len; i++, src += cn ) |
|
|
|
|
{ |
|
|
|
|
T v0 = src[0], v1 = src[1], v2 = src[2]; |
|
|
|
|
s0 += v0; sq0 += (SQT)v0*v0; |
|
|
|
@ -351,10 +351,10 @@ static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int le |
|
|
|
|
|
|
|
|
|
for( ; k < cn; k += 4 ) |
|
|
|
|
{ |
|
|
|
|
src = src0 + k; |
|
|
|
|
src = src0 + x * cn + k; |
|
|
|
|
ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3]; |
|
|
|
|
SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3]; |
|
|
|
|
for( ; i < len; i++, src += cn ) |
|
|
|
|
for(int i = x; i < len; i++, src += cn ) |
|
|
|
|
{ |
|
|
|
|
T v0, v1; |
|
|
|
|
v0 = src[0], v1 = src[1]; |
|
|
|
|