/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp" |
|
#include "arithm_simd.hpp" |
|
#include "arithm_core.hpp" |
|
#include "replacement.hpp" |
|
|
|
namespace cv { namespace hal { |
|
|
|
//======================================= |
|
|
|
#undef CALL_HAL |
|
#define CALL_HAL(fun) \ |
|
int res = fun(src1, step1, src2, step2, dst, step, width, height); \ |
|
if (res == Error::Ok) \ |
|
return; \ |
|
else if (res != Error::NotImplemented) \ |
|
throw Failure(res); |
|
|
|
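// CALL_HAL gives an external HAL replacement the first chance at every call:
// Error::Ok means it did the work, Error::NotImplemented falls through to the
// code below, and any other status is a hard failure. A minimal sketch of a
// replacement (hypothetical names; the actual hook is whatever replacement.hpp
// declares for hal_add8u):
//
//   inline int hal_add8u(const uchar* src1, size_t step1,
//                        const uchar* src2, size_t step2,
//                        uchar* dst, size_t step, int width, int height)
//   {
//       if (width*height < 1024)
//           return Error::NotImplemented;   // let the generic code handle it
//       // ... vendor-optimized addition ...
//       return Error::Ok;
//   }
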
#if (ARITHM_USE_IPP == 1)
static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
{
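    // a single-row array is stored contiguously, so any padded step
    // can be collapsed to the exact row size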
    if( height == 1 )
        step1 = step2 = step = width*elemSize;
}
#define CALL_IPP_BIN_12(fun) \
    CV_IPP_CHECK() \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    }
#else
#define CALL_IPP_BIN_12(fun)
#endif

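// The kernels below all follow the same dispatch order: the external HAL hook
// first (CALL_HAL), then an IPP primitive where one exists (falling through
// if IPP declines), and finally the templated loops from arithm_core.hpp,
// which use SIMD element operations when IF_SIMD enables them.
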
//=======================================
// Add
//=======================================

void add8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add8u)
    CALL_IPP_BIN_12(ippiAdd_8u_C1RSfs)
    (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

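// These kernels work on raw row pointers; applications normally reach them
// through the cv::add / cv::subtract / ... API. A usage sketch (user code):
//
//   cv::Mat a(480, 640, CV_8UC1, cv::Scalar(200));
//   cv::Mat b(480, 640, CV_8UC1, cv::Scalar(100));
//   cv::Mat sum;
//   cv::add(a, b, sum);   // saturating: 200 + 100 clamps to 255 for CV_8U
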
void add8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add8s)
    vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void add16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add16u)
    CALL_IPP_BIN_12(ippiAdd_16u_C1RSfs)
    (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add16s)
    CALL_IPP_BIN_12(ippiAdd_16s_C1RSfs)
    (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add32s)
    vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void add32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add32f)
    CALL_IPP_BIN_12(ippiAdd_32f_C1R)
    (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_add64f)
    vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height);
}

//=======================================

#if (ARITHM_USE_IPP == 1)
#define CALL_IPP_BIN_21(fun) \
    CV_IPP_CHECK() \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    }
#else
#define CALL_IPP_BIN_21(fun)
#endif
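// Note the swapped operand order in CALL_IPP_BIN_21: IPP's Sub-style
// primitives compute (second operand - first operand), so passing src2 first
// yields dst = src1 - src2.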

//=======================================
// Subtract
//=======================================

void sub8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub8u)
    CALL_IPP_BIN_21(ippiSub_8u_C1RSfs)
    (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub8s)
    vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void sub16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub16u)
    CALL_IPP_BIN_21(ippiSub_16u_C1RSfs)
    (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub16s)
    CALL_IPP_BIN_21(ippiSub_16s_C1RSfs)
    (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub32s)
    vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void sub32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub32f)
    CALL_IPP_BIN_21(ippiSub_32f_C1R)
    (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_sub64f)
    vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height);
}

//=======================================

#if (ARITHM_USE_IPP == 1)
#define CALL_IPP_MIN_MAX(fun, type) \
    CV_IPP_CHECK() \
    { \
        type* s1 = (type*)src1; \
        type* s2 = (type*)src2; \
        type* d = dst; \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        int i = 0; \
        for(; i < height; i++) \
        { \
            if (0 > fun(s1, s2, d, width)) \
                break; \
            s1 = (type*)((uchar*)s1 + step1); \
            s2 = (type*)((uchar*)s2 + step2); \
            d = (type*)((uchar*)d + step); \
        } \
        if (i == height) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    }
#else
#define CALL_IPP_MIN_MAX(fun, type)
#endif
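// The wrapper above drives IPP's 1-D ippsMinEvery/ippsMaxEvery signal
// functions one row at a time, stepping the pointers by the byte strides
// between rows; if any row fails, it falls back to the generic loop.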

//=======================================
// Max
//=======================================

void max8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max8u)
    CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar)
    vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max8s)
    vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max16u)
    CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort)
    vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max16s)
    vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max32s)
    vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max32f)
    CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float)
    vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_max64f)
    CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double)
    vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height);
}

//=======================================
// Min
//=======================================

void min8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min8u)
    CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar)
    vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min8s)
    vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min16u)
    CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort)
    vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min16s)
    vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min32s)
    vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min32f)
    CALL_IPP_MIN_MAX(ippsMinEvery_32f, float)
    vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_min64f)
    CALL_IPP_MIN_MAX(ippsMinEvery_64f, double)
    vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height);
}

//=======================================
// AbsDiff
//=======================================

void absdiff8u( const uchar* src1, size_t step1,
                const uchar* src2, size_t step2,
                uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff8u)
    CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R)
    (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void absdiff8s( const schar* src1, size_t step1,
                const schar* src2, size_t step2,
                schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff8s)
    vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void absdiff16u( const ushort* src1, size_t step1,
                 const ushort* src2, size_t step2,
                 ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff16u)
    CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R)
    (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
}

void absdiff16s( const short* src1, size_t step1,
                 const short* src2, size_t step2,
                 short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff16s)
    vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height);
}

void absdiff32s( const int* src1, size_t step1,
                 const int* src2, size_t step2,
                 int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff32s)
    vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void absdiff32f( const float* src1, size_t step1,
                 const float* src2, size_t step2,
                 float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff32f)
    CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R)
    (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height));
}

void absdiff64f( const double* src1, size_t step1,
                 const double* src2, size_t step2,
                 double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_absdiff64f)
    vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height);
}

//=======================================
// Logical
//=======================================

void and8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_and8u)
    CALL_IPP_BIN_12(ippiAnd_8u_C1R)
    (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void or8u( const uchar* src1, size_t step1,
           const uchar* src2, size_t step2,
           uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_or8u)
    CALL_IPP_BIN_12(ippiOr_8u_C1R)
    (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void xor8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_xor8u)
    CALL_IPP_BIN_12(ippiXor_8u_C1R)
    (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void not8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(hal_not8u)
#if (ARITHM_USE_IPP == 1)
    // ippiNot is unary (src -> dst), so it cannot go through the binary
    // CALL_IPP_BIN_12 wrapper used by the other logical ops
    CV_IPP_CHECK()
    {
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step);
        if (0 <= ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, ippiSize(width, height)))
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
        setIppErrorStatus();
    }
#endif
    (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

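// Only 8-bit variants exist for the bitwise operations: they are
// type-agnostic, and callers reinterpret wider element types as rows of
// bytes before dispatching here.
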
//=======================================

#undef CALL_HAL
#define CALL_HAL(fun) \
    int res = fun(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); \
    if (res == Error::Ok) \
        return; \
    else if (res != Error::NotImplemented) \
        throw Failure(res);

#if ARITHM_USE_IPP
inline static IppCmpOp convert_cmp(int _cmpop)
{
    return _cmpop == CMP_EQ ? ippCmpEq :
        _cmpop == CMP_GT ? ippCmpGreater :
        _cmpop == CMP_GE ? ippCmpGreaterEq :
        _cmpop == CMP_LT ? ippCmpLess :
        _cmpop == CMP_LE ? ippCmpLessEq :
        (IppCmpOp)-1;
}
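// CMP_NE deliberately maps to (IppCmpOp)-1: IPP has no "not equal" compare,
// so CALL_IPP_CMP only takes the IPP path when op >= 0 and the generic code
// handles the remaining case.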
#define CALL_IPP_CMP(fun) \
    CV_IPP_CHECK() \
    { \
        IppCmpOp op = convert_cmp(*(int *)_cmpop); \
        if( op >= 0 ) \
        { \
            fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
            if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \
            { \
                CV_IMPL_ADD(CV_IMPL_IPP); \
                return; \
            } \
            setIppErrorStatus(); \
        } \
    }
#else
#define CALL_IPP_CMP(fun)
#endif

//=======================================
// Compare
//=======================================

void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
           uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp8u)
    CALL_IPP_CMP(ippiCompare_8u_C1R)
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
    int code = *(int*)_cmpop;
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
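    // a >= b  <=>  b <= a  and  a < b  <=>  b > a: reduce GE/LT to LE/GT by
    // swapping the operands, so only two compare kernels are needed per group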
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
                __m128i c128 = _mm_set1_epi8 (-128);
                for( ; x <= width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    // there is no unsigned 8-bit SIMD comparison; biasing both
                    // operands by -128 maps [0,255] onto [-128,127] while
                    // preserving order, so the signed compare gives the
                    // unsigned result
                    r00 = _mm_sub_epi8(r00, c128);
                    r10 = _mm_sub_epi8(r10, c128);

                    r00 = _mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
                    _mm_storeu_si128((__m128i*)(dst + x), r00);
                }
            }
#elif CV_NEON
            uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);

            for( ; x <= width - 16; x += 16 )
            {
                vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
            }
#endif

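            // scalar tail: -(a > b) evaluates to 0x00 or 0xFF per byte, and
            // XOR with m (0 or 255) flips the mask when the LE result is wanted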
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
                for( ; x <= width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpeq_epi8(r00, r10), m128);
                    _mm_storeu_si128((__m128i*)(dst + x), r00);
                }
            }
#elif CV_NEON
            uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);

            for( ; x <= width - 16; x += 16 )
            {
                vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
            }
#endif
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}

void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
           uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp8s)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp16u)
    CALL_IPP_CMP(ippiCompare_16u_C1R)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp16s)
    CALL_IPP_CMP(ippiCompare_16s_C1R)
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);

    int code = *(int*)_cmpop;
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
                for( ; x <= width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpgt_epi16(r00, r10), m128);
                    __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
                    __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
                    r01 = _mm_xor_si128(_mm_cmpgt_epi16(r01, r11), m128);
                    r11 = _mm_packs_epi16(r00, r01);
                    _mm_storeu_si128((__m128i*)(dst + x), r11);
                }
                if( x <= width - 8 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpgt_epi16(r00, r10), m128);
                    r10 = _mm_packs_epi16(r00, r00);
                    _mm_storel_epi64((__m128i*)(dst + x), r10);

                    x += 8;
                }
            }
#elif CV_NEON
            uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);

            for( ; x <= width - 16; x += 16 )
            {
                int16x8_t in1 = vld1q_s16(src1 + x);
                int16x8_t in2 = vld1q_s16(src2 + x);
                uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));

                in1 = vld1q_s16(src1 + x + 8);
                in2 = vld1q_s16(src2 + x + 8);
                uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));

                vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
            }
#endif

            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
                for( ; x <= width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpeq_epi16(r00, r10), m128);
                    __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
                    __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
                    r01 = _mm_xor_si128(_mm_cmpeq_epi16(r01, r11), m128);
                    r11 = _mm_packs_epi16(r00, r01);
                    _mm_storeu_si128((__m128i*)(dst + x), r11);
                }
                if( x <= width - 8 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpeq_epi16(r00, r10), m128);
                    r10 = _mm_packs_epi16(r00, r00);
                    _mm_storel_epi64((__m128i*)(dst + x), r10);

                    x += 8;
                }
            }
#elif CV_NEON
            uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);

            for( ; x <= width - 16; x += 16 )
            {
                int16x8_t in1 = vld1q_s16(src1 + x);
                int16x8_t in2 = vld1q_s16(src2 + x);
                uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));

                in1 = vld1q_s16(src1 + x + 8);
                in2 = vld1q_s16(src2 + x + 8);
                uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));

                vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
            }
#endif
            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}

void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp32s)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp32f)
    CALL_IPP_CMP(ippiCompare_32f_C1R)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(hal_cmp64f)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

//=======================================

#undef CALL_HAL
#define CALL_HAL(fun) \
    int res = fun(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); \
    if (res == Error::Ok) \
        return; \
    else if (res != Error::NotImplemented) \
        throw Failure(res);

#if defined HAVE_IPP
#define CALL_IPP_MUL(fun) \
    CV_IPP_CHECK() \
    { \
        if (std::fabs(fscale - 1) <= FLT_EPSILON) \
        { \
            if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \
            { \
                CV_IMPL_ADD(CV_IMPL_IPP); \
                return; \
            } \
            setIppErrorStatus(); \
        } \
    }
#else
#define CALL_IPP_MUL(fun)
#endif
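// The IPP path is only taken for the unscaled case (scale == 1); arbitrary
// scale factors go through the generic mul_ loop.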

//=======================================
// Multiply
//=======================================

void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul8u)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_8u_C1RSfs)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul8s)
    mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale);
}

void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul16u)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_16u_C1RSfs)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul16s)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_16s_C1RSfs)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul32s)
    mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul32f)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_32f_C1R)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_mul64f)
    mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

//=======================================
// Divide
//=======================================

void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div8u)
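    // a null src1 encodes "scalar / array": compute dst = scale / src2 via
    // the reciprocal kernel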
    if( src1 )
        div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
    else
        recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div8s)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div16u)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div16s)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div32s)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div32f)
    div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_div64f)
    div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

//=======================================
// Reciprocal
//=======================================

void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
              uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip8u)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
              schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip8s)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
               ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip16u)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
               short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip16s)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
               int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip32s)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
               float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip32f)
    recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
               double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(hal_recip64f)
    recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

//=======================================

#undef CALL_HAL
#define CALL_HAL(fun) \
    int res = fun(src1, step1, src2, step2, dst, step, width, height, scalars); \
    if (res == Error::Ok) \
        return; \
    else if (res != Error::NotImplemented) \
        throw Failure(res);

//=======================================
// Add weighted
//=======================================

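// scalars points at three doubles { alpha, beta, gamma }, giving
// dst = saturate(src1*alpha + src2*beta + gamma) per element.
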
void
addWeighted8u( const uchar* src1, size_t step1,
               const uchar* src2, size_t step2,
               uchar* dst, size_t step, int width, int height,
               void* scalars )
{
    CALL_HAL(hal_addWeighted8u)
    const double* scalars_ = (const double*)scalars;
    float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];

    for( ; height--; src1 += step1, src2 += step2, dst += step )
    {
        int x = 0;

#if CV_SSE2
        if( USE_SSE2 )
        {
            __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
            __m128i z = _mm_setzero_si128();

            for( ; x <= width - 8; x += 8 )
            {
                __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
                __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);

                __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
                __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
                __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
                __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));

                u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
                u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
                u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);

                u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
                u = _mm_packus_epi16(u, u);

                _mm_storel_epi64((__m128i*)(dst + x), u);
            }
        }
#elif CV_NEON
        float32x4_t g = vdupq_n_f32 (gamma);

        for( ; x <= width - 8; x += 8 )
        {
            uint8x8_t in1 = vld1_u8(src1+x);
            uint16x8_t in1_16 = vmovl_u8(in1);
            float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
            float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));

            uint8x8_t in2 = vld1_u8(src2+x);
            uint16x8_t in2_16 = vmovl_u8(in2);
            float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
            float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));

            float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
            float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
            out_f_l = vaddq_f32(out_f_l, g);
            out_f_h = vaddq_f32(out_f_h, g);

            uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
            uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));

            uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
            uint8x8_t out = vqmovn_u16(out_16);

            vst1_u8(dst+x, out);
        }
#endif
#if CV_ENABLE_UNROLLED
        for( ; x <= width - 4; x += 4 )
        {
            float t0, t1;
            t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;

            dst[x] = saturate_cast<uchar>(t0);
            dst[x+1] = saturate_cast<uchar>(t1);

            t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
            t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;

            dst[x+2] = saturate_cast<uchar>(t0);
            dst[x+3] = saturate_cast<uchar>(t1);
        }
#endif

        for( ; x < width; x++ )
        {
            float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            dst[x] = saturate_cast<uchar>(t0);
        }
    }
}

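// A usage sketch at the API level (user code):
//
//   cv::Mat a, b, blended;
//   // ... fill a and b with the same size and type ...
//   cv::addWeighted(a, 0.7, b, 0.3, 0.0, blended);   // 70/30 cross-fade
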
void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                    schar* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(hal_addWeighted8s)
    addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                     ushort* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(hal_addWeighted16u)
    addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
                     short* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(hal_addWeighted16s)
    addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
                     int* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(hal_addWeighted32s)
    addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
                     float* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(hal_addWeighted32f)
    addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
                     double* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(hal_addWeighted64f)
    addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

}} // cv::hal::