diff --git a/3rdparty/ndsrvp/include/core.hpp b/3rdparty/ndsrvp/include/core.hpp index 190a1b926b..ee57668539 100644 --- a/3rdparty/ndsrvp/include/core.hpp +++ b/3rdparty/ndsrvp/include/core.hpp @@ -1,6 +1,6 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. +// of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_NDSRVP_CORE_HPP #define OPENCV_NDSRVP_CORE_HPP diff --git a/3rdparty/ndsrvp/include/imgproc.hpp b/3rdparty/ndsrvp/include/imgproc.hpp index 3a572172a8..94104f0b71 100644 --- a/3rdparty/ndsrvp/include/imgproc.hpp +++ b/3rdparty/ndsrvp/include/imgproc.hpp @@ -1,18 +1,12 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. +// of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_NDSRVP_IMGPROC_HPP #define OPENCV_NDSRVP_IMGPROC_HPP namespace cv { -// ################ remap ################ - -void remap(InputArray _src, OutputArray _dst, - InputArray _map1, InputArray _map2, - int interpolation, int borderType, const Scalar& borderValue); - namespace ndsrvp { enum InterpolationMasks { @@ -36,23 +30,36 @@ int integral(int depth, int sdepth, int sqdepth, // ################ warpAffine ################ -int warpAffine(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[6], int interpolation, int borderType, const double borderValue[4]); +int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw); -#undef cv_hal_warpAffine -#define cv_hal_warpAffine (cv::ndsrvp::warpAffine) +#undef cv_hal_warpAffineBlocklineNN +#define cv_hal_warpAffineBlocklineNN (cv::ndsrvp::warpAffineBlocklineNN) + +int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw); + +#undef cv_hal_warpAffineBlockline +#define cv_hal_warpAffineBlockline (cv::ndsrvp::warpAffineBlockline) // ################ warpPerspective ################ -int warpPerspective(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[9], int interpolation, int borderType, const double borderValue[4]); +int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw); + +#undef cv_hal_warpPerspectiveBlocklineNN +#define cv_hal_warpPerspectiveBlocklineNN (cv::ndsrvp::warpPerspectiveBlocklineNN) + +int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw); + +#undef cv_hal_warpPerspectiveBlockline +#define cv_hal_warpPerspectiveBlockline (cv::ndsrvp::warpPerspectiveBlockline) + +// ################ remap ################ + +int remap32f(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, + uchar *dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step, + float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4]); -#undef cv_hal_warpPerspective -#define cv_hal_warpPerspective (cv::ndsrvp::warpPerspective) +#undef cv_hal_remap32f +#define cv_hal_remap32f (cv::ndsrvp::remap32f) // ################ threshold ################ diff --git a/3rdparty/ndsrvp/ndsrvp_hal.hpp b/3rdparty/ndsrvp/ndsrvp_hal.hpp index 7f12636520..8ceac78db3 100644 --- a/3rdparty/ndsrvp/ndsrvp_hal.hpp +++ b/3rdparty/ndsrvp/ndsrvp_hal.hpp @@ -1,13 +1,14 @@ // This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. +// of this distribution and at http://opencv.org/license.html. #ifndef OPENCV_NDSRVP_HAL_HPP #define OPENCV_NDSRVP_HAL_HPP -#include "opencv2/core/mat.hpp" #include +#include "opencv2/core/hal/interface.h" + #include "include/core.hpp" #include "include/imgproc.hpp" #include "include/features2d.hpp" diff --git a/3rdparty/ndsrvp/src/cvutils.cpp b/3rdparty/ndsrvp/src/cvutils.cpp new file mode 100644 index 0000000000..48e025488f --- /dev/null +++ b/3rdparty/ndsrvp/src/cvutils.cpp @@ -0,0 +1,78 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "cvutils.hpp" + +namespace cv { + +namespace ndsrvp { + +// fastMalloc + +// [0][1][2][3][4][5][6][7][8][9] +// ^udata +// ^adata +// ^adata[-1] == udata + +void* fastMalloc(size_t size) +{ + uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN); + if(!udata) + ndsrvp_error(Error::StsNoMem, "fastMalloc(): Not enough memory"); + uchar** adata = (uchar**)align((size_t)((uchar**)udata + 1), CV_MALLOC_ALIGN); + adata[-1] = udata; + return adata; +} + +void fastFree(void* ptr) +{ + if(ptr) + { + uchar* udata = ((uchar**)ptr)[-1]; + if(!(udata < (uchar*)ptr && ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*) + CV_MALLOC_ALIGN))) + ndsrvp_error(Error::StsBadArg, "fastFree(): Invalid memory block"); + free(udata); + } +} + +// borderInterpolate + +int borderInterpolate(int p, int len, int borderType) +{ + if( (unsigned)p < (unsigned)len ) + ; + else if( borderType == CV_HAL_BORDER_REPLICATE ) + p = p < 0 ? 0 : len - 1; + else if( borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101 ) + { + int delta = borderType == CV_HAL_BORDER_REFLECT_101; + if( len == 1 ) + return 0; + do + { + if( p < 0 ) + p = -p - 1 + delta; + else + p = len - 1 - (p - len) - delta; + } + while( (unsigned)p >= (unsigned)len ); + } + else if( borderType == CV_HAL_BORDER_WRAP ) + { + ndsrvp_assert(len > 0); + if( p < 0 ) + p -= ((p - len + 1) / len) * len; + if( p >= len ) + p %= len; + } + else if( borderType == CV_HAL_BORDER_CONSTANT ) + p = -1; + else + ndsrvp_error(Error::StsBadArg, "borderInterpolate(): Unknown/unsupported border type"); + return p; +} + +} // namespace ndsrvp + +} // namespace cv diff --git a/3rdparty/ndsrvp/src/cvutils.hpp b/3rdparty/ndsrvp/src/cvutils.hpp new file mode 100644 index 0000000000..8cf1476ed6 --- /dev/null +++ b/3rdparty/ndsrvp/src/cvutils.hpp @@ -0,0 +1,108 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_NDSRVP_CVUTILS_HPP +#define OPENCV_NDSRVP_CVUTILS_HPP + +#include + +#include "opencv2/core/hal/interface.h" + +#include +#include +#include +#include +#include +#include +#include + +// misc functions that not exposed to public interface + +namespace cv { + +namespace ndsrvp { + +void* fastMalloc(size_t size); +void fastFree(void* ptr); +int borderInterpolate(int p, int len, int borderType); + +#ifndef MAX +# define MAX(a,b) ((a) < (b) ? (b) : (a)) +#endif + +#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) +#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) + +#define CV_MALLOC_ALIGN 64 + +// error codes + +enum Error{ + StsNoMem = -4, + StsBadArg = -5, + StsAssert = -215 +}; + +// output error + +#define ndsrvp_assert(expr) { if(!(expr)) ndsrvp_error(Error::StsAssert, std::string(#expr)); } + +inline void ndsrvp_error(int code, std::string msg = "") +{ + std::cerr << "NDSRVP Error: code " << code << std::endl; + if(!msg.empty()) + std::cerr << msg << std::endl; + if(code < 0) + throw code; +} + +// clip & vclip + +inline int clip(int x, int a, int b) +{ + return x >= a ? (x < b ? x : b - 1) : a; +} + +inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b) +{ + return (int32x2_t)__nds__bpick((long)a, __nds__bpick((long)(b - 1), (long)x, (long)(x < b)), (long)(x >= a)); +} + +// saturate + +template static inline _Tp saturate_cast(int v) { return _Tp(v); } +template static inline _Tp saturate_cast(float v) { return _Tp(v); } +template static inline _Tp saturate_cast(double v) { return _Tp(v); } + +template<> inline uchar saturate_cast(int v) { return __nds__uclip32(v, 8); } +template<> inline uchar saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline uchar saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline char saturate_cast(int v) { return __nds__sclip32(v, 7); } +template<> inline char saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline char saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline ushort saturate_cast(int v) { return __nds__uclip32(v, 16); } +template<> inline ushort saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline ushort saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline short saturate_cast(int v) { return __nds__sclip32(v, 15); } +template<> inline short saturate_cast(float v) { return saturate_cast((int)lrintf(v)); } +template<> inline short saturate_cast(double v) { return saturate_cast((int)lrint(v)); } + +template<> inline int saturate_cast(float v) { return (int)lrintf(v); } +template<> inline int saturate_cast(double v) { return (int)lrint(v); } + +// align + +inline long align(size_t v, int n) +{ + return (v + n - 1) & -n; +} + +} // namespace ndsrvp + +} // namespace cv + +#endif diff --git a/3rdparty/ndsrvp/src/integral.cpp b/3rdparty/ndsrvp/src/integral.cpp index 37030a8d4c..e1dd993a90 100644 --- a/3rdparty/ndsrvp/src/integral.cpp +++ b/3rdparty/ndsrvp/src/integral.cpp @@ -3,6 +3,8 @@ // of this distribution and at http://opencv.org/license.html. #include "ndsrvp_hal.hpp" +#include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { diff --git a/3rdparty/ndsrvp/src/remap.cpp b/3rdparty/ndsrvp/src/remap.cpp new file mode 100644 index 0000000000..30e4d218e3 --- /dev/null +++ b/3rdparty/ndsrvp/src/remap.cpp @@ -0,0 +1,188 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "ndsrvp_hal.hpp" +#include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" + +namespace cv { + +namespace ndsrvp { + +int remap32f(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height, + uchar* dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step, + float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4]) +{ + const bool isRelative = ((interpolation & CV_HAL_WARP_RELATIVE_MAP) != 0); + interpolation &= ~CV_HAL_WARP_RELATIVE_MAP; + + if( interpolation == CV_HAL_INTER_AREA ) + interpolation = CV_HAL_INTER_LINEAR; + + if( interpolation != CV_HAL_INTER_NEAREST ) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + // only CV_8U + if( (src_type & CV_MAT_DEPTH_MASK) != CV_8U ) + return CV_HAL_ERROR_NOT_IMPLEMENTED; + + int cn = CV_MAT_CN(src_type); + + src_step /= sizeof(uchar); + dst_step /= sizeof(uchar); + + // mapping CV_32FC1 + mapx_step /= sizeof(float); + mapy_step /= sizeof(float); + + // border + uchar border_const[CV_CN_MAX]; + for( int k = 0; k < CV_CN_MAX; k++ ) + border_const[k] = saturate_cast(border_value[k & 3]); + + // divide into blocks + const int BLOCK_SIZE = 1024; + int x, y, x1, y1; + std::array aXY; + short* XY = aXY.data(); + size_t XY_step = BLOCK_SIZE * 2; + + // vectorize + const int32x2_t src_wh = {src_width, src_height}; + const int32x2_t arr_index = {cn, (int)src_step}; + + for (y = 0; y < dst_height; y += BLOCK_SIZE) + { + int dy = std::min(BLOCK_SIZE, dst_height - y); + for (x = 0; x < dst_width; x += BLOCK_SIZE) + { + const int off_y = isRelative ? y : 0; + const int off_x = isRelative ? x : 0; + const int32x2_t voff = {off_x, off_y}; + + int dx = std::min(BLOCK_SIZE, dst_width - x); + // prepare mapping data XY + for (y1 = 0; y1 < dy; y1++) + { + short* rXY = XY + y1 * XY_step; + const float* sX = mapx + (y + y1) * mapx_step + x; + const float* sY = mapy + (y + y1) * mapy_step + x; + for (x1 = 0; x1 < dx; x1++) + { + rXY[x1 * 2] = saturate_cast(sX[x1]); + rXY[x1 * 2 + 1] = saturate_cast(sY[x1]); + } + } + + // precalulate offset + if(isRelative) + { + int16x8_t voff_x; + int16x8_t voff_y = {0, 0, 1, 0, 2, 0, 3, 0}; + int16x8_t vones_x = {4, 0, 4, 0, 4, 0, 4, 0}; + int16x8_t vones_y = {0, 1, 0, 1, 0, 1, 0, 1}; + for(y1 = 0; y1 < BLOCK_SIZE; y1++, voff_y += vones_y) + { + int16x8_t* vrXY = (int16x8_t*)(XY + y1 * XY_step); + for(x1 = 0, voff_x = voff_y; x1 < BLOCK_SIZE; x1 += 4, vrXY++, voff_x += vones_x) + { + *vrXY += voff_x; + } + } + } + + // process the block + for( y1 = 0; y1 < dy; y1++ ) + { + uchar* dst_row = dst_data + (y + y1) * dst_step + x * cn; + const short* rXY = XY + y1 * XY_step; + if( cn == 1 ) + { + for( x1 = 0; x1 < dx; x1++ ) + { + int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff; + if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 ) + dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)]; + else + { + if( border_type == CV_HAL_BORDER_REPLICATE ) + { + vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh); + dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)]; + } + else if( border_type == CV_HAL_BORDER_CONSTANT ) + dst_row[x1] = border_const[0]; + else if( border_type != CV_HAL_BORDER_TRANSPARENT ) + { + vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type); + vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type); + dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)]; + } + } + } + } + else + { + uchar* dst_ptr = dst_row; + for(x1 = 0; x1 < dx; x1++, dst_ptr += cn ) + { + int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff; + const uchar *src_ptr; + if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 ) + { + if( cn == 3 ) + { + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2]; + // performance loss, commented out + // *(unsigned*)dst_ptr = __nds__bpick(*(unsigned*)dst_ptr, *(unsigned*)src_ptr, 0xFF000000); + } + else if( cn == 4 ) + { + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + *(uint8x4_t*)dst_ptr = *(uint8x4_t*)src_ptr; + } + else + { + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + int k = cn; + for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8) + *(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr; + while( k-- ) + dst_ptr[k] = src_ptr[k]; + } + } + else if( border_type != CV_HAL_BORDER_TRANSPARENT ) + { + if( border_type == CV_HAL_BORDER_REPLICATE ) + { + vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh); + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + } + else if( border_type == CV_HAL_BORDER_CONSTANT ) + src_ptr = &border_const[0]; + else + { + vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type); + vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type); + src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index); + } + int k = cn; + for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8) + *(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr; + while( k-- ) + dst_ptr[k] = src_ptr[k]; + } + } + } + } + } + } + + return CV_HAL_ERROR_OK; +} + +} // namespace ndsrvp + +} // namespace cv diff --git a/3rdparty/ndsrvp/src/threshold.cpp b/3rdparty/ndsrvp/src/threshold.cpp index 06de591fef..0812100311 100644 --- a/3rdparty/ndsrvp/src/threshold.cpp +++ b/3rdparty/ndsrvp/src/threshold.cpp @@ -4,65 +4,44 @@ #include "ndsrvp_hal.hpp" #include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { namespace ndsrvp { template -class operators_threshold_t { -public: - virtual ~operators_threshold_t() {}; - virtual inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) - { - (void)src; - (void)thresh; - (void)maxval; - CV_Error(cv::Error::StsBadArg, ""); - return vtype(); - } - virtual inline type scalar(const type& src, const type& thresh, const type& maxval) - { - (void)src; - (void)thresh; - (void)maxval; - CV_Error(cv::Error::StsBadArg, ""); - return type(); - } -}; - -template -class opThreshBinary : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshBinary_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { return (vtype)__nds__bpick((long)maxval, (long)0, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { return src > thresh ? maxval : 0; } }; template -class opThreshBinaryInv : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshBinaryInv_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { return (vtype)__nds__bpick((long)0, (long)maxval, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { return src > thresh ? 0 : maxval; } }; template -class opThreshTrunc : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshTrunc_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { (void)maxval; return (vtype)__nds__bpick((long)thresh, (long)src, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { (void)maxval; return src > thresh ? thresh : src; @@ -70,13 +49,13 @@ class opThreshTrunc : public operators_threshold_t { }; template -class opThreshToZero : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshToZero_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { (void)maxval; return (vtype)__nds__bpick((long)src, (long)0, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { (void)maxval; return src > thresh ? src : 0; @@ -84,29 +63,36 @@ class opThreshToZero : public operators_threshold_t { }; template -class opThreshToZeroInv : public operators_threshold_t { - inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override +struct opThreshToZeroInv_t { + inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) { (void)maxval; return (vtype)__nds__bpick((long)0, (long)src, (long)(src > thresh)); } - inline type scalar(const type& src, const type& thresh, const type& maxval) override + inline type scalar(const type& src, const type& thresh, const type& maxval) { (void)maxval; return src > thresh ? 0 : src; } }; -template -static void threshold_op(const type* src_data, size_t src_step, - type* dst_data, size_t dst_step, +template typename opThresh_t> +static inline void threshold_op(const uchar* src, size_t src_step, + uchar* dst, size_t dst_step, int width, int height, int cn, - type thresh, type maxval, int thtype) + double thresh_d, double maxval_d) { int i, j; width *= cn; + + type* src_data = (type*)src; + type* dst_data = (type*)dst; src_step /= sizeof(type); dst_step /= sizeof(type); + + type thresh = saturate_cast(thresh_d); + type maxval = saturate_cast(maxval_d); vtype vthresh; vtype vmaxval; for (i = 0; i < nlane; i++) { @@ -114,62 +100,63 @@ static void threshold_op(const type* src_data, size_t src_step, vmaxval[i] = maxval; } - operators_threshold_t* op; - switch (thtype) { - case CV_HAL_THRESH_BINARY: - op = new opThreshBinary(); - break; - case CV_HAL_THRESH_BINARY_INV: - op = new opThreshBinaryInv(); - break; - case CV_HAL_THRESH_TRUNC: - op = new opThreshTrunc(); - break; - case CV_HAL_THRESH_TOZERO: - op = new opThreshToZero(); - break; - case CV_HAL_THRESH_TOZERO_INV: - op = new opThreshToZeroInv(); - break; - default: - CV_Error(cv::Error::StsBadArg, ""); - return; - } + opThresh_t opThresh; for (i = 0; i < height; i++, src_data += src_step, dst_data += dst_step) { for (j = 0; j <= width - nlane; j += nlane) { - vtype vs = *(vtype*)(src_data + j); - *(vtype*)(dst_data + j) = op->vector(vs, vthresh, vmaxval); + *(vtype*)(dst_data + j) = opThresh.vector(*(vtype*)(src_data + j), vthresh, vmaxval); } for (; j < width; j++) { - dst_data[j] = op->scalar(src_data[j], thresh, maxval); + dst_data[j] = opThresh.scalar(src_data[j], thresh, maxval); } } - delete op; return; } +typedef void (*ThreshFunc)(const uchar* src_data, size_t src_step, + uchar* dst_data, size_t dst_step, + int width, int height, int cn, + double thresh, double maxval); + int threshold(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int depth, int cn, double thresh, double maxValue, int thresholdType) { - if (width <= 255 && height <= 255) // slower at small size - return CV_HAL_ERROR_NOT_IMPLEMENTED; - if (depth == CV_8U) { - threshold_op((uchar*)src_data, src_step, (uchar*)dst_data, dst_step, width, height, cn, (uchar)thresh, (uchar)maxValue, thresholdType); - return CV_HAL_ERROR_OK; - } else if (depth == CV_16S) { - threshold_op((short*)src_data, src_step, (short*)dst_data, dst_step, width, height, cn, (short)thresh, (short)maxValue, thresholdType); - return CV_HAL_ERROR_OK; - } else if (depth == CV_16U) { - threshold_op((ushort*)src_data, src_step, (ushort*)dst_data, dst_step, width, height, cn, (ushort)thresh, (ushort)maxValue, thresholdType); - return CV_HAL_ERROR_OK; - } else { + static ThreshFunc thfuncs[4][5] = + { + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op }, + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op }, + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op }, + { + threshold_op, + threshold_op, + threshold_op, + threshold_op, + threshold_op } + }; + + if(depth < 0 || depth > 3 || thresholdType < 0 || thresholdType > 4 || (width < 256 && height < 256)) return CV_HAL_ERROR_NOT_IMPLEMENTED; - } - return CV_HAL_ERROR_NOT_IMPLEMENTED; + + thfuncs[depth][thresholdType](src_data, src_step, dst_data, dst_step, width, height, cn, thresh, maxValue); + return CV_HAL_ERROR_OK; } } // namespace ndsrvp diff --git a/3rdparty/ndsrvp/src/warpAffine.cpp b/3rdparty/ndsrvp/src/warpAffine.cpp index d54e4dc237..4257361d1d 100644 --- a/3rdparty/ndsrvp/src/warpAffine.cpp +++ b/3rdparty/ndsrvp/src/warpAffine.cpp @@ -3,148 +3,68 @@ // of this distribution and at http://opencv.org/license.html. #include "ndsrvp_hal.hpp" -#include "opencv2/core.hpp" #include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { namespace ndsrvp { -class WarpAffineInvoker : public ParallelLoopBody { -public: - WarpAffineInvoker(const Mat& _src, Mat& _dst, int _interpolation, int _borderType, - const Scalar& _borderValue, int* _adelta, int* _bdelta, const double* _M) - : ParallelLoopBody() - , src(_src) - , dst(_dst) - , interpolation(_interpolation) - , borderType(_borderType) - , borderValue(_borderValue) - , adelta(_adelta) - , bdelta(_bdelta) - , M(_M) - { +int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) +{ + const int AB_BITS = MAX(10, (int)INTER_BITS); + int x1 = 0; + + for (; x1 < bw; x1 += 2) { + int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] }; + int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] }; + + vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15); + vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); } - virtual void operator()(const Range& range) const CV_OVERRIDE - { - const int BLOCK_SZ = 64; - AutoBuffer __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ); - short *XY = __XY.data(), *A = __A.data(); - const int AB_BITS = MAX(10, (int)INTER_BITS); - const int AB_SCALE = 1 << AB_BITS; - int round_delta = interpolation == CV_HAL_INTER_NEAREST ? AB_SCALE / 2 : AB_SCALE / INTER_TAB_SIZE / 2, x, y, x1, y1; - - int bh0 = std::min(BLOCK_SZ / 2, dst.rows); - int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, dst.cols); - bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, dst.rows); - - for (y = range.start; y < range.end; y += bh0) { - for (x = 0; x < dst.cols; x += bw0) { - int bw = std::min(bw0, dst.cols - x); - int bh = std::min(bh0, range.end - y); - - Mat _XY(bh, bw, CV_16SC2, XY); - Mat dpart(dst, Rect(x, y, bw, bh)); - - for (y1 = 0; y1 < bh; y1++) { - short* xy = XY + y1 * bw * 2; - int X0 = saturate_cast((M[1] * (y + y1) + M[2]) * AB_SCALE) + round_delta; - int Y0 = saturate_cast((M[4] * (y + y1) + M[5]) * AB_SCALE) + round_delta; - - if (interpolation == CV_HAL_INTER_NEAREST) { - x1 = 0; - - for (; x1 < bw; x1 += 2) { - int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] }; - int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] }; - - vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15); - vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); - } - - for (; x1 < bw; x1++) { - int X = (X0 + adelta[x + x1]) >> AB_BITS; - int Y = (Y0 + bdelta[x + x1]) >> AB_BITS; - xy[x1 * 2] = saturate_cast(X); - xy[x1 * 2 + 1] = saturate_cast(Y); - } - } else { - short* alpha = A + y1 * bw; - x1 = 0; - - const int INTER_MASK = INTER_TAB_SIZE - 1; - const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; - for (; x1 < bw; x1 += 2) { - int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] }; - int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] }; - vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS)); - vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS)); - - int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); - int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); - - uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); - *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; - } - - for (; x1 < bw; x1++) { - int X = (X0 + adelta[x + x1]) >> (AB_BITS - INTER_BITS); - int Y = (Y0 + bdelta[x + x1]) >> (AB_BITS - INTER_BITS); - xy[x1 * 2] = saturate_cast(X >> INTER_BITS); - xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1))); - } - } - } - - if (interpolation == CV_HAL_INTER_NEAREST) - remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue); - else { - Mat _matA(bh, bw, CV_16U, A); - remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue); - } - } - } + for (; x1 < bw; x1++) { + int X = X0 + adelta[x1]; + int Y = Y0 + bdelta[x1]; + xy[x1 * 2] = saturate_cast(X); + xy[x1 * 2 + 1] = saturate_cast(Y); } -private: - Mat src; - Mat dst; - int interpolation, borderType; - Scalar borderValue; - int *adelta, *bdelta; - const double* M; -}; - -int warpAffine(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[6], int interpolation, int borderType, const double borderValue[4]) -{ - Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); - Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); + return CV_HAL_ERROR_OK; +} - int x; - AutoBuffer _abdelta(dst.cols * 2); - int *adelta = &_abdelta[0], *bdelta = adelta + dst.cols; +int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) +{ const int AB_BITS = MAX(10, (int)INTER_BITS); - const int AB_SCALE = 1 << AB_BITS; + int x1 = 0; + + const int INTER_MASK = INTER_TAB_SIZE - 1; + const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; + for (; x1 < bw; x1 += 2) { + int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] }; + int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] }; + vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS)); + vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS)); + + int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); + int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); + + uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); + *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; + } - for (x = 0; x < dst.cols; x++) { - adelta[x] = saturate_cast(M[0] * x * AB_SCALE); - bdelta[x] = saturate_cast(M[3] * x * AB_SCALE); + for (; x1 < bw; x1++) { + int X = X0 + adelta[x1]; + int Y = Y0 + bdelta[x1]; + xy[x1 * 2] = saturate_cast(X >> INTER_BITS); + xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK)); } - Range range(0, dst.rows); - WarpAffineInvoker invoker(src, dst, interpolation, borderType, - Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]), - adelta, bdelta, M); - parallel_for_(range, invoker, dst.total() / (double)(1 << 16)); return CV_HAL_ERROR_OK; } diff --git a/3rdparty/ndsrvp/src/warpPerspective.cpp b/3rdparty/ndsrvp/src/warpPerspective.cpp index b4fa423ed7..40e44729d9 100644 --- a/3rdparty/ndsrvp/src/warpPerspective.cpp +++ b/3rdparty/ndsrvp/src/warpPerspective.cpp @@ -3,154 +3,90 @@ // of this distribution and at http://opencv.org/license.html. #include "ndsrvp_hal.hpp" -#include "opencv2/core.hpp" #include "opencv2/imgproc/hal/interface.h" +#include "cvutils.hpp" namespace cv { namespace ndsrvp { -class WarpPerspectiveInvoker : public ParallelLoopBody { -public: - WarpPerspectiveInvoker(const Mat& _src, Mat& _dst, const double* _M, int _interpolation, - int _borderType, const Scalar& _borderValue) - : ParallelLoopBody() - , src(_src) - , dst(_dst) - , M(_M) - , interpolation(_interpolation) - , borderType(_borderType) - , borderValue(_borderValue) - { +int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) +{ + int x1 = 0; + + for (; x1 < bw; x1 += 2) { + double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; + W1 = W1 ? 1. / W1 : 0; + W2 = W2 ? 1. / W2 : 0; + double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); + double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); + double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); + double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); + + int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; + int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; + + vX = __nds__v_sclip32(vX, 15); + vY = __nds__v_sclip32(vY, 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); } - virtual void operator()(const Range& range) const CV_OVERRIDE - { - const int BLOCK_SZ = 32; - short XY[BLOCK_SZ * BLOCK_SZ * 2], A[BLOCK_SZ * BLOCK_SZ]; - int x, y, y1, width = dst.cols, height = dst.rows; - - int bh0 = std::min(BLOCK_SZ / 2, height); - int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, width); - bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, height); - - for (y = range.start; y < range.end; y += bh0) { - for (x = 0; x < width; x += bw0) { - int bw = std::min(bw0, width - x); - int bh = std::min(bh0, range.end - y); // height - - Mat _XY(bh, bw, CV_16SC2, XY); - Mat dpart(dst, Rect(x, y, bw, bh)); - - for (y1 = 0; y1 < bh; y1++) { - short* xy = XY + y1 * bw * 2; - double X0 = M[0] * x + M[1] * (y + y1) + M[2]; - double Y0 = M[3] * x + M[4] * (y + y1) + M[5]; - double W0 = M[6] * x + M[7] * (y + y1) + M[8]; - - if (interpolation == CV_HAL_INTER_NEAREST) { - int x1 = 0; - - for (; x1 < bw; x1 += 2) { - double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; - W1 = W1 ? 1. / W1 : 0; - W2 = W2 ? 1. / W2 : 0; - double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); - double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); - double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); - double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); - - int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; - int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; - - vX = __nds__v_sclip32(vX, 15); - vY = __nds__v_sclip32(vY, 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX); - } - - for (; x1 < bw; x1++) { - double W = W0 + M[6] * x1; - W = W ? 1. / W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1 * 2] = saturate_cast(X); - xy[x1 * 2 + 1] = saturate_cast(Y); - } - } else { - short* alpha = A + y1 * bw; - int x1 = 0; - - const int INTER_MASK = INTER_TAB_SIZE - 1; - const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; - for (; x1 < bw; x1 += 2) { - double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; - W1 = W1 ? INTER_TAB_SIZE / W1 : 0; - W2 = W2 ? INTER_TAB_SIZE / W2 : 0; - double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); - double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); - double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); - double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); - - int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; - int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; - - int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); - int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); - - *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); - - uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); - *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; - } - - for (; x1 < bw; x1++) { - double W = W0 + M[6] * x1; - W = W ? INTER_TAB_SIZE / W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1 * 2] = saturate_cast(X >> INTER_BITS); - xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1))); - } - } - } - - if (interpolation == CV_HAL_INTER_NEAREST) - remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue); - else { - Mat _matA(bh, bw, CV_16U, A); - remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue); - } - } - } + for (; x1 < bw; x1++) { + double W = W0 + M[6] * x1; + W = W ? 1. / W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1 * 2] = saturate_cast(X); + xy[x1 * 2 + 1] = saturate_cast(Y); } -private: - Mat src; - Mat dst; - const double* M; - int interpolation, borderType; - Scalar borderValue; -}; - -int warpPerspective(int src_type, - const uchar* src_data, size_t src_step, int src_width, int src_height, - uchar* dst_data, size_t dst_step, int dst_width, int dst_height, - const double M[9], int interpolation, int borderType, const double borderValue[4]) + return CV_HAL_ERROR_OK; +} + +int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { - Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); - Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); + int x1 = 0; + + const int INTER_MASK = INTER_TAB_SIZE - 1; + const uint32x2_t vmask = { INTER_MASK, INTER_MASK }; + for (; x1 < bw; x1 += 2) { + double W1 = W0 + M[6] * x1, W2 = W1 + M[6]; + W1 = W1 ? INTER_TAB_SIZE / W1 : 0; + W2 = W2 ? INTER_TAB_SIZE / W2 : 0; + double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1)); + double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2)); + double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1)); + double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2)); + + int32x2_t vX = {saturate_cast(fX1), saturate_cast(fX2)}; + int32x2_t vY = {saturate_cast(fY1), saturate_cast(fY2)}; + + int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15); + int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15); + + *(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx); + + uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask)); + *(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) }; + } + + for (; x1 < bw; x1++) { + double W = W0 + M[6] * x1; + W = W ? INTER_TAB_SIZE / W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1 * 2] = saturate_cast(X >> INTER_BITS); + xy[x1 * 2 + 1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK)); + } - Range range(0, dst.rows); - WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3])); - parallel_for_(range, invoker, dst.total() / (double)(1 << 16)); return CV_HAL_ERROR_OK; } diff --git a/CMakeLists.txt b/CMakeLists.txt index 0693731a8b..29d05cd86b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1040,7 +1040,7 @@ foreach(hal ${OpenCV_HAL}) ocv_hal_register(NDSRVP_HAL_LIBRARIES NDSRVP_HAL_HEADERS NDSRVP_HAL_INCLUDE_DIRS) list(APPEND OpenCV_USED_HAL "ndsrvp (ver ${NDSRVP_HAL_VERSION})") else() - message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not open, disabling ndsrvp...") + message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not enabled, disabling ndsrvp...") endif() elseif(hal STREQUAL "halrvv") if(";${CPU_BASELINE_FINAL};" MATCHES ";RVV;") diff --git a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp index 48851ece07..d4b0f3fbb9 100644 --- a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp +++ b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp @@ -108,11 +108,19 @@ CV_EXPORTS void warpAffine(int src_type, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]); +CV_EXPORTS void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw); + +CV_EXPORTS void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw); + CV_EXPORTS void warpPerspective(int src_type, const uchar * src_data, size_t src_step, int src_width, int src_height, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]); +CV_EXPORTS void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw); + +CV_EXPORTS void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw); + CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, diff --git a/modules/imgproc/include/opencv2/imgproc/hal/interface.h b/modules/imgproc/include/opencv2/imgproc/hal/interface.h index f8dbcfe791..8e485b9fca 100644 --- a/modules/imgproc/include/opencv2/imgproc/hal/interface.h +++ b/modules/imgproc/include/opencv2/imgproc/hal/interface.h @@ -12,6 +12,12 @@ #define CV_HAL_INTER_CUBIC 2 #define CV_HAL_INTER_AREA 3 #define CV_HAL_INTER_LANCZOS4 4 +#define CV_HAL_INTER_LINEAR_EXACT 5 +#define CV_HAL_INTER_NEAREST_EXACT 6 +#define CV_HAL_INTER_MAX 7 +#define CV_HAL_WARP_FILL_OUTLIERS 8 +#define CV_HAL_WARP_INVERSE_MAP 16 +#define CV_HAL_WARP_RELATIVE_MAP 32 //! @} //! @name Morphology operations diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index 773fed9b48..ceb6c8b0f6 100644 --- a/modules/imgproc/src/hal_replacement.hpp +++ b/modules/imgproc/src/hal_replacement.hpp @@ -273,6 +273,29 @@ inline int hal_ni_resize(int src_type, const uchar *src_data, size_t src_step, i @sa cv::warpAffine, cv::hal::warpAffine */ inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpAffineBlocklineNN doing a row of affine transformation + @param adelta input M0 * x array + @param bdelta input M3 * x array + @param xy output (x', y') coordinates + @param X0 input M1 * y + M2 value + @param Y0 input M4 * y + M5 value + @param bw length of the row + @sa cv::warpAffineBlocklineNN, cv::hal::warpAffineBlocklineNN + */ +inline int hal_ni_warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpAffineBlockline doing a row of affine transformation + @param adelta input M0 * x array + @param bdelta input M3 * x array + @param xy output (x', y') coordinates + @param alpha output least significant bits of the (x', y') coordinates for interpolation + @param X0 input M1 * y + M2 value + @param Y0 input M4 * y + M5 value + @param bw length of the row + @sa cv::warpAffineBlockline, cv::hal::warpAffineBlockline + */ +inline int hal_ni_warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } /** @brief hal_warpPerspective @param src_type source and destination image type @@ -291,11 +314,38 @@ inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_ste @sa cv::warpPerspective, cv::hal::warpPerspective */ inline int hal_ni_warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpPerspectiveBlocklineNN doing a row of perspective transformation + @param M 3x3 matrix with transform coefficients + @param xy output (x', y') coordinates + @param X0 input M0 * x0 + M1 * y + M2 value + @param Y0 input M3 * x0 + M4 * y + M5 value + @param W0 input M6 * x0 + M7 * y + M8 value + @param bw length of the row + @sa cv::warpPerspectiveBlocklineNN, cv::hal::warpPerspectiveBlocklineNN + */ +inline int hal_ni_warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpPerspectiveBlockline doing a row of perspective transformation + @param M 3x3 matrix with transform coefficients + @param xy output (x', y') coordinates + @param alpha output least significant bits of the (x', y') coordinates for interpolation + @param X0 input M0 * x0 + M1 * y + M2 value + @param Y0 input M3 * x0 + M4 * y + M5 value + @param W0 input M6 * x0 + M7 * y + M8 value + @param bw length of the row + @sa cv::warpPerspectiveBlockline, cv::hal::warpPerspectiveBlockline + */ +inline int hal_ni_warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } //! @cond IGNORED #define cv_hal_resize hal_ni_resize #define cv_hal_warpAffine hal_ni_warpAffine +#define cv_hal_warpAffineBlocklineNN hal_ni_warpAffineBlocklineNN +#define cv_hal_warpAffineBlockline hal_ni_warpAffineBlockline #define cv_hal_warpPerspective hal_ni_warpPerspective +#define cv_hal_warpPerspectiveBlocklineNN hal_ni_warpPerspectiveBlocklineNN +#define cv_hal_warpPerspectiveBlockline hal_ni_warpPerspectiveBlockline //! @endcond /** diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index c0eaf8114c..4e4d718da3 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2268,16 +2268,7 @@ public: short *XY = __XY.data(), *A = __A.data(); const int AB_BITS = MAX(10, (int)INTER_BITS); const int AB_SCALE = 1 << AB_BITS; - int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1; - #if CV_TRY_AVX2 - bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2; - #endif - #if CV_TRY_SSE4_1 - bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1; - #endif - #if CV_TRY_LASX - bool useLASX = CV_CPU_HAS_SUPPORT_LASX; - #endif + int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, y1; int bh0 = std::min(BLOCK_SZ/2, dst.rows); int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols); @@ -2300,84 +2291,9 @@ public: int Y0 = saturate_cast((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta; if( interpolation == INTER_NEAREST ) - { - x1 = 0; - #if CV_TRY_SSE4_1 - if( useSSE4_1 ) - opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta + x, bdelta + x, xy, X0, Y0, bw); - else - #endif - { - #if CV_SIMD128 - { - v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0); - int span = VTraits::vlanes(); - for( ; x1 <= bw - span; x1 += span ) - { - v_int16x8 v_dst[2]; - #define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr(v_add(shift,v_load(ptr + offset))),\ - v_shr(v_add(shift,v_load(ptr + offset + 4)))) - v_dst[0] = CV_CONVERT_MAP(adelta, x+x1, v_X0); - v_dst[1] = CV_CONVERT_MAP(bdelta, x+x1, v_Y0); - #undef CV_CONVERT_MAP - v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]); - } - } - #endif - for( ; x1 < bw; x1++ ) - { - int X = (X0 + adelta[x+x1]) >> AB_BITS; - int Y = (Y0 + bdelta[x+x1]) >> AB_BITS; - xy[x1*2] = saturate_cast(X); - xy[x1*2+1] = saturate_cast(Y); - } - } - } + hal::warpAffineBlocklineNN(adelta + x, bdelta + x, xy, X0, Y0, bw); else - { - short* alpha = A + y1*bw; - x1 = 0; - #if CV_TRY_AVX2 - if ( useAVX2 ) - x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw); - #endif - #if CV_TRY_LASX - if ( useLASX ) - x1 = opt_LASX::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw); - #endif - #if CV_SIMD128 - { - v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0); - v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1); - int span = VTraits::vlanes(); - for( ; x1 <= bw - span * 2; x1 += span * 2 ) - { - v_int32x4 v_X0 = v_shr(v_add(v__X0, v_load(this->adelta + x + x1))); - v_int32x4 v_Y0 = v_shr(v_add(v__Y0, v_load(this->bdelta + x + x1))); - v_int32x4 v_X1 = v_shr(v_add(v__X0, v_load(this->adelta + x + x1 + span))); - v_int32x4 v_Y1 = v_shr(v_add(v__Y0, v_load(this->bdelta + x + x1 + span))); - - v_int16x8 v_xy[2]; - v_xy[0] = v_pack(v_shr(v_X0), v_shr(v_X1)); - v_xy[1] = v_pack(v_shr(v_Y0), v_shr(v_Y1)); - v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]); - - v_int32x4 v_alpha0 = v_or(v_shl(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask)); - v_int32x4 v_alpha1 = v_or(v_shl(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask)); - v_store(alpha + x1, v_pack(v_alpha0, v_alpha1)); - } - } - #endif - for( ; x1 < bw; x1++ ) - { - int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS); - int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS); - xy[x1*2] = saturate_cast(X >> INTER_BITS); - xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + - (X & (INTER_TAB_SIZE-1))); - } - } + hal::warpAffineBlockline(adelta + x, bdelta + x, xy, A + y1*bw, X0, Y0, bw); } if( interpolation == INTER_NEAREST ) @@ -2802,6 +2718,97 @@ void warpAffine(int src_type, parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) +{ + CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw); + + const int AB_BITS = MAX(10, (int)INTER_BITS); + int x1 = 0; + #if CV_TRY_SSE4_1 + bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1; + if( useSSE4_1 ) + opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta, bdelta, xy, X0, Y0, bw); + else + #endif + { + #if CV_SIMD128 + { + v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0); + int span = VTraits::vlanes(); + for( ; x1 <= bw - span; x1 += span ) + { + v_int16x8 v_dst[2]; + #define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr(v_add(shift,v_load(ptr + offset))),\ + v_shr(v_add(shift,v_load(ptr + offset + 4)))) + v_dst[0] = CV_CONVERT_MAP(adelta, x1, v_X0); + v_dst[1] = CV_CONVERT_MAP(bdelta, x1, v_Y0); + #undef CV_CONVERT_MAP + v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]); + } + } + #endif + for( ; x1 < bw; x1++ ) + { + int X = (X0 + adelta[x1]) >> AB_BITS; + int Y = (Y0 + bdelta[x1]) >> AB_BITS; + xy[x1*2] = saturate_cast(X); + xy[x1*2+1] = saturate_cast(Y); + } + } +} + +void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) +{ + CALL_HAL(warpAffineBlockline, cv_hal_warpAffineBlockline, adelta, bdelta, xy, alpha, X0, Y0, bw); + + const int AB_BITS = MAX(10, (int)INTER_BITS); + int x1 = 0; + #if CV_TRY_AVX2 + bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2; + if ( useAVX2 ) + x1 = opt_AVX2::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw); + #endif + #if CV_TRY_LASX + bool useLASX = CV_CPU_HAS_SUPPORT_LASX; + if ( useLASX ) + x1 = opt_LASX::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw); + #endif + { + #if CV_SIMD128 + { + v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0); + v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1); + int span = VTraits::vlanes(); + for( ; x1 <= bw - span * 2; x1 += span * 2 ) + { + v_int32x4 v_X0 = v_shr(v_add(v__X0, v_load(adelta + x1))); + v_int32x4 v_Y0 = v_shr(v_add(v__Y0, v_load(bdelta + x1))); + v_int32x4 v_X1 = v_shr(v_add(v__X0, v_load(adelta + x1 + span))); + v_int32x4 v_Y1 = v_shr(v_add(v__Y0, v_load(bdelta + x1 + span))); + + v_int16x8 v_xy[2]; + v_xy[0] = v_pack(v_shr(v_X0), v_shr(v_X1)); + v_xy[1] = v_pack(v_shr(v_Y0), v_shr(v_Y1)); + v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]); + + v_int32x4 v_alpha0 = v_or(v_shl(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask)); + v_int32x4 v_alpha1 = v_or(v_shl(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask)); + v_store(alpha + x1, v_pack(v_alpha0, v_alpha1)); + } + } + #endif + for( ; x1 < bw; x1++ ) + { + int X = (X0 + adelta[x1]) >> (AB_BITS - INTER_BITS); + int Y = (Y0 + bdelta[x1]) >> (AB_BITS - INTER_BITS); + xy[x1*2] = saturate_cast(X >> INTER_BITS); + xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + + (X & (INTER_TAB_SIZE-1))); + } + } +} + } // hal:: } // cv:: @@ -3204,12 +3211,6 @@ public: int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width); bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height); - #if CV_TRY_SSE4_1 - Ptr pwarp_impl_sse4; - if(CV_CPU_HAS_SUPPORT_SSE4_1) - pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M); - #endif - for( y = range.start; y < range.end; y += bh0 ) { for( x = 0; x < width; x += bw0 ) @@ -3228,57 +3229,9 @@ public: double W0 = M[6]*x + M[7]*(y + y1) + M[8]; if( interpolation == INTER_NEAREST ) - { - #if CV_TRY_SSE4_1 - if (pwarp_impl_sse4) - pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw); - else - #endif - #if CV_SIMD128_64F - WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw); - #else - for( int x1 = 0; x1 < bw; x1++ ) - { - double W = W0 + M[6]*x1; - W = W ? 1./W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1*2] = saturate_cast(X); - xy[x1*2+1] = saturate_cast(Y); - } - #endif - } + hal::warpPerspectiveBlocklineNN(M, xy, X0, Y0, W0, bw); else - { - short* alpha = A + y1*bw; - - #if CV_TRY_SSE4_1 - if (pwarp_impl_sse4) - pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw); - else - #endif - #if CV_SIMD128_64F - WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw); - #else - for( int x1 = 0; x1 < bw; x1++ ) - { - double W = W0 + M[6]*x1; - W = W ? INTER_TAB_SIZE/W : 0; - double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); - double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); - int X = saturate_cast(fX); - int Y = saturate_cast(fY); - - xy[x1*2] = saturate_cast(X >> INTER_BITS); - xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); - alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + - (X & (INTER_TAB_SIZE-1))); - } - #endif - } + hal::warpPerspectiveBlockline(M, xy, A + y1*bw, X0, Y0, W0, bw); } if( interpolation == INTER_NEAREST ) @@ -3371,6 +3324,74 @@ void warpPerspective(int src_type, parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) +{ + CALL_HAL(warpPerspectiveBlocklineNN, cv_hal_warpPerspectiveBlocklineNN, M, xy, X0, Y0, W0, bw); + + #if CV_TRY_SSE4_1 + Ptr pwarp_impl_sse4; + if(CV_CPU_HAS_SUPPORT_SSE4_1) + pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M); + + if (pwarp_impl_sse4) + pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw); + else + #endif + { + #if CV_SIMD128_64F + WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw); + #else + for( int x1 = 0; x1 < bw; x1++ ) + { + double W = W0 + M[6]*x1; + W = W ? 1./W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1*2] = saturate_cast(X); + xy[x1*2+1] = saturate_cast(Y); + } + #endif + } +} + +void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) +{ + CALL_HAL(warpPerspectiveBlockline, cv_hal_warpPerspectiveBlockline, M, xy, alpha, X0, Y0, W0, bw); + + #if CV_TRY_SSE4_1 + Ptr pwarp_impl_sse4; + if(CV_CPU_HAS_SUPPORT_SSE4_1) + pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M); + + if (pwarp_impl_sse4) + pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw); + else + #endif + { + #if CV_SIMD128_64F + WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw); + #else + for( int x1 = 0; x1 < bw; x1++ ) + { + double W = W0 + M[6]*x1; + W = W ? INTER_TAB_SIZE/W : 0; + double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W)); + double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W)); + int X = saturate_cast(fX); + int Y = saturate_cast(fY); + + xy[x1*2] = saturate_cast(X >> INTER_BITS); + xy[x1*2+1] = saturate_cast(Y >> INTER_BITS); + alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE + + (X & (INTER_TAB_SIZE-1))); + } + #endif + } +} + } // hal:: } // cv::