Merge branch 4.x

pull/25998/head
Alexander Smorkalov 6 months ago
commit 7e8f2a1bc4
Changed files (75), number of changed lines in parentheses:
  1. 3rdparty/ndsrvp/include/core.hpp (2)
  2. 3rdparty/ndsrvp/include/imgproc.hpp (45)
  3. 3rdparty/ndsrvp/ndsrvp_hal.hpp (5)
  4. 3rdparty/ndsrvp/src/cvutils.cpp (78)
  5. 3rdparty/ndsrvp/src/cvutils.hpp (108)
  6. 3rdparty/ndsrvp/src/integral.cpp (2)
  7. 3rdparty/ndsrvp/src/remap.cpp (188)
  8. 3rdparty/ndsrvp/src/threshold.cpp (147)
  9. 3rdparty/ndsrvp/src/warpAffine.cpp (174)
 10. 3rdparty/ndsrvp/src/warpPerspective.cpp (208)
 11. CMakeLists.txt (2)
 12. cmake/OpenCVCompilerOptimizations.cmake (4)
 13. cmake/checks/cpu_sse2.cpp (16)
 14. modules/3d/include/opencv2/3d.hpp (17)
 15. modules/3d/src/fisheye.cpp (42)
 16. modules/3d/test/test_fisheye.cpp (90)
 17. modules/calib/src/calibinit.cpp (56)
 18. modules/core/include/opencv2/core/cuda.hpp (1)
 19. modules/core/src/cuda/gpu_mat.cu (7)
 20. modules/core/src/cuda_gpu_mat.cpp (5)
 21. modules/core/src/matrix.cpp (6)
 22. modules/core/src/persistence_xml.cpp (2)
 23. modules/core/test/test_allocator.cpp (145)
 24. modules/core/test/test_io.cpp (18)
 25. modules/core/test/test_utils.cpp (2)
 26. modules/dnn/src/int8layers/quantization_utils.cpp (201)
 27. modules/dnn/src/layers/elementwise_layers.cpp (2)
 28. modules/dnn/src/layers/nary_eltwise_layers.cpp (2)
 29. modules/dnn/src/onnx/onnx_importer.cpp (16)
 30. modules/dnn/test/test_onnx_conformance.cpp (2)
 31. modules/dnn/test/test_onnx_conformance_layer_filter__openvino.inl.hpp (12)
 32. modules/dnn/test/test_onnx_conformance_layer_filter__vulkan_denylist.inl.hpp (6)
 33. modules/dnn/test/test_onnx_conformance_layer_filter_opencv_cpu_denylist.inl.hpp (1)
 34. modules/dnn/test/test_onnx_conformance_layer_filter_opencv_denylist.inl.hpp (4)
 35. modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp16_denylist.inl.hpp (8)
 36. modules/dnn/test/test_onnx_conformance_layer_filter_opencv_ocl_fp32_denylist.inl.hpp (6)
 37. modules/dnn/test/test_onnx_conformance_layer_parser_denylist.inl.hpp (4)
 38. modules/features2d/misc/java/test/ORBFeatureDetectorTest.java (39)
 39. modules/imgcodecs/src/bitstrm.cpp (54)
 40. modules/imgcodecs/src/bitstrm.hpp (20)
 41. modules/imgcodecs/src/grfmt_bmp.cpp (38)
 42. modules/imgcodecs/src/grfmt_pfm.cpp (34)
 43. modules/imgcodecs/src/grfmt_pxm.cpp (8)
 44. modules/imgcodecs/src/grfmt_sunras.cpp (18)
 45. modules/imgcodecs/src/grfmt_tiff.cpp (10)
 46. modules/imgcodecs/src/grfmt_tiff.hpp (4)
 47. modules/imgproc/include/opencv2/imgproc.hpp (6)
 48. modules/imgproc/include/opencv2/imgproc/hal/hal.hpp (8)
 49. modules/imgproc/include/opencv2/imgproc/hal/interface.h (6)
 50. modules/imgproc/src/color.cpp (23)
 51. modules/imgproc/src/color.hpp (18)
 52. modules/imgproc/src/color_yuv.dispatch.cpp (152)
 53. modules/imgproc/src/hal_replacement.hpp (198)
 54. modules/imgproc/src/imgwarp.cpp (307)
 55. modules/imgproc/test/test_color.cpp (6)
 56. modules/js/perf/perf_64bits.js (2)
 57. modules/js/perf/perf_helpfunc.js (2)
 58. modules/js/perf/perf_imgproc/perf_blur.js (2)
 59. modules/js/perf/perf_imgproc/perf_cvtcolor.js (2)
 60. modules/js/perf/perf_imgproc/perf_dilate.js (2)
 61. modules/js/perf/perf_imgproc/perf_erode.js (2)
 62. modules/js/perf/perf_imgproc/perf_filter2D.js (2)
 63. modules/js/perf/perf_imgproc/perf_gaussianBlur.js (2)
 64. modules/js/perf/perf_imgproc/perf_medianBlur.js (2)
 65. modules/js/perf/perf_imgproc/perf_pyrDown.js (2)
 66. modules/js/perf/perf_imgproc/perf_remap.js (2)
 67. modules/js/perf/perf_imgproc/perf_resize.js (2)
 68. modules/js/perf/perf_imgproc/perf_scharr.js (2)
 69. modules/js/perf/perf_imgproc/perf_sobel.js (2)
 70. modules/js/perf/perf_imgproc/perf_threshold.js (2)
 71. modules/js/perf/perf_imgproc/perf_warpAffine.js (2)
 72. modules/js/perf/perf_imgproc/perf_warpPerspective.js (2)
 73. modules/python/src2/gen2.py (15)
 74. modules/python/test/test_misc.py (2)
 75. modules/videoio/cmake/detect_obsensor.cmake (2)

@ -1,6 +1,6 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_NDSRVP_CORE_HPP
#define OPENCV_NDSRVP_CORE_HPP

@ -1,18 +1,12 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_NDSRVP_IMGPROC_HPP
#define OPENCV_NDSRVP_IMGPROC_HPP
namespace cv {
// ################ remap ################
void remap(InputArray _src, OutputArray _dst,
InputArray _map1, InputArray _map2,
int interpolation, int borderType, const Scalar& borderValue);
namespace ndsrvp {
enum InterpolationMasks {
@ -36,23 +30,36 @@ int integral(int depth, int sdepth, int sqdepth,
// ################ warpAffine ################
int warpAffine(int src_type,
const uchar* src_data, size_t src_step, int src_width, int src_height,
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[6], int interpolation, int borderType, const double borderValue[4]);
int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw);
#undef cv_hal_warpAffine
#define cv_hal_warpAffine (cv::ndsrvp::warpAffine)
#undef cv_hal_warpAffineBlocklineNN
#define cv_hal_warpAffineBlocklineNN (cv::ndsrvp::warpAffineBlocklineNN)
int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw);
#undef cv_hal_warpAffineBlockline
#define cv_hal_warpAffineBlockline (cv::ndsrvp::warpAffineBlockline)
// ################ warpPerspective ################
int warpPerspective(int src_type,
const uchar* src_data, size_t src_step, int src_width, int src_height,
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[9], int interpolation, int borderType, const double borderValue[4]);
int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw);
#undef cv_hal_warpPerspectiveBlocklineNN
#define cv_hal_warpPerspectiveBlocklineNN (cv::ndsrvp::warpPerspectiveBlocklineNN)
int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw);
#undef cv_hal_warpPerspectiveBlockline
#define cv_hal_warpPerspectiveBlockline (cv::ndsrvp::warpPerspectiveBlockline)
// ################ remap ################
int remap32f(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height,
uchar *dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step,
float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4]);
#undef cv_hal_warpPerspective
#define cv_hal_warpPerspective (cv::ndsrvp::warpPerspective)
#undef cv_hal_remap32f
#define cv_hal_remap32f (cv::ndsrvp::remap32f)
// ################ threshold ################

@ -1,13 +1,14 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_NDSRVP_HAL_HPP
#define OPENCV_NDSRVP_HAL_HPP
#include "opencv2/core/mat.hpp"
#include <nds_intrinsic.h>
#include "opencv2/core/hal/interface.h"
#include "include/core.hpp"
#include "include/imgproc.hpp"
#include "include/features2d.hpp"

@ -0,0 +1,78 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "cvutils.hpp"
namespace cv {
namespace ndsrvp {
// fastMalloc
// [0][1][2][3][4][5][6][7][8][9]
// ^udata
// ^adata
// ^adata[-1] == udata
void* fastMalloc(size_t size)
{
uchar* udata = (uchar*)malloc(size + sizeof(void*) + CV_MALLOC_ALIGN);
if(!udata)
ndsrvp_error(Error::StsNoMem, "fastMalloc(): Not enough memory");
uchar** adata = (uchar**)align((size_t)((uchar**)udata + 1), CV_MALLOC_ALIGN);
adata[-1] = udata;
return adata;
}
void fastFree(void* ptr)
{
if(ptr)
{
uchar* udata = ((uchar**)ptr)[-1];
if(!(udata < (uchar*)ptr && ((uchar*)ptr - udata) <= (ptrdiff_t)(sizeof(void*) + CV_MALLOC_ALIGN)))
ndsrvp_error(Error::StsBadArg, "fastFree(): Invalid memory block");
free(udata);
}
}
// borderInterpolate
int borderInterpolate(int p, int len, int borderType)
{
if( (unsigned)p < (unsigned)len )
;
else if( borderType == CV_HAL_BORDER_REPLICATE )
p = p < 0 ? 0 : len - 1;
else if( borderType == CV_HAL_BORDER_REFLECT || borderType == CV_HAL_BORDER_REFLECT_101 )
{
int delta = borderType == CV_HAL_BORDER_REFLECT_101;
if( len == 1 )
return 0;
do
{
if( p < 0 )
p = -p - 1 + delta;
else
p = len - 1 - (p - len) - delta;
}
while( (unsigned)p >= (unsigned)len );
}
else if( borderType == CV_HAL_BORDER_WRAP )
{
ndsrvp_assert(len > 0);
if( p < 0 )
p -= ((p - len + 1) / len) * len;
if( p >= len )
p %= len;
}
else if( borderType == CV_HAL_BORDER_CONSTANT )
p = -1;
else
ndsrvp_error(Error::StsBadArg, "borderInterpolate(): Unknown/unsupported border type");
return p;
}
} // namespace ndsrvp
} // namespace cv
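For reference, a minimal sketch of the values this helper is expected to produce for out-of-range indices, assuming an Andes NDSRVP build where cvutils.hpp and the CV_HAL_BORDER_* constants from opencv2/core/hal/interface.h are available (the results mirror cv::borderInterpolate for the same modes):
// illustrative only
#include <cassert>
#include "cvutils.hpp"
void borderInterpolateSketch()
{
    using cv::ndsrvp::borderInterpolate;
    assert(borderInterpolate(-2, 5, CV_HAL_BORDER_REPLICATE)   == 0);  // clamp to the first pixel
    assert(borderInterpolate( 6, 5, CV_HAL_BORDER_REPLICATE)   == 4);  // clamp to the last pixel
    assert(borderInterpolate(-2, 5, CV_HAL_BORDER_REFLECT_101) == 2);  // mirror without repeating the edge
    assert(borderInterpolate(-2, 5, CV_HAL_BORDER_WRAP)        == 3);  // periodic extension
    assert(borderInterpolate(-2, 5, CV_HAL_BORDER_CONSTANT)    == -1); // caller substitutes the border value
}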

@ -0,0 +1,108 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_NDSRVP_CVUTILS_HPP
#define OPENCV_NDSRVP_CVUTILS_HPP
#include <nds_intrinsic.h>
#include "opencv2/core/hal/interface.h"
#include <cstring>
#include <cmath>
#include <iostream>
#include <string>
#include <array>
#include <climits>
#include <algorithm>
// misc functions that are not exposed to the public interface
namespace cv {
namespace ndsrvp {
void* fastMalloc(size_t size);
void fastFree(void* ptr);
int borderInterpolate(int p, int len, int borderType);
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
#define CV_MALLOC_ALIGN 64
// error codes
enum Error{
StsNoMem = -4,
StsBadArg = -5,
StsAssert = -215
};
// output error
#define ndsrvp_assert(expr) { if(!(expr)) ndsrvp_error(Error::StsAssert, std::string(#expr)); }
inline void ndsrvp_error(int code, std::string msg = "")
{
std::cerr << "NDSRVP Error: code " << code << std::endl;
if(!msg.empty())
std::cerr << msg << std::endl;
if(code < 0)
throw code;
}
// clip & vclip
inline int clip(int x, int a, int b)
{
return x >= a ? (x < b ? x : b - 1) : a;
}
inline int32x2_t vclip(int32x2_t x, int32x2_t a, int32x2_t b)
{
return (int32x2_t)__nds__bpick((long)a, __nds__bpick((long)(b - 1), (long)x, (long)(x < b)), (long)(x >= a));
}
// saturate
template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
template<> inline uchar saturate_cast<uchar>(int v) { return __nds__uclip32(v, 8); }
template<> inline uchar saturate_cast<uchar>(float v) { return saturate_cast<uchar>((int)lrintf(v)); }
template<> inline uchar saturate_cast<uchar>(double v) { return saturate_cast<uchar>((int)lrint(v)); }
template<> inline char saturate_cast<char>(int v) { return __nds__sclip32(v, 7); }
template<> inline char saturate_cast<char>(float v) { return saturate_cast<char>((int)lrintf(v)); }
template<> inline char saturate_cast<char>(double v) { return saturate_cast<char>((int)lrint(v)); }
template<> inline ushort saturate_cast<ushort>(int v) { return __nds__uclip32(v, 16); }
template<> inline ushort saturate_cast<ushort>(float v) { return saturate_cast<ushort>((int)lrintf(v)); }
template<> inline ushort saturate_cast<ushort>(double v) { return saturate_cast<ushort>((int)lrint(v)); }
template<> inline short saturate_cast<short>(int v) { return __nds__sclip32(v, 15); }
template<> inline short saturate_cast<short>(float v) { return saturate_cast<short>((int)lrintf(v)); }
template<> inline short saturate_cast<short>(double v) { return saturate_cast<short>((int)lrint(v)); }
template<> inline int saturate_cast<int>(float v) { return (int)lrintf(v); }
template<> inline int saturate_cast<int>(double v) { return (int)lrint(v); }
// align
inline long align(size_t v, int n)
{
return (v + n - 1) & -n;
}
} // namespace ndsrvp
} // namespace cv
#endif
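As a quick sanity check of the arithmetic helpers above (a sketch only; align() rounds a byte count up to the next multiple of n, and clip() clamps an index into the half-open range [a, b) the way the warp/remap kernels expect):
// illustrative only; standalone mirrors of the inline helpers in this header
#include <cassert>
#include <cstddef>
static long align_up(size_t v, int n) { return (v + n - 1) & -n; }
static int clip_idx(int x, int a, int b) { return x >= a ? (x < b ? x : b - 1) : a; }
void cvutilsSketch()
{
    assert(align_up(13, 8)  == 16);  // round 13 up to the next multiple of 8
    assert(align_up(64, 64) == 64);  // already aligned values are unchanged
    assert(clip_idx(-3, 0, 5) == 0); // below the range clamps to a
    assert(clip_idx( 7, 0, 5) == 4); // at or above b clamps to b - 1
}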

@ -3,6 +3,8 @@
// of this distribution and at http://opencv.org/license.html.
#include "ndsrvp_hal.hpp"
#include "opencv2/imgproc/hal/interface.h"
#include "cvutils.hpp"
namespace cv {

@ -0,0 +1,188 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "ndsrvp_hal.hpp"
#include "opencv2/imgproc/hal/interface.h"
#include "cvutils.hpp"
namespace cv {
namespace ndsrvp {
int remap32f(int src_type, const uchar* src_data, size_t src_step, int src_width, int src_height,
uchar* dst_data, size_t dst_step, int dst_width, int dst_height, float* mapx, size_t mapx_step,
float* mapy, size_t mapy_step, int interpolation, int border_type, const double border_value[4])
{
const bool isRelative = ((interpolation & CV_HAL_WARP_RELATIVE_MAP) != 0);
interpolation &= ~CV_HAL_WARP_RELATIVE_MAP;
if( interpolation == CV_HAL_INTER_AREA )
interpolation = CV_HAL_INTER_LINEAR;
if( interpolation != CV_HAL_INTER_NEAREST )
return CV_HAL_ERROR_NOT_IMPLEMENTED;
// only CV_8U
if( (src_type & CV_MAT_DEPTH_MASK) != CV_8U )
return CV_HAL_ERROR_NOT_IMPLEMENTED;
int cn = CV_MAT_CN(src_type);
src_step /= sizeof(uchar);
dst_step /= sizeof(uchar);
// mapping CV_32FC1
mapx_step /= sizeof(float);
mapy_step /= sizeof(float);
// border
uchar border_const[CV_CN_MAX];
for( int k = 0; k < CV_CN_MAX; k++ )
border_const[k] = saturate_cast<uchar>(border_value[k & 3]);
// divide into blocks
const int BLOCK_SIZE = 1024;
int x, y, x1, y1;
std::array<short, BLOCK_SIZE * BLOCK_SIZE * 2> aXY;
short* XY = aXY.data();
size_t XY_step = BLOCK_SIZE * 2;
// vectorize
const int32x2_t src_wh = {src_width, src_height};
const int32x2_t arr_index = {cn, (int)src_step};
for (y = 0; y < dst_height; y += BLOCK_SIZE)
{
int dy = std::min(BLOCK_SIZE, dst_height - y);
for (x = 0; x < dst_width; x += BLOCK_SIZE)
{
const int off_y = isRelative ? y : 0;
const int off_x = isRelative ? x : 0;
const int32x2_t voff = {off_x, off_y};
int dx = std::min(BLOCK_SIZE, dst_width - x);
// prepare mapping data XY
for (y1 = 0; y1 < dy; y1++)
{
short* rXY = XY + y1 * XY_step;
const float* sX = mapx + (y + y1) * mapx_step + x;
const float* sY = mapy + (y + y1) * mapy_step + x;
for (x1 = 0; x1 < dx; x1++)
{
rXY[x1 * 2] = saturate_cast<short>(sX[x1]);
rXY[x1 * 2 + 1] = saturate_cast<short>(sY[x1]);
}
}
// precalculate offset: for a relative map, add each pixel's (x1, y1) position within the block here;
// the block origin (x, y) is added later through voff
if(isRelative)
{
int16x8_t voff_x;
int16x8_t voff_y = {0, 0, 1, 0, 2, 0, 3, 0};
int16x8_t vones_x = {4, 0, 4, 0, 4, 0, 4, 0};
int16x8_t vones_y = {0, 1, 0, 1, 0, 1, 0, 1};
for(y1 = 0; y1 < BLOCK_SIZE; y1++, voff_y += vones_y)
{
int16x8_t* vrXY = (int16x8_t*)(XY + y1 * XY_step);
for(x1 = 0, voff_x = voff_y; x1 < BLOCK_SIZE; x1 += 4, vrXY++, voff_x += vones_x)
{
*vrXY += voff_x;
}
}
}
// process the block
for( y1 = 0; y1 < dy; y1++ )
{
uchar* dst_row = dst_data + (y + y1) * dst_step + x * cn;
const short* rXY = XY + y1 * XY_step;
if( cn == 1 )
{
for( x1 = 0; x1 < dx; x1++ )
{
int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff;
if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 )
dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)];
else
{
if( border_type == CV_HAL_BORDER_REPLICATE )
{
vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh);
dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)];
}
else if( border_type == CV_HAL_BORDER_CONSTANT )
dst_row[x1] = border_const[0];
else if( border_type != CV_HAL_BORDER_TRANSPARENT )
{
vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type);
vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type);
dst_row[x1] = src_data[__nds__v_smar64(0, vsxy, arr_index)];
}
}
}
}
else
{
uchar* dst_ptr = dst_row;
for(x1 = 0; x1 < dx; x1++, dst_ptr += cn )
{
int32x2_t vsxy = (int32x2_t){rXY[x1 * 2], rXY[x1 * 2 + 1]} + voff;
const uchar *src_ptr;
if( (long)((uint32x2_t)vsxy < (uint32x2_t)src_wh) == -1 )
{
if( cn == 3 )
{
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2];
// performance loss, commented out
// *(unsigned*)dst_ptr = __nds__bpick(*(unsigned*)dst_ptr, *(unsigned*)src_ptr, 0xFF000000);
}
else if( cn == 4 )
{
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
*(uint8x4_t*)dst_ptr = *(uint8x4_t*)src_ptr;
}
else
{
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
int k = cn;
for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8)
*(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr;
while( k-- )
dst_ptr[k] = src_ptr[k];
}
}
else if( border_type != CV_HAL_BORDER_TRANSPARENT )
{
if( border_type == CV_HAL_BORDER_REPLICATE )
{
vsxy = vclip(vsxy, (int32x2_t){0, 0}, src_wh);
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
}
else if( border_type == CV_HAL_BORDER_CONSTANT )
src_ptr = &border_const[0];
else
{
vsxy[0] = borderInterpolate(vsxy[0], src_width, border_type);
vsxy[1] = borderInterpolate(vsxy[1], src_height, border_type);
src_ptr = (uchar*)__nds__v_smar64((long)src_data, vsxy, arr_index);
}
int k = cn;
for(; k >= 8; k -= 8, dst_ptr += 8, src_ptr += 8)
*(uint8x8_t*)dst_ptr = *(uint8x8_t*)src_ptr;
while( k-- )
dst_ptr[k] = src_ptr[k];
}
}
}
}
}
}
return CV_HAL_ERROR_OK;
}
} // namespace ndsrvp
} // namespace cv

@ -4,65 +4,44 @@
#include "ndsrvp_hal.hpp"
#include "opencv2/imgproc/hal/interface.h"
#include "cvutils.hpp"
namespace cv {
namespace ndsrvp {
template <typename type, typename vtype>
class operators_threshold_t {
public:
virtual ~operators_threshold_t() {};
virtual inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
{
(void)src;
(void)thresh;
(void)maxval;
CV_Error(cv::Error::StsBadArg, "");
return vtype();
}
virtual inline type scalar(const type& src, const type& thresh, const type& maxval)
{
(void)src;
(void)thresh;
(void)maxval;
CV_Error(cv::Error::StsBadArg, "");
return type();
}
};
template <typename type, typename vtype>
class opThreshBinary : public operators_threshold_t<type, vtype> {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
struct opThreshBinary_t {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
{
return (vtype)__nds__bpick((long)maxval, (long)0, (long)(src > thresh));
}
inline type scalar(const type& src, const type& thresh, const type& maxval) override
inline type scalar(const type& src, const type& thresh, const type& maxval)
{
return src > thresh ? maxval : 0;
}
};
template <typename type, typename vtype>
class opThreshBinaryInv : public operators_threshold_t<type, vtype> {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
struct opThreshBinaryInv_t {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
{
return (vtype)__nds__bpick((long)0, (long)maxval, (long)(src > thresh));
}
inline type scalar(const type& src, const type& thresh, const type& maxval) override
inline type scalar(const type& src, const type& thresh, const type& maxval)
{
return src > thresh ? 0 : maxval;
}
};
template <typename type, typename vtype>
class opThreshTrunc : public operators_threshold_t<type, vtype> {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
struct opThreshTrunc_t {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
{
(void)maxval;
return (vtype)__nds__bpick((long)thresh, (long)src, (long)(src > thresh));
}
inline type scalar(const type& src, const type& thresh, const type& maxval) override
inline type scalar(const type& src, const type& thresh, const type& maxval)
{
(void)maxval;
return src > thresh ? thresh : src;
@ -70,13 +49,13 @@ class opThreshTrunc : public operators_threshold_t<type, vtype> {
};
template <typename type, typename vtype>
class opThreshToZero : public operators_threshold_t<type, vtype> {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
struct opThreshToZero_t {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
{
(void)maxval;
return (vtype)__nds__bpick((long)src, (long)0, (long)(src > thresh));
}
inline type scalar(const type& src, const type& thresh, const type& maxval) override
inline type scalar(const type& src, const type& thresh, const type& maxval)
{
(void)maxval;
return src > thresh ? src : 0;
@ -84,29 +63,36 @@ class opThreshToZero : public operators_threshold_t<type, vtype> {
};
template <typename type, typename vtype>
class opThreshToZeroInv : public operators_threshold_t<type, vtype> {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval) override
struct opThreshToZeroInv_t {
inline vtype vector(const vtype& src, const vtype& thresh, const vtype& maxval)
{
(void)maxval;
return (vtype)__nds__bpick((long)0, (long)src, (long)(src > thresh));
}
inline type scalar(const type& src, const type& thresh, const type& maxval) override
inline type scalar(const type& src, const type& thresh, const type& maxval)
{
(void)maxval;
return src > thresh ? 0 : src;
}
};
template <typename type, typename vtype, int nlane>
static void threshold_op(const type* src_data, size_t src_step,
type* dst_data, size_t dst_step,
template <typename type, typename vtype, int nlane,
template <typename ttype, typename vttype> typename opThresh_t>
static inline void threshold_op(const uchar* src, size_t src_step,
uchar* dst, size_t dst_step,
int width, int height, int cn,
type thresh, type maxval, int thtype)
double thresh_d, double maxval_d)
{
int i, j;
width *= cn;
type* src_data = (type*)src;
type* dst_data = (type*)dst;
src_step /= sizeof(type);
dst_step /= sizeof(type);
type thresh = saturate_cast<type>(thresh_d);
type maxval = saturate_cast<type>(maxval_d);
vtype vthresh;
vtype vmaxval;
for (i = 0; i < nlane; i++) {
@ -114,62 +100,63 @@ static void threshold_op(const type* src_data, size_t src_step,
vmaxval[i] = maxval;
}
operators_threshold_t<type, vtype>* op;
switch (thtype) {
case CV_HAL_THRESH_BINARY:
op = new opThreshBinary<type, vtype>();
break;
case CV_HAL_THRESH_BINARY_INV:
op = new opThreshBinaryInv<type, vtype>();
break;
case CV_HAL_THRESH_TRUNC:
op = new opThreshTrunc<type, vtype>();
break;
case CV_HAL_THRESH_TOZERO:
op = new opThreshToZero<type, vtype>();
break;
case CV_HAL_THRESH_TOZERO_INV:
op = new opThreshToZeroInv<type, vtype>();
break;
default:
CV_Error(cv::Error::StsBadArg, "");
return;
}
opThresh_t<type, vtype> opThresh;
for (i = 0; i < height; i++, src_data += src_step, dst_data += dst_step) {
for (j = 0; j <= width - nlane; j += nlane) {
vtype vs = *(vtype*)(src_data + j);
*(vtype*)(dst_data + j) = op->vector(vs, vthresh, vmaxval);
*(vtype*)(dst_data + j) = opThresh.vector(*(vtype*)(src_data + j), vthresh, vmaxval);
}
for (; j < width; j++) {
dst_data[j] = op->scalar(src_data[j], thresh, maxval);
dst_data[j] = opThresh.scalar(src_data[j], thresh, maxval);
}
}
delete op;
return;
}
typedef void (*ThreshFunc)(const uchar* src_data, size_t src_step,
uchar* dst_data, size_t dst_step,
int width, int height, int cn,
double thresh, double maxval);
int threshold(const uchar* src_data, size_t src_step,
uchar* dst_data, size_t dst_step,
int width, int height, int depth, int cn,
double thresh, double maxValue, int thresholdType)
{
if (width <= 255 && height <= 255) // slower at small size
return CV_HAL_ERROR_NOT_IMPLEMENTED;
if (depth == CV_8U) {
threshold_op<uchar, uint8x8_t, 8>((uchar*)src_data, src_step, (uchar*)dst_data, dst_step, width, height, cn, (uchar)thresh, (uchar)maxValue, thresholdType);
return CV_HAL_ERROR_OK;
} else if (depth == CV_16S) {
threshold_op<short, int16x4_t, 4>((short*)src_data, src_step, (short*)dst_data, dst_step, width, height, cn, (short)thresh, (short)maxValue, thresholdType);
return CV_HAL_ERROR_OK;
} else if (depth == CV_16U) {
threshold_op<ushort, uint16x4_t, 4>((ushort*)src_data, src_step, (ushort*)dst_data, dst_step, width, height, cn, (ushort)thresh, (ushort)maxValue, thresholdType);
return CV_HAL_ERROR_OK;
} else {
static ThreshFunc thfuncs[4][5] =
{
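// rows: depth index (CV_8U, CV_8S, CV_16U, CV_16S); columns: threshold type
// (BINARY, BINARY_INV, TRUNC, TOZERO, TOZERO_INV), matching the [depth][thresholdType] lookup below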
{
threshold_op<uchar, uint8x8_t, 8, opThreshBinary_t>,
threshold_op<uchar, uint8x8_t, 8, opThreshBinaryInv_t>,
threshold_op<uchar, uint8x8_t, 8, opThreshTrunc_t>,
threshold_op<uchar, uint8x8_t, 8, opThreshToZero_t>,
threshold_op<uchar, uint8x8_t, 8, opThreshToZeroInv_t> },
{
threshold_op<char, int8x8_t, 8, opThreshBinary_t>,
threshold_op<char, int8x8_t, 8, opThreshBinaryInv_t>,
threshold_op<char, int8x8_t, 8, opThreshTrunc_t>,
threshold_op<char, int8x8_t, 8, opThreshToZero_t>,
threshold_op<char, int8x8_t, 8, opThreshToZeroInv_t> },
{
threshold_op<ushort, uint16x4_t, 4, opThreshBinary_t>,
threshold_op<ushort, uint16x4_t, 4, opThreshBinaryInv_t>,
threshold_op<ushort, uint16x4_t, 4, opThreshTrunc_t>,
threshold_op<ushort, uint16x4_t, 4, opThreshToZero_t>,
threshold_op<ushort, uint16x4_t, 4, opThreshToZeroInv_t> },
{
threshold_op<short, int16x4_t, 4, opThreshBinary_t>,
threshold_op<short, int16x4_t, 4, opThreshBinaryInv_t>,
threshold_op<short, int16x4_t, 4, opThreshTrunc_t>,
threshold_op<short, int16x4_t, 4, opThreshToZero_t>,
threshold_op<short, int16x4_t, 4, opThreshToZeroInv_t> }
};
if(depth < 0 || depth > 3 || thresholdType < 0 || thresholdType > 4 || (width < 256 && height < 256))
return CV_HAL_ERROR_NOT_IMPLEMENTED;
}
return CV_HAL_ERROR_NOT_IMPLEMENTED;
thfuncs[depth][thresholdType](src_data, src_step, dst_data, dst_step, width, height, cn, thresh, maxValue);
return CV_HAL_ERROR_OK;
}
} // namespace ndsrvp

@ -3,148 +3,68 @@
// of this distribution and at http://opencv.org/license.html.
#include "ndsrvp_hal.hpp"
#include "opencv2/core.hpp"
#include "opencv2/imgproc/hal/interface.h"
#include "cvutils.hpp"
namespace cv {
namespace ndsrvp {
class WarpAffineInvoker : public ParallelLoopBody {
public:
WarpAffineInvoker(const Mat& _src, Mat& _dst, int _interpolation, int _borderType,
const Scalar& _borderValue, int* _adelta, int* _bdelta, const double* _M)
: ParallelLoopBody()
, src(_src)
, dst(_dst)
, interpolation(_interpolation)
, borderType(_borderType)
, borderValue(_borderValue)
, adelta(_adelta)
, bdelta(_bdelta)
, M(_M)
{
int warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw)
{
const int AB_BITS = MAX(10, (int)INTER_BITS);
int x1 = 0;
for (; x1 < bw; x1 += 2) {
int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] };
int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] };
vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15);
vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
}
virtual void operator()(const Range& range) const CV_OVERRIDE
{
const int BLOCK_SZ = 64;
AutoBuffer<short, 0> __XY(BLOCK_SZ * BLOCK_SZ * 2), __A(BLOCK_SZ * BLOCK_SZ);
short *XY = __XY.data(), *A = __A.data();
const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS;
int round_delta = interpolation == CV_HAL_INTER_NEAREST ? AB_SCALE / 2 : AB_SCALE / INTER_TAB_SIZE / 2, x, y, x1, y1;
int bh0 = std::min(BLOCK_SZ / 2, dst.rows);
int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, dst.cols);
bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, dst.rows);
for (y = range.start; y < range.end; y += bh0) {
for (x = 0; x < dst.cols; x += bw0) {
int bw = std::min(bw0, dst.cols - x);
int bh = std::min(bh0, range.end - y);
Mat _XY(bh, bw, CV_16SC2, XY);
Mat dpart(dst, Rect(x, y, bw, bh));
for (y1 = 0; y1 < bh; y1++) {
short* xy = XY + y1 * bw * 2;
int X0 = saturate_cast<int>((M[1] * (y + y1) + M[2]) * AB_SCALE) + round_delta;
int Y0 = saturate_cast<int>((M[4] * (y + y1) + M[5]) * AB_SCALE) + round_delta;
if (interpolation == CV_HAL_INTER_NEAREST) {
x1 = 0;
for (; x1 < bw; x1 += 2) {
int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] };
int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] };
vX = __nds__v_sclip32(__nds__v_sra32(vX, AB_BITS), 15);
vY = __nds__v_sclip32(__nds__v_sra32(vY, AB_BITS), 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
}
for (; x1 < bw; x1++) {
int X = (X0 + adelta[x + x1]) >> AB_BITS;
int Y = (Y0 + bdelta[x + x1]) >> AB_BITS;
xy[x1 * 2] = saturate_cast<short>(X);
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
}
} else {
short* alpha = A + y1 * bw;
x1 = 0;
const int INTER_MASK = INTER_TAB_SIZE - 1;
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
for (; x1 < bw; x1 += 2) {
int32x2_t vX = { X0 + adelta[x + x1], X0 + adelta[x + x1 + 1] };
int32x2_t vY = { Y0 + bdelta[x + x1], Y0 + bdelta[x + x1 + 1] };
vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS));
vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS));
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
}
for (; x1 < bw; x1++) {
int X = (X0 + adelta[x + x1]) >> (AB_BITS - INTER_BITS);
int Y = (Y0 + bdelta[x + x1]) >> (AB_BITS - INTER_BITS);
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1)));
}
}
}
if (interpolation == CV_HAL_INTER_NEAREST)
remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue);
else {
Mat _matA(bh, bw, CV_16U, A);
remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue);
}
}
}
for (; x1 < bw; x1++) {
int X = X0 + adelta[x1];
int Y = Y0 + bdelta[x1];
xy[x1 * 2] = saturate_cast<short>(X);
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
}
private:
Mat src;
Mat dst;
int interpolation, borderType;
Scalar borderValue;
int *adelta, *bdelta;
const double* M;
};
int warpAffine(int src_type,
const uchar* src_data, size_t src_step, int src_width, int src_height,
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[6], int interpolation, int borderType, const double borderValue[4])
{
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
return CV_HAL_ERROR_OK;
}
int x;
AutoBuffer<int> _abdelta(dst.cols * 2);
int *adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
int warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw)
{
const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS;
int x1 = 0;
const int INTER_MASK = INTER_TAB_SIZE - 1;
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
for (; x1 < bw; x1 += 2) {
int32x2_t vX = { X0 + adelta[x1], X0 + adelta[x1 + 1] };
int32x2_t vY = { Y0 + bdelta[x1], Y0 + bdelta[x1 + 1] };
vX = __nds__v_sra32(vX, (AB_BITS - INTER_BITS));
vY = __nds__v_sra32(vY, (AB_BITS - INTER_BITS));
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
}
for (x = 0; x < dst.cols; x++) {
adelta[x] = saturate_cast<int>(M[0] * x * AB_SCALE);
bdelta[x] = saturate_cast<int>(M[3] * x * AB_SCALE);
for (; x1 < bw; x1++) {
int X = X0 + adelta[x1];
int Y = Y0 + bdelta[x1];
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK));
}
Range range(0, dst.rows);
WarpAffineInvoker invoker(src, dst, interpolation, borderType,
Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]),
adelta, bdelta, M);
parallel_for_(range, invoker, dst.total() / (double)(1 << 16));
return CV_HAL_ERROR_OK;
}
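// Note on the fixed-point scheme shared by both blockline helpers: coordinates are accumulated with
// AB_BITS (= max(10, INTER_BITS)) fractional bits. The NN variant shifts right by AB_BITS to obtain
// integer pixel indices; the bilinear variant shifts by AB_BITS - INTER_BITS, keeps the upper bits as
// the integer coordinate, and packs the low INTER_BITS bits of X and Y into alpha as an index into the
// INTER_TAB_SIZE x INTER_TAB_SIZE interpolation table (alpha = (Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK)).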

@ -3,154 +3,90 @@
// of this distribution and at http://opencv.org/license.html.
#include "ndsrvp_hal.hpp"
#include "opencv2/core.hpp"
#include "opencv2/imgproc/hal/interface.h"
#include "cvutils.hpp"
namespace cv {
namespace ndsrvp {
class WarpPerspectiveInvoker : public ParallelLoopBody {
public:
WarpPerspectiveInvoker(const Mat& _src, Mat& _dst, const double* _M, int _interpolation,
int _borderType, const Scalar& _borderValue)
: ParallelLoopBody()
, src(_src)
, dst(_dst)
, M(_M)
, interpolation(_interpolation)
, borderType(_borderType)
, borderValue(_borderValue)
{
int warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw)
{
int x1 = 0;
for (; x1 < bw; x1 += 2) {
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
W1 = W1 ? 1. / W1 : 0;
W2 = W2 ? 1. / W2 : 0;
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
vX = __nds__v_sclip32(vX, 15);
vY = __nds__v_sclip32(vY, 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
}
virtual void operator()(const Range& range) const CV_OVERRIDE
{
const int BLOCK_SZ = 32;
short XY[BLOCK_SZ * BLOCK_SZ * 2], A[BLOCK_SZ * BLOCK_SZ];
int x, y, y1, width = dst.cols, height = dst.rows;
int bh0 = std::min(BLOCK_SZ / 2, height);
int bw0 = std::min(BLOCK_SZ * BLOCK_SZ / bh0, width);
bh0 = std::min(BLOCK_SZ * BLOCK_SZ / bw0, height);
for (y = range.start; y < range.end; y += bh0) {
for (x = 0; x < width; x += bw0) {
int bw = std::min(bw0, width - x);
int bh = std::min(bh0, range.end - y); // height
Mat _XY(bh, bw, CV_16SC2, XY);
Mat dpart(dst, Rect(x, y, bw, bh));
for (y1 = 0; y1 < bh; y1++) {
short* xy = XY + y1 * bw * 2;
double X0 = M[0] * x + M[1] * (y + y1) + M[2];
double Y0 = M[3] * x + M[4] * (y + y1) + M[5];
double W0 = M[6] * x + M[7] * (y + y1) + M[8];
if (interpolation == CV_HAL_INTER_NEAREST) {
int x1 = 0;
for (; x1 < bw; x1 += 2) {
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
W1 = W1 ? 1. / W1 : 0;
W2 = W2 ? 1. / W2 : 0;
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
vX = __nds__v_sclip32(vX, 15);
vY = __nds__v_sclip32(vY, 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vY, (unsigned long)vX);
}
for (; x1 < bw; x1++) {
double W = W0 + M[6] * x1;
W = W ? 1. / W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1 * 2] = saturate_cast<short>(X);
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
}
} else {
short* alpha = A + y1 * bw;
int x1 = 0;
const int INTER_MASK = INTER_TAB_SIZE - 1;
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
for (; x1 < bw; x1 += 2) {
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
W1 = W1 ? INTER_TAB_SIZE / W1 : 0;
W2 = W2 ? INTER_TAB_SIZE / W2 : 0;
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
}
for (; x1 < bw; x1++) {
double W = W0 + M[6] * x1;
W = W ? INTER_TAB_SIZE / W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE - 1)) * INTER_TAB_SIZE + (X & (INTER_TAB_SIZE - 1)));
}
}
}
if (interpolation == CV_HAL_INTER_NEAREST)
remap(src, dpart, _XY, Mat(), interpolation, borderType, borderValue);
else {
Mat _matA(bh, bw, CV_16U, A);
remap(src, dpart, _XY, _matA, interpolation, borderType, borderValue);
}
}
}
for (; x1 < bw; x1++) {
double W = W0 + M[6] * x1;
W = W ? 1. / W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1 * 2] = saturate_cast<short>(X);
xy[x1 * 2 + 1] = saturate_cast<short>(Y);
}
private:
Mat src;
Mat dst;
const double* M;
int interpolation, borderType;
Scalar borderValue;
};
int warpPerspective(int src_type,
const uchar* src_data, size_t src_step, int src_width, int src_height,
uchar* dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[9], int interpolation, int borderType, const double borderValue[4])
return CV_HAL_ERROR_OK;
}
int warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
{
Mat src(Size(src_width, src_height), src_type, const_cast<uchar*>(src_data), src_step);
Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step);
int x1 = 0;
const int INTER_MASK = INTER_TAB_SIZE - 1;
const uint32x2_t vmask = { INTER_MASK, INTER_MASK };
for (; x1 < bw; x1 += 2) {
double W1 = W0 + M[6] * x1, W2 = W1 + M[6];
W1 = W1 ? INTER_TAB_SIZE / W1 : 0;
W2 = W2 ? INTER_TAB_SIZE / W2 : 0;
double fX1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W1));
double fX2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * (x1 + 1)) * W2));
double fY1 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W1));
double fY2 = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * (x1 + 1)) * W2));
int32x2_t vX = {saturate_cast<int>(fX1), saturate_cast<int>(fX2)};
int32x2_t vY = {saturate_cast<int>(fY1), saturate_cast<int>(fY2)};
int32x2_t vx = __nds__v_sclip32(__nds__v_sra32(vX, INTER_BITS), 15);
int32x2_t vy = __nds__v_sclip32(__nds__v_sra32(vY, INTER_BITS), 15);
*(uint16x4_t*)(xy + x1 * 2) = (uint16x4_t)__nds__pkbb16((unsigned long)vy, (unsigned long)vx);
uint32x2_t valpha = __nds__v_uadd32(__nds__v_sll32((uint32x2_t)(vY & vmask), INTER_BITS), (uint32x2_t)(vX & vmask));
*(int16x2_t*)(alpha + x1) = (int16x2_t) { (short)(valpha[0]), (short)(valpha[1]) };
}
for (; x1 < bw; x1++) {
double W = W0 + M[6] * x1;
W = W ? INTER_TAB_SIZE / W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0] * x1) * W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3] * x1) * W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1 * 2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1 * 2 + 1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & INTER_MASK) * INTER_TAB_SIZE + (X & INTER_MASK));
}
Range range(0, dst.rows);
WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]));
parallel_for_(range, invoker, dst.total() / (double)(1 << 16));
return CV_HAL_ERROR_OK;
}

@ -1026,7 +1026,7 @@ foreach(hal ${OpenCV_HAL})
ocv_hal_register(NDSRVP_HAL_LIBRARIES NDSRVP_HAL_HEADERS NDSRVP_HAL_INCLUDE_DIRS)
list(APPEND OpenCV_USED_HAL "ndsrvp (ver ${NDSRVP_HAL_VERSION})")
else()
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not open, disabling ndsrvp...")
message(STATUS "NDSRVP: Andes GNU Toolchain DSP extension is not enabled, disabling ndsrvp...")
endif()
elseif(hal STREQUAL "halrvv")
if(";${CPU_BASELINE_FINAL};" MATCHES ";RVV;")

@ -171,7 +171,7 @@ elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ")
endif()
if(X86 OR X86_64)
ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")
ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_COMMON;AVX512_KNL;AVX512_KNM;AVX512_SKX;AVX512_CNL;AVX512_CLX;AVX512_ICL")
ocv_update(CPU_AVX512_COMMON_GROUP "AVX_512F;AVX_512CD")
ocv_update(CPU_AVX512_KNL_GROUP "AVX512_COMMON;AVX512_KNL_EXTRA")
@ -440,7 +440,7 @@ macro(ocv_check_compiler_optimization OPT)
set(_varname "")
if(CPU_${OPT}_TEST_FILE)
set(__available 0)
if(CPU_BASELINE_DETECT)
if(__is_from_baseline OR CPU_BASELINE_DETECT)
set(_varname "HAVE_CPU_${OPT}_SUPPORT")
ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}")
if(${_varname})

@ -1,2 +1,16 @@
#include <emmintrin.h>
int main() { return 0; }
inline __m128i _v128_comgt_epu32(const __m128i& a, const __m128i& b)
{
const __m128i delta = _mm_set1_epi32((int)0x80000000);
return _mm_cmpgt_epi32(_mm_xor_si128(a, delta), _mm_xor_si128(b, delta));
}
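// XOR-ing both operands with 0x80000000 flips the sign bit and maps unsigned order onto signed order,
// so the signed comparison above implements an unsigned 32-bit greater-than (SSE2 has no _mm_cmpgt_epu32).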
int main()
{
__m128i a, b, c;
a = _mm_set1_epi32(0x00000000);
b = _mm_set1_epi32(0x0000ffff);
c = _v128_comgt_epu32(a, b);
return 0;
}

@ -2512,10 +2512,25 @@ the number of points in the view.
@param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
Note that the function assumes the camera intrinsic matrix of the undistorted points to be identity.
This means if you want to distort image points you have to multiply them with \f$K^{-1}\f$.
This means if you want to distort image points you have to multiply them with \f$K^{-1}\f$ or
use another function overload.
*/
CV_EXPORTS_W void distortPoints(InputArray undistorted, OutputArray distorted, InputArray K, InputArray D, double alpha = 0);
/** @overload
Overload of distortPoints function to handle cases when the undistorted points are obtained with a non-identity
camera matrix, e.g. the output of #estimateNewCameraMatrixForUndistortRectify.
@param undistorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is
the number of points in the view.
@param Kundistorted Camera intrinsic matrix used as new camera matrix for undistortion.
@param K Camera intrinsic matrix \f$\cameramatrix{K}\f$.
@param D Input vector of distortion coefficients \f$\distcoeffsfisheye\f$.
@param alpha The skew coefficient.
@param distorted Output array of image points, 1xN/Nx1 2-channel, or vector\<Point2f\> .
@sa estimateNewCameraMatrixForUndistortRectify
*/
CV_EXPORTS_W void distortPoints(InputArray undistorted, OutputArray distorted, InputArray Kundistorted, InputArray K, InputArray D, double alpha = 0);
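An illustrative call sequence for this overload, sketched with placeholder calibration values (K, D and imageSize would come from a real fisheye calibration; assumes the usual OpenCV headers) and mirroring the distortUndistortPointsNewCameraFixed test added in test_fisheye.cpp:
// sketch only; placeholder intrinsics and distortion
cv::Matx33d K(600, 0, 640,  0, 600, 400,  0, 0, 1);
cv::Vec4d D(-0.01, 0.005, 0.0, 0.0);
cv::Size imageSize(1280, 800);
cv::Mat Knew, Reye = cv::Mat::eye(3, 3, CV_64FC1);
cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, D, imageSize, Reye, Knew);
std::vector<cv::Point2d> undistorted(1, cv::Point2d(640, 400)); // point expressed in the Knew image plane
std::vector<cv::Point2d> distorted;
cv::fisheye::distortPoints(undistorted, distorted, Knew, K, D); // Kundistorted = Knew, then the calibrated K, D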
/** @brief Undistorts 2D points using fisheye model
@param distorted Array of object points, 1xN/Nx1 2-channel (or vector\<Point2f\> ), where N is the

@ -266,6 +266,48 @@ void cv::fisheye::distortPoints(InputArray undistorted, OutputArray distorted, I
}
}
void cv::fisheye::distortPoints(InputArray _undistorted, OutputArray distorted, InputArray Kundistorted, InputArray K, InputArray D, double alpha)
{
CV_INSTRUMENT_REGION();
CV_Assert(_undistorted.type() == CV_32FC2 || _undistorted.type() == CV_64FC2);
CV_Assert(Kundistorted.size() == Size(3,3) && (Kundistorted.type() == CV_32F || Kundistorted.type() == CV_64F));
cv::Mat undistorted = _undistorted.getMat();
cv::Mat normalized(undistorted.size(), CV_64FC2);
Mat Knew = Kundistorted.getMat();
double cx, cy, fx, fy;
if (Knew.depth() == CV_32F)
{
fx = (double)Knew.at<float>(0, 0);
fy = (double)Knew.at<float>(1, 1);
cx = (double)Knew.at<float>(0, 2);
cy = (double)Knew.at<float>(1, 2);
}
else
{
fx = Knew.at<double>(0, 0);
fy = Knew.at<double>(1, 1);
cx = Knew.at<double>(0, 2);
cy = Knew.at<double>(1, 2);
}
size_t n = undistorted.total();
const Vec2f* Xf = undistorted.ptr<Vec2f>();
const Vec2d* Xd = undistorted.ptr<Vec2d>();
Vec2d* normXd = normalized.ptr<Vec2d>();
for (size_t i = 0; i < n; i++)
{
Vec2d p = undistorted.depth() == CV_32F ? (Vec2d)Xf[i] : Xd[i];
normXd[i][0] = (p[0] - cx) / fx;
normXd[i][1] = (p[1] - cy) / fy;
}
cv::fisheye::distortPoints(normalized, distorted, K, D, alpha);
}
void cv::fisheye::undistortPoints( InputArray distorted, OutputArray undistorted, InputArray K, InputArray D,
InputArray R, InputArray P, TermCriteria criteria)
{

@ -86,7 +86,6 @@ TEST_F(fisheyeTest, distortUndistortPoints)
int height = imageSize.height;
/* Create test points */
std::vector<cv::Point2d> points0Vector;
cv::Mat principalPoints = (cv::Mat_<double>(5, 2) << K(0, 2), K(1, 2), // (cx, cy)
/* Image corners */
0, 0,
@ -129,6 +128,95 @@ TEST_F(fisheyeTest, distortUndistortPoints)
}
}
TEST_F(fisheyeTest, distortUndistortPointsNewCameraFixed)
{
int width = imageSize.width;
int height = imageSize.height;
/* Random points inside image */
cv::Mat xy[2] = {};
xy[0].create(100, 1, CV_64F);
theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x
xy[1].create(100, 1, CV_64F);
theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y
cv::Mat randomPoints;
merge(xy, 2, randomPoints);
cv::Mat points0 = randomPoints;
cv::Mat Reye = cv::Mat::eye(3, 3, CV_64FC1);
cv::Mat Knew;
cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, D, imageSize, Reye, Knew);
/* Distort -> Undistort */
cv::Mat distortedPoints;
cv::fisheye::distortPoints(points0, distortedPoints, Knew, K, D);
cv::Mat undistortedPoints;
cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, D, Reye, Knew);
EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8);
/* Undistort -> Distort */
cv::fisheye::undistortPoints(points0, undistortedPoints, K, D, Reye, Knew);
cv::fisheye::distortPoints(undistortedPoints, distortedPoints, Knew, K, D);
EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8);
}
TEST_F(fisheyeTest, distortUndistortPointsNewCameraRandom)
{
int width = imageSize.width;
int height = imageSize.height;
/* Create test points */
std::vector<cv::Point2d> points0Vector;
cv::Mat principalPoints = (cv::Mat_<double>(5, 2) << K(0, 2), K(1, 2), // (cx, cy)
/* Image corners */
0, 0,
0, height,
width, 0,
width, height
);
/* Random points inside image */
cv::Mat xy[2] = {};
xy[0].create(100, 1, CV_64F);
theRNG().fill(xy[0], cv::RNG::UNIFORM, 0, width); // x
xy[1].create(100, 1, CV_64F);
theRNG().fill(xy[1], cv::RNG::UNIFORM, 0, height); // y
cv::Mat randomPoints;
merge(xy, 2, randomPoints);
cv::Mat points0;
cv::Mat Reye = cv::Mat::eye(3, 3, CV_64FC1);
cv::vconcat(principalPoints.reshape(2), randomPoints, points0);
/* Test with random D set */
for (size_t i = 0; i < 10; ++i) {
cv::Mat distortion(1, 4, CV_64F);
theRNG().fill(distortion, cv::RNG::UNIFORM, -0.001, 0.001);
cv::Mat Knew;
cv::fisheye::estimateNewCameraMatrixForUndistortRectify(K, distortion, imageSize, Reye, Knew);
/* Distort -> Undistort */
cv::Mat distortedPoints;
cv::fisheye::distortPoints(points0, distortedPoints, Knew, K, distortion);
cv::Mat undistortedPoints;
cv::fisheye::undistortPoints(distortedPoints, undistortedPoints, K, distortion, Reye, Knew);
EXPECT_MAT_NEAR(points0, undistortedPoints, 1e-8);
/* Undistort -> Distort */
cv::fisheye::undistortPoints(points0, undistortedPoints, K, distortion, Reye, Knew);
cv::fisheye::distortPoints(undistortedPoints, distortedPoints, Knew, K, distortion);
EXPECT_MAT_NEAR(points0, distortedPoints, 1e-8);
}
}
TEST_F(fisheyeTest, solvePnP)
{
const int n = 16;

@ -221,7 +221,7 @@ public:
int all_quads_count;
struct NeighborsFinder {
const float thresh_scale = 1.f;
const float thresh_scale = sqrt(2.f);
ChessBoardDetector& detector;
std::vector<int> neighbors_indices;
std::vector<float> neighbors_dists;
@ -231,8 +231,9 @@ public:
NeighborsFinder(ChessBoardDetector& detector);
bool findCornerNeighbor(
const int idx,
const cv::Point2f& pt,
const int quad_idx,
const int corner_idx,
const cv::Point2f& corner_pt,
float& min_dist,
const float radius,
int& closest_quad_idx,
@ -513,9 +514,23 @@ ChessBoardDetector::NeighborsFinder::NeighborsFinder(ChessBoardDetector& _detect
neighbors_dists.resize(all_corners_count);
}
static double pointSideFromLine(const Point2f& line_direction_vector, const Point2f& vector)
{
return line_direction_vector.cross(vector);
}
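// The sign of this 2D cross product tells on which side of the directed line the point lies;
// two points lie on the same side exactly when their signed values have the same sign.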
static bool arePointsOnSameSideFromLine(const Point2f& line_pt1, const Point2f& line_pt2, const Point2f& pt1, const Point2f& pt2)
{
const Point2f line_direction_vector = line_pt2 - line_pt1;
const Point2f vector1 = pt1 - line_pt1;
const Point2f vector2 = pt2 - line_pt1;
return pointSideFromLine(line_direction_vector, vector1) * pointSideFromLine(line_direction_vector, vector2) > 0.;
}
bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor(
const int idx,
const cv::Point2f& pt,
const int quad_idx,
const int corner_idx,
const cv::Point2f& corner_pt,
float& min_dist,
const float radius,
int& closest_quad_idx,
@ -524,12 +539,12 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor(
{
ChessBoardQuad* p_all_quads = detector.all_quads.data();
const ChessBoardQuad& cur_quad = (const ChessBoardQuad&)p_all_quads[idx];
const ChessBoardQuad& cur_quad = (const ChessBoardQuad&)p_all_quads[quad_idx];
int closest_neighbor_idx = -1;
ChessBoardQuad *closest_quad = 0;
// find the closest corner in all other quadrangles
const std::vector<float> query = { pt.x, pt.y };
const std::vector<float> query = { corner_pt.x, corner_pt.y };
const cvflann::SearchParams search_params(-1);
const int neighbors_count = all_quads_pts_index.radiusSearch(query, neighbors_indices, neighbors_dists, radius, search_params);
@ -537,7 +552,7 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor(
{
const int neighbor_idx = neighbors_indices[neighbor_idx_idx];
const int k = neighbor_idx >> 2;
if (k == idx)
if (k == quad_idx)
continue;
ChessBoardQuad& q_k = p_all_quads[k];
@ -545,7 +560,8 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor(
if (q_k.neighbors[j])
continue;
const float dist = normL2Sqr<float>(pt - all_quads_pts[neighbor_idx]);
const Point2f neighbor_pt = all_quads_pts[neighbor_idx];
const float dist = normL2Sqr<float>(corner_pt - neighbor_pt);
if (dist <= cur_quad.edge_len * thresh_scale &&
dist <= q_k.edge_len * thresh_scale)
{
@ -559,6 +575,24 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor(
DPRINTF("Incompatible edge lengths");
continue;
}
const Point2f mid_pt1 = (cur_quad.corners[corner_idx]->pt + cur_quad.corners[(corner_idx + 1) & 3]->pt) / 2.f;
const Point2f mid_pt2 = (cur_quad.corners[(corner_idx + 2) & 3]->pt + cur_quad.corners[(corner_idx + 3) & 3]->pt) / 2.f;
if (!arePointsOnSameSideFromLine(mid_pt1, mid_pt2, corner_pt, neighbor_pt))
continue;
const Point2f mid_pt3 = (cur_quad.corners[(corner_idx + 1) & 3]->pt + cur_quad.corners[(corner_idx + 2) & 3]->pt) / 2.f;
const Point2f mid_pt4 = (cur_quad.corners[(corner_idx + 3) & 3]->pt + cur_quad.corners[corner_idx]->pt) / 2.f;
if (!arePointsOnSameSideFromLine(mid_pt3, mid_pt4, corner_pt, neighbor_pt))
continue;
const Point2f neighbor_pt_diagonal = q_k.corners[(j + 2) & 3]->pt;
if (!arePointsOnSameSideFromLine(mid_pt1, mid_pt2, corner_pt, neighbor_pt_diagonal))
continue;
if (!arePointsOnSameSideFromLine(mid_pt3, mid_pt4, neighbor_pt, neighbor_pt_diagonal))
continue;
closest_neighbor_idx = neighbor_idx;
closest_quad_idx = k;
closest_corner_idx = j;
@ -588,7 +622,7 @@ bool ChessBoardDetector::NeighborsFinder::findCornerNeighbor(
if (cur_quad.neighbors[j] == closest_quad)
break;
if (normL2Sqr<float>(closest_corner_pt - all_quads_pts[(idx << 2) + j]) < min_dist)
if (normL2Sqr<float>(closest_corner_pt - all_quads_pts[(quad_idx << 2) + j]) < min_dist)
break;
}
if (j < 4)
@ -1792,6 +1826,7 @@ void ChessBoardDetector::findQuadNeighbors()
bool found = neighborsFinder.findCornerNeighbor(
idx,
i,
pt,
min_dist,
radius,
@ -1812,6 +1847,7 @@ void ChessBoardDetector::findQuadNeighbors()
found = neighborsFinder.findCornerNeighbor(
closest_quad_idx,
closest_corner_idx,
closest_corner_pt,
min_dist,
radius,

@ -118,6 +118,7 @@ public:
//! default allocator
CV_WRAP static GpuMat::Allocator* defaultAllocator();
CV_WRAP static void setDefaultAllocator(GpuMat::Allocator* allocator);
CV_WRAP static GpuMat::Allocator* getStdAllocator();
//! default constructor
CV_WRAP explicit GpuMat(GpuMat::Allocator* allocator = GpuMat::defaultAllocator());

@ -135,6 +135,7 @@ namespace
DefaultAllocator cudaDefaultAllocator;
GpuMat::Allocator* g_defaultAllocator = &cudaDefaultAllocator;
GpuMat::Allocator* g_stdAllocator = &cudaDefaultAllocator;
}
GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
@ -148,6 +149,12 @@ void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
g_defaultAllocator = allocator;
}
GpuMat::Allocator* cv::cuda::GpuMat::getStdAllocator()
{
return g_stdAllocator;
}
/////////////////////////////////////////////////////
/// create

@ -420,6 +420,11 @@ void cv::cuda::GpuMat::setDefaultAllocator(Allocator* allocator)
throw_no_cuda();
}
GpuMat::Allocator* cv::cuda::GpuMat::getStdAllocator()
{
return 0;
}
void cv::cuda::GpuMat::create(int _rows, int _cols, int _type)
{
CV_UNUSED(_rows);

@ -718,16 +718,13 @@ void Mat::create(int d0, const int* _sizes, int _type)
if( total() > 0 )
{
MatAllocator *a = allocator, *a0 = getDefaultAllocator();
#ifdef HAVE_TGPU
if( !a || a == tegra::getAllocator() )
a = tegra::getAllocator(d, _sizes, _type);
#endif
if(!a)
a = a0;
try
{
u = a->allocate(dims, size, _type, 0, step.p, ACCESS_RW /* ignored */, USAGE_DEFAULT);
CV_Assert(u != 0);
allocator = a;
}
catch (...)
{
@ -735,6 +732,7 @@ void Mat::create(int d0, const int* _sizes, int _type)
throw;
u = a0->allocate(dims, size, _type, 0, step.p, ACCESS_RW /* ignored */, USAGE_DEFAULT);
CV_Assert(u != 0);
allocator = a0;
}
CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
}

@ -737,6 +737,8 @@ public:
if( c != '\"' && c != '\'' )
{
ptr = skipSpaces( ptr, CV_XML_INSIDE_TAG );
if(!ptr)
CV_PARSE_ERROR_CPP("Invalid attribute value");
if( *ptr != '\"' && *ptr != '\'' )
CV_PARSE_ERROR_CPP( "Attribute value should be put into single or double quotes" );
}

@ -0,0 +1,145 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "opencv2/core/core_c.h" // needed for CV_AUTOSTEP
namespace opencv_test { namespace {
// Dummy allocator implementation copied from the default OpenCV allocator with some simplifications
struct DummyAllocator: public cv::MatAllocator
{
public:
DummyAllocator() {};
~DummyAllocator() {};
cv::UMatData* allocate(int dims, const int* sizes, int type,
void* data0, size_t* step, cv::AccessFlag flags,
cv::UMatUsageFlags usageFlags) const
{
CV_UNUSED(flags);
CV_UNUSED(usageFlags);
size_t total = CV_ELEM_SIZE(type);
for( int i = dims-1; i >= 0; i-- )
{
if( step )
{
if( data0 && step[i] != CV_AUTOSTEP )
{
CV_Assert(total <= step[i]);
total = step[i];
}
else
step[i] = total;
}
total *= sizes[i];
}
uchar* data = nullptr;
if (data0)
{
data = (uchar*)data0;
}
else
{
data = new uchar[total];
DummyAllocator::allocatedBytes += total;
DummyAllocator::allocations++;
}
cv::UMatData* u = new cv::UMatData(this);
u->data = u->origdata = data;
u->size = total;
if(data0)
u->flags |= cv::UMatData::USER_ALLOCATED;
return u;
}
bool allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const
{
CV_UNUSED(accessFlags);
CV_UNUSED(usageFlags);
if(!u) return false;
return true;
}
void deallocate(cv::UMatData* u) const
{
if(!u)
return;
CV_Assert(u->urefcount == 0);
CV_Assert(u->refcount == 0);
if( !(u->flags & cv::UMatData::USER_ALLOCATED) )
{
delete[] u->origdata;
DummyAllocator::deallocations++;
u->origdata = 0;
}
delete u;
}
static size_t allocatedBytes;
static int allocations;
static int deallocations;
};
size_t DummyAllocator::allocatedBytes = 0;
int DummyAllocator::allocations = 0;
int DummyAllocator::deallocations = 0;
cv::MatAllocator* getDummyAllocator()
{
static cv::MatAllocator* allocator = new DummyAllocator;
return allocator;
}
struct AllocatorTest : public testing::Test {
void SetUp() override {
cv::MatAllocator* allocator = getDummyAllocator();
EXPECT_TRUE(allocator != nullptr);
cv::Mat::setDefaultAllocator(allocator);
}
void TearDown() override {
cv::Mat::setDefaultAllocator(cv::Mat::getStdAllocator());
}
};
TEST_F(AllocatorTest, DummyAllocator)
{
cv::MatAllocator* dummy = getDummyAllocator();
DummyAllocator::allocatedBytes = 0;
DummyAllocator::allocations = 0;
DummyAllocator::deallocations = 0;
{
cv::Mat src1 = cv::Mat::ones (16, 16, CV_8UC1);
EXPECT_TRUE(!src1.empty());
EXPECT_EQ(src1.allocator, dummy);
cv::Mat src1_roi = src1(cv::Rect(2,2,8,8));
EXPECT_EQ(src1_roi.allocator, dummy);
cv::MatAllocator* standard = cv::Mat::getStdAllocator();
cv::Mat::setDefaultAllocator(standard);
cv::Mat src2 = cv::Mat::ones (16, 16, CV_8UC1);
EXPECT_TRUE(!src2.empty());
EXPECT_EQ(src2.allocator, standard);
src1.create(32, 32, CV_8UC1);
EXPECT_EQ(src1.allocator, dummy);
}
size_t expect_allocated = 16*16*sizeof(uchar) + 32*32*sizeof(uchar);
EXPECT_EQ(expect_allocated, DummyAllocator::allocatedBytes);
// ROI should not trigger extra allocations
EXPECT_EQ(2, DummyAllocator::allocations);
EXPECT_EQ(2, DummyAllocator::deallocations);
}
}} // namespace
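For reference, a minimal sketch (not part of this patch) of how a custom cv::MatAllocator such as the DummyAllocator above can be installed and restored in application code; the wrapper function name is illustrative:

#include <opencv2/core.hpp>

// Sketch only: route Mat allocations through a user-supplied allocator,
// then restore the previously active one.
void useCustomAllocator(cv::MatAllocator* custom)
{
    cv::MatAllocator* previous = cv::Mat::getDefaultAllocator();
    cv::Mat::setDefaultAllocator(custom);    // new Mat buffers now come from 'custom'
    {
        cv::Mat m(64, 64, CV_8UC1);          // allocated through 'custom'
    }                                        // deallocated through 'custom' as well
    cv::Mat::setDefaultAllocator(previous);  // restore the original allocator
}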

@ -2007,4 +2007,22 @@ INSTANTIATE_TEST_CASE_P( /*nothing*/,
Core_InputOutput_regression_25073,
Values("test.json", "test.xml", "test.yml") );
// see https://github.com/opencv/opencv/issues/25946
TEST(Core_InputOutput, FileStorage_invalid_attribute_value_regression_25946)
{
const std::string fileName = cv::tempfile("FileStorage_invalid_attribute_value_exception_test.xml");
const std::string content = "<?xml \n_=";
std::fstream testFile;
testFile.open(fileName.c_str(), std::fstream::out);
if(!testFile.is_open()) FAIL();
testFile << content;
testFile.close();
FileStorage fs;
EXPECT_ANY_THROW( fs.open(fileName, FileStorage::READ + FileStorage::FORMAT_XML) );
ASSERT_EQ(0, std::remove(fileName.c_str()));
}
}} // namespace

@ -345,7 +345,7 @@ TEST(Samples, findFile)
{
cv::utils::logging::LogLevel prev = cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_VERBOSE);
cv::String path;
ASSERT_NO_THROW(path = samples::findFile("lena.jpg", false));
ASSERT_NO_THROW(path = samples::findFile("HappyFish.jpg", false));
EXPECT_NE(std::string(), path.c_str());
cv::utils::logging::setLogLevel(prev);
}

@ -15,7 +15,10 @@ namespace dnn
static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int axis)
{
// The data is the 1-D scales or zeropoints.
CV_Assert(axis >= 0 && targetShape.size() > axis && data.total() == targetShape[axis]);
CV_CheckGE(axis, 0, "Quantization axis must be non-negative.");
CV_CheckGT((int)targetShape.size(), axis, "Quantization axis must be within the valid range of target shape dimensions.");
CV_CheckEQ((int)data.total(), (int)targetShape[axis], "Data total size must match the size of the specified target dimension.");
std::vector<int> broadcast_axes;
for (int i = 0; i < targetShape.size(); i++)
{
@ -35,29 +38,98 @@ static void broadcast1D2TargetMat(Mat& data, const MatShape& targetShape, int ax
}
}
static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector<float>& scales,
const std::vector<int>& zeropoints, const MatShape& targetShape, int axis)
static void block_repeat(InputArray src, const MatShape& srcShape, int axis, int repetitions, OutputArray dst)
{
// broad cast the scales and zeropoint to the input shape.
MatShape subTargetShape(targetShape.size(), 1);
subTargetShape[axis] = scales.size();
CV_Assert(src.getObj() != dst.getObj());
CV_Check(axis, axis >= 0 && axis < src.dims(), "Axis out of range");
CV_CheckGT(repetitions, 1, "More than one repetition expected");
zeropointsMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
scalesMat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
Mat src_mat = src.getMat();
Mat dst_mat;
const int len = scales.size();
// Deep copy the scales and zeropoint data and prevent the original data from being changed.
if (src_mat.depth() != CV_32F)
src_mat.convertTo(src_mat, CV_32F);
float * scalePtr = scalesMat.ptr<float>(0);
for (int i = 0; i < len; i++)
scalePtr[i] = scales[i];
MatShape sshape = srcShape;
MatShape dshape = srcShape;
size_t dtype_bytes = src_mat.elemSize();
int chunk_size = dtype_bytes;
int num_chunks = 1;
dshape[axis] *= repetitions;
for (int i = axis+1; i < sshape.size(); ++i)
chunk_size*=sshape[i];
for (int i = 0; i <= axis; ++i)
num_chunks*=sshape[i];
dst.create(dshape.size(), dshape.data(), src_mat.type());
dst_mat = dst.getMat();
CV_Assert(dst_mat.isContinuous());
CV_Assert(src_mat.isContinuous());
for (int i = 0; i < repetitions; ++i) {
size_t src_offset = 0;
size_t dst_offset = i * chunk_size;
for (int j = 0; j < num_chunks; ++j) {
memcpy(dst_mat.data + dst_offset, src_mat.data + src_offset, chunk_size);
src_offset += chunk_size;
dst_offset += chunk_size * repetitions;
}
}
}
template <typename T>
static void copyVecToMat(Mat& mat, const std::vector<T>& data){
float * matPtr = mat.ptr<float>(0);
const int len = data.size();
float * zpPtr = zeropointsMat.ptr<float>(0);
for (int i = 0; i < len; i++)
zpPtr[i] = (float )zeropoints[i];
matPtr[i] = (float) data[i];
}
broadcast1D2TargetMat(scalesMat, targetShape, axis);
broadcast1D2TargetMat(zeropointsMat, targetShape, axis);
template <typename T>
static void broadcastBlockedMatrix(Mat& mat, const std::vector<T>& data, const MatShape& targetShape, int axis, int block_size){
CV_Check(block_size, targetShape[axis] % block_size == 0 && block_size <= targetShape[axis], "Block size must be a divisor of the target dimension size and not exceed it.");
MatShape subTargetShape(targetShape);
subTargetShape[axis] = static_cast<int>(subTargetShape[axis] / block_size);
block_repeat(data, subTargetShape, axis, block_size, mat);
}
template <typename T>
static void broadcastStandardMatrix(Mat& mat, const std::vector<T>& data, const MatShape& targetShape, int axis)
{
MatShape subTargetShape(targetShape.size(), 1);
subTargetShape[axis] = data.size();
mat.create(subTargetShape.size(), subTargetShape.data(), CV_32FC1);
copyVecToMat(mat,data);
broadcast1D2TargetMat(mat, targetShape, axis);
}
static void broadcastScaleAndZeropoint(Mat& scalesMat, Mat& zeropointsMat, const std::vector<float>& scales,
const std::vector<int>& zeropoints, const MatShape& targetShape, int axis, int block_size)
{
// broadcast the scales and zeropoints to the input shape.
if (block_size == 0)
{
broadcastStandardMatrix(zeropointsMat, zeropoints, targetShape, axis);
broadcastStandardMatrix(scalesMat, scales, targetShape, axis);
}
else
{
broadcastBlockedMatrix(zeropointsMat, zeropoints, targetShape, axis, block_size);
broadcastBlockedMatrix(scalesMat, scales, targetShape, axis, block_size);
}
}
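To make the blocked path above concrete, here is a small stand-alone sketch (illustrative only, using std::vector instead of cv::Mat) of the expansion that block_repeat performs for a 1-D scale vector: each per-block scale is repeated block_size times along the quantization axis.

#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> scales = {0.5f, 2.0f};  // one scale per block
    const int block_size = 2;                  // elements covered by each scale
    std::vector<float> expanded;               // per-element scales after expansion
    for (float s : scales)
        for (int i = 0; i < block_size; ++i)
            expanded.push_back(s);
    for (float s : expanded)
        std::printf("%g ", s);                 // prints: 0.5 0.5 2 2
    return 0;
}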
// Quantize FP32/FP16 Inputs to INT8
@ -65,13 +137,17 @@ class QuantizeLayerImpl CV_FINAL : public QuantizeLayer
{
public:
int axis;
int block_size;
bool is1D;
Mat scalesMat, zeropointsMat; // Saving the broadcasetd scales data.
Mat scalesMat, zeropointsMat; // Saving the broadcasted scales data.
bool quantParamExternal = true; // Indicates if the quantization parameters (scale and zero point) are provided as inputs to the node.
QuantizeLayerImpl(const LayerParams& params)
{
is1D = params.get<bool>("is1D", false);
axis = params.get<int>("axis", 1);
block_size = params.get<int>("block_size", 0);
if (!is1D)
{
scales.push_back(params.get<float>("scales", 1.0f));
@ -82,7 +158,7 @@ public:
DictValue paramScales = params.get("scales");
int i, n = paramScales.size();
CV_Assert(n > 0);
CV_CheckGT(n, 0, "Scale missing.");
scales.resize(n, 0.);
for (i = 0; i < n; i++)
scales[i] = paramScales.get<float>(i);
@ -108,7 +184,7 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return false;
}
@ -134,7 +210,7 @@ public:
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
@ -156,6 +232,39 @@ public:
return true;
}
#endif
void processInputOutput(std::vector<Mat>& inputs, std::vector<Mat>& outputs)
{
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
quantParamExternal &= inputs.size() > 1;
// Scale and zeropoint taken as input
if (quantParamExternal)
{
quantParamExternal = false;
scalesMat = inputs[1];
scalesMat.reshape(1, 1).copyTo(scales);
if(scalesMat.total() > 1) is1D = true;
if (inputs.size() > 2)
{
zeropointsMat = inputs[2];
CV_CheckEQ((int)zeropointsMat.total(), (int)scalesMat.total(), "Scale and zero point elements number must match.");
zeropointsMat.reshape(1, 1).copyTo(zeropoints);
}
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
if (outputs[0].depth() != CV_8S)
outputs[0].convertTo(outputs[0], CV_8S);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
@ -169,14 +278,13 @@ public:
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (outputs[0].depth() != CV_8S)
outputs[0].convertTo(outputs[0], CV_8S);
processInputOutput(inputs, outputs);
if (is1D)
{
Mat inputTmp;
divide(inputs[0], scalesMat, inputTmp);
subtract(inputTmp, zeropointsMat, inputTmp);
add(inputTmp, zeropointsMat, inputTmp);
inputTmp.convertTo(outputs[0], CV_8S);
}
@ -200,13 +308,16 @@ class DequantizeLayerImpl CV_FINAL : public DequantizeLayer
{
public:
int axis;
int block_size;
bool is1D;
Mat scalesMat, zeropointsMat; // Saving the broadcasted scales data.
bool quantParamExternal = true;
DequantizeLayerImpl(const LayerParams& params)
{
is1D = params.get<bool>("is1D", false);
axis = params.get<int>("axis", 1);
block_size = params.get<int>("block_size", 0);
if (!is1D)
{
@ -218,7 +329,7 @@ public:
DictValue paramScales = params.get("scales");
int i, n = paramScales.size();
CV_Assert(n > 0);
CV_CheckGT(n, 0, "Scale missing.");
scales.resize(n);
for (i = 0; i < n; i++)
scales[i] = paramScales.get<float>(i);
@ -244,7 +355,7 @@ public:
std::vector<MatShape> &outputs,
std::vector<MatShape> &internals) const CV_OVERRIDE
{
CV_Assert(inputs.size() == 1);
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
return false;
}
@ -273,7 +384,7 @@ public:
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
@ -292,6 +403,39 @@ public:
}
#endif
void processInputOutput(std::vector<Mat>& inputs, std::vector<Mat>& outputs)
{
CV_Check(inputs.size(), inputs.size() >= 1 && inputs.size() <= 3, "Number of inputs must be between 1 and 3 inclusive.");
quantParamExternal &= inputs.size() > 1;
// Scale and zeropoint taken as input
if (quantParamExternal)
{
quantParamExternal = false;
scalesMat = inputs[1];
scalesMat.reshape(1, 1).copyTo(scales);
if(scalesMat.total() > 1) is1D = true;
if (inputs.size() > 2)
{
zeropointsMat = inputs[2];
CV_CheckEQ((int)zeropointsMat.total(), (int)scalesMat.total(), "Scale and zero point elements number must match.");
zeropointsMat.reshape(1, 1).copyTo(zeropoints);
}
if (is1D)
{
MatShape inputShape = shape(inputs[0]);
broadcastScaleAndZeropoint(scalesMat, zeropointsMat, scales, zeropoints, inputShape, axis, block_size);
}
}
if (outputs[0].depth() != CV_32F)
outputs[0].convertTo(outputs[0], CV_32F);
}
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
{
CV_TRACE_FUNCTION();
@ -304,8 +448,7 @@ public:
inputs_arr.getMatVector(inputs);
outputs_arr.getMatVector(outputs);
if (outputs[0].depth() != CV_32F)
outputs[0].convertTo(outputs[0], CV_32F);
processInputOutput(inputs, outputs);
if (is1D)
{

@ -803,7 +803,7 @@ struct GeluFunctor : public BaseFunctor {
#endif
#ifdef HAVE_DNN_NGRAPH
std::shared_ptr<ngraph::Node> initNgraphAPI(const ngraph::Output<ngraph::Node>& node)
std::shared_ptr<ov::Node> initNgraphAPI(const ov::Output<ov::Node>& node)
{
return std::make_shared<ov::op::v0::Gelu>(node);
}

@ -1057,7 +1057,7 @@ public:
// In case only one input
if (inputs.size() == 1) {
auto &ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
ngraph::OutputVector inp{ieInpNode};
ov::OutputVector inp{ieInpNode};
auto blank = std::make_shared<ov::op::v0::Concat>(inp, 0);
return Ptr<BackendNode>(new InfEngineNgraphNode(blank));
}

@ -3280,6 +3280,17 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx
// or 1-D tensor (per-channel quantized).
bool is1D = false;
if (layerParams.type == "Quantize")
layerParams.set("depth", CV_8S);
else // Dequantize
layerParams.set("depth", CV_32F);
// If scale is not defined as a constant blob, it is considered an external input.
if(constBlobs.find(node_proto.input(1)) == constBlobs.end()){
addLayer(layerParams, node_proto);
return;
}
Mat scaleMat = getBlob(node_proto, 1);
if(scaleMat.total() > 1) is1D = true;
@ -3321,11 +3332,6 @@ void ONNXImporter::parseQuantDequant(LayerParams& layerParams, const opencv_onnx
layerParams.set("zeropoints", zeropoint);
}
if (layerParams.type == "Quantize")
layerParams.set("depth", CV_8S);
else // Dequantize
layerParams.set("depth", CV_32F);
if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) // Variable input.
{
std::vector<Mat> inputs, outputs;

@ -224,6 +224,7 @@ static const TestCase testConformanceConfig[] = {
{"test_depthtospace_example", 1, 1},
{"test_dequantizelinear", 3, 1},
{"test_dequantizelinear_axis", 3, 1},
{"test_dequantizelinear_blocked", 3, 1},
{"test_det_2d", 1, 1},
{"test_det_nd", 1, 1},
{"test_div", 2, 1},
@ -569,6 +570,7 @@ static const TestCase testConformanceConfig[] = {
{"test_qlinearmatmul_3D", 8, 1},
{"test_quantizelinear", 3, 1},
{"test_quantizelinear_axis", 3, 1},
{"test_quantizelinear_blocked", 3, 1},
{"test_range_float_type_positive_delta", 3, 1},
{"test_range_float_type_positive_delta_expanded", 3, 1},
{"test_range_int32_type_negative_delta", 3, 1},

@ -501,9 +501,11 @@ CASE(test_depthtospace_dcr_mode)
CASE(test_depthtospace_example)
// no filter
CASE(test_dequantizelinear)
// no filter
SKIP;
CASE(test_dequantizelinear_axis)
// no filter
SKIP;
CASE(test_dequantizelinear_blocked)
SKIP;
CASE(test_det_2d)
// no filter
CASE(test_det_nd)
@ -1280,9 +1282,11 @@ CASE(test_qlinearmatmul_2D)
CASE(test_qlinearmatmul_3D)
// no filter
CASE(test_quantizelinear)
// no filter
SKIP;
CASE(test_quantizelinear_axis)
// no filter
SKIP;
CASE(test_quantizelinear_blocked)
SKIP;
CASE(test_range_float_type_positive_delta)
// no filter
CASE(test_range_float_type_positive_delta_expanded)

@ -48,6 +48,9 @@
"test_cumsum_2d_axis_1",
"test_cumsum_2d_negative_axis",
"test_concat_1d_axis_negative_1",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_dequantizelinear_blocked",
"test_div_uint8",
"test_flatten_axis0",
"test_flatten_axis2",
@ -71,6 +74,9 @@
"test_pow_types_float32_int32", // vulkan backend does not take tensor other than float32 data type
"test_pow_types_float32_int64", // vulkan backend does not take tensor other than float32 data type
"test_pow_types_int", // vulkan backend does not take tensor other than float32 data type
"test_quantizelinear",
"test_quantizelinear_axis",
"test_quantizelinear_blocked",
"test_softmax_default_axis",
"test_sub_bcast",
"test_sub_uint8",

@ -0,0 +1,4 @@
"test_dequantizelinear_blocked", // Issue https://github.com/opencv/opencv/issues/25999
"test_quantizelinear", // Issue https://github.com/opencv/opencv/issues/25999
"test_quantizelinear_axis", // Issue https://github.com/opencv/opencv/issues/25999
"test_quantizelinear_blocked", // Issue https://github.com/opencv/opencv/issues/25999

@ -1,4 +1,7 @@
"test_averagepool_3d_default",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_dequantizelinear_blocked",
"test_dropout_default_ratio",
"test_globalmaxpool",
"test_globalmaxpool_precomputed",
@ -14,7 +17,10 @@
"test_maxpool_2d_same_upper",
"test_maxpool_2d_strides",
"test_maxpool_3d_default",
"test_pow", // fp16 accuracy issue
"test_pow",
"test_quantizelinear",
"test_quantizelinear_axis",
"test_quantizelinear_blocked",
"test_softmax_large_number",
"test_softmax_large_number_expanded",
"test_split_equal_parts_1d",

@ -1,5 +1,11 @@
"test_averagepool_3d_default",
"test_dequantizelinear",
"test_dequantizelinear_axis",
"test_dequantizelinear_blocked",
"test_maxpool_3d_default",
"test_quantizelinear",
"test_quantizelinear_axis",
"test_quantizelinear_blocked",
"test_scatter_elements_with_axis",
"test_scatter_elements_with_duplicate_indices",
"test_scatter_elements_with_negative_indices",

@ -80,8 +80,6 @@
"test_convtranspose_pad", // Issue::Parser::Weights are required as inputs
"test_convtranspose_pads", // Issue::Parser::Weights are required as inputs
"test_convtranspose_with_kernel", // Issue::Parser::Weights are required as inputs
"test_dequantizelinear", // Issue::Parser::Weights are required as inputs
"test_dequantizelinear_axis", // Issue::Parser::Weights are required as inputs
"test_det_2d", // Issue:: Unkonwn error
"test_det_nd", // Issue:: Unkonwn error
"test_dropout_default_mask", // Issue::cvtest::norm::wrong data type
@ -235,8 +233,6 @@
"test_qlinearconv", // Issue::Parser: Blob x_scale not found in const blobs in function 'getBlob' (weights are required as inputs)
"test_qlinearmatmul_2D", // Issue:: Parser: Variable weights is not supported in function 'parseQMatMul'
"test_qlinearmatmul_3D", // ---- same as above ---
"test_quantizelinear", // Issue::Parser: Blob y_scale not found in const blobs in function 'getBlob' (weights are required as inputs)
"test_quantizelinear_axis", // ---- same as above ---
"test_range_float_type_positive_delta", // Issue:: Unsupported data type in function. Unsupported type in function 'parseCast'
"test_range_float_type_positive_delta_expanded", // ---- same as above ---
"test_range_int32_type_negative_delta", // Issue:: Unsupported data type: INT32 in function. Unsupported type in function 'parseCast'

@ -1,5 +1,13 @@
package org.opencv.test.features2d;
import org.junit.Assert;
import org.opencv.core.CvType;
import org.opencv.core.KeyPoint;
import org.opencv.core.Mat;
import org.opencv.core.MatOfKeyPoint;
import org.opencv.core.Scalar;
import org.opencv.features2d.Features2d;
import org.opencv.features2d.ORB;
import org.opencv.test.OpenCVTestCase;
public class ORBFeatureDetectorTest extends OpenCVTestCase {
@ -36,4 +44,35 @@ public class ORBFeatureDetectorTest extends OpenCVTestCase {
fail("Not yet implemented");
}
public void testDetectTwoPoints() {
Mat img = new Mat(256,256, CvType.CV_8UC3, new Scalar(0,0,0));
img.put(35, 40, 255,255, 255);
img.put(152, 98, 200,0, 0);
MatOfKeyPoint keypoints = new MatOfKeyPoint();
ORB orb = ORB.create();
Mat descriptors = new Mat();
orb.detectAndCompute(img, new Mat(), keypoints, descriptors);
KeyPoint[] keypointsArray = keypoints.toArray();
assertEquals(2, keypointsArray.length);
long x1 = Math.round(keypointsArray[0].pt.x);
long y1 = Math.round(keypointsArray[0].pt.y);
long x2 = Math.round(keypointsArray[1].pt.x);
long y2 = Math.round(keypointsArray[1].pt.y);
if (x2 > x1) {
assertEquals(40, x1);
assertEquals(35, y1);
assertEquals(98, x2);
assertEquals(152, y2);
} else {
assertEquals(40, x2);
assertEquals(35, y2);
assertEquals(98, x1);
assertEquals(152, y1);
}
}
}

@ -377,26 +377,30 @@ void WBaseStream::allocate()
}
void WBaseStream::writeBlock()
bool WBaseStream::writeBlock()
{
int size = (int)(m_current - m_start);
CV_Assert(isOpened());
if( size == 0 )
return;
return true;
if( m_buf )
{
size_t sz = m_buf->size();
m_buf->resize( sz + size );
memcpy( &(*m_buf)[sz], m_start, size );
m_current = m_start;
m_block_pos += size;
return true;
}
else
{
fwrite( m_start, 1, size, m_file );
size_t written = fwrite( m_start, 1, size, m_file );
m_current = m_start;
m_block_pos += size;
return written == (size_t)size;
}
m_current = m_start;
m_block_pos += size;
}
@ -463,15 +467,17 @@ WLByteStream::~WLByteStream()
{
}
void WLByteStream::putByte( int val )
bool WLByteStream::putByte( int val )
{
*m_current++ = (uchar)val;
if( m_current >= m_end )
writeBlock();
return writeBlock();
return true;
}
void WLByteStream::putBytes( const void* buffer, int count )
bool WLByteStream::putBytes( const void* buffer, int count )
{
uchar* data = (uchar*)buffer;
@ -492,12 +498,18 @@ void WLByteStream::putBytes( const void* buffer, int count )
count -= l;
}
if( m_current == m_end )
writeBlock();
{
bool written = writeBlock();
if (!written)
return false;
}
}
return true;
}
void WLByteStream::putWord( int val )
bool WLByteStream::putWord( int val )
{
uchar *current = m_current;
@ -507,17 +519,19 @@ void WLByteStream::putWord( int val )
current[1] = (uchar)(val >> 8);
m_current = current + 2;
if( m_current == m_end )
writeBlock();
return writeBlock();
}
else
{
putByte(val);
putByte(val >> 8);
}
return true;
}
void WLByteStream::putDWord( int val )
bool WLByteStream::putDWord( int val )
{
uchar *current = m_current;
@ -529,7 +543,7 @@ void WLByteStream::putDWord( int val )
current[3] = (uchar)(val >> 24);
m_current = current + 4;
if( m_current == m_end )
writeBlock();
return writeBlock();
}
else
{
@ -538,6 +552,8 @@ void WLByteStream::putDWord( int val )
putByte(val >> 16);
putByte(val >> 24);
}
return true;
}
@ -548,7 +564,7 @@ WMByteStream::~WMByteStream()
}
void WMByteStream::putWord( int val )
bool WMByteStream::putWord( int val )
{
uchar *current = m_current;
@ -558,17 +574,19 @@ void WMByteStream::putWord( int val )
current[1] = (uchar)val;
m_current = current + 2;
if( m_current == m_end )
writeBlock();
return writeBlock();
}
else
{
putByte(val >> 8);
putByte(val);
}
return true;
}
void WMByteStream::putDWord( int val )
bool WMByteStream::putDWord( int val )
{
uchar *current = m_current;
@ -580,7 +598,7 @@ void WMByteStream::putDWord( int val )
current[3] = (uchar)val;
m_current = current + 4;
if( m_current == m_end )
writeBlock();
return writeBlock();
}
else
{
@ -589,6 +607,8 @@ void WMByteStream::putDWord( int val )
putByte(val >> 8);
putByte(val);
}
return true;
}
}

@ -63,6 +63,12 @@ DECLARE_RBS_EXCEPTION(THROW_FORB)
DECLARE_RBS_EXCEPTION(BAD_HEADER)
#define RBS_BAD_HEADER RBS_BAD_HEADER_Exception(cv::Error::StsError, "Invalid header", CV_Func, __FILE__, __LINE__)
#define CHECK_WRITE(action) \
if (!action) \
{ \
return false; \
}
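A one-function sketch of the intended use of CHECK_WRITE inside a bool-returning encoder routine (the function name and values are illustrative; the real call sites appear in the per-format encoders below):

// Sketch: bail out with 'false' as soon as any stream write fails.
static bool writeHeaderSketch(cv::WLByteStream& strm)
{
    CHECK_WRITE(strm.putWord(0x4D42));   // illustrative header word
    CHECK_WRITE(strm.putDWord(0));
    return true;
}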
typedef unsigned long ulong;
// class RBaseStream - base class for other reading streams.
@ -147,7 +153,7 @@ protected:
bool m_is_opened;
std::vector<uchar>* m_buf;
virtual void writeBlock();
virtual bool writeBlock();
virtual void release();
virtual void allocate();
};
@ -160,10 +166,10 @@ class WLByteStream : public WBaseStream
public:
virtual ~WLByteStream();
void putByte( int val );
void putBytes( const void* buffer, int count );
void putWord( int val );
void putDWord( int val );
bool putByte( int val );
bool putBytes( const void* buffer, int count );
bool putWord( int val );
bool putDWord( int val );
};
@ -173,8 +179,8 @@ class WMByteStream : public WLByteStream
{
public:
virtual ~WMByteStream();
void putWord( int val );
void putDWord( int val );
bool putWord( int val );
bool putDWord( int val );
};
inline unsigned BSWAP(unsigned v)

@ -635,38 +635,40 @@ bool BmpEncoder::write( const Mat& img, const std::vector<int>& )
m_buf->reserve( alignSize(fileSize + 16, 256) );
// write signature 'BM'
strm.putBytes( fmtSignBmp, (int)strlen(fmtSignBmp) );
CHECK_WRITE(strm.putBytes( fmtSignBmp, (int)strlen(fmtSignBmp) ));
// write file header
strm.putDWord( validateToInt(fileSize) ); // file size
strm.putDWord( 0 );
strm.putDWord( headerSize );
CHECK_WRITE(strm.putDWord( validateToInt(fileSize) )); // file size
CHECK_WRITE(strm.putDWord( 0 ));
CHECK_WRITE(strm.putDWord( headerSize ));
// write bitmap header
strm.putDWord( bitmapHeaderSize );
strm.putDWord( width );
strm.putDWord( height );
strm.putWord( 1 );
strm.putWord( channels << 3 );
strm.putDWord( BMP_RGB );
strm.putDWord( 0 );
strm.putDWord( 0 );
strm.putDWord( 0 );
strm.putDWord( 0 );
strm.putDWord( 0 );
CHECK_WRITE(strm.putDWord( bitmapHeaderSize ));
CHECK_WRITE(strm.putDWord( width ));
CHECK_WRITE(strm.putDWord( height ));
CHECK_WRITE(strm.putWord( 1 ));
CHECK_WRITE(strm.putWord( channels << 3 ));
CHECK_WRITE(strm.putDWord( BMP_RGB ));
CHECK_WRITE(strm.putDWord( 0 ));
CHECK_WRITE(strm.putDWord( 0 ));
CHECK_WRITE(strm.putDWord( 0 ));
CHECK_WRITE(strm.putDWord( 0 ));
CHECK_WRITE(strm.putDWord( 0 ));
if( channels == 1 )
{
FillGrayPalette( palette, 8 );
strm.putBytes( palette, sizeof(palette));
CHECK_WRITE(strm.putBytes( palette, sizeof(palette)));
}
width *= channels;
for( int y = height - 1; y >= 0; y-- )
{
strm.putBytes( img.ptr(y), width );
CHECK_WRITE(strm.putBytes( img.ptr(y), width ));
if( fileStep > width )
strm.putBytes( zeropad, fileStep - width );
{
CHECK_WRITE(strm.putBytes( zeropad, fileStep - width ));
}
}
strm.close();

@ -64,11 +64,11 @@ T read_number(cv::RLByteStream& strm)
return atoT<T>(str);
}
template<typename T> void write_anything(cv::WLByteStream& strm, const T& t)
template<typename T> bool write_anything(cv::WLByteStream& strm, const T& t)
{
std::ostringstream ss;
ss << t;
strm.putBytes(ss.str().c_str(), static_cast<int>(ss.str().size()));
return strm.putBytes(ss.str().c_str(), static_cast<int>(ss.str().size()));
}
}
@ -206,33 +206,33 @@ bool PFMEncoder::write(const Mat& img, const std::vector<int>& params)
}
Mat float_img;
strm.putByte('P');
CHECK_WRITE(strm.putByte('P'));
switch (img.channels()) {
case 1:
strm.putByte('f');
CHECK_WRITE(strm.putByte('f'));
img.convertTo(float_img, CV_32FC1);
break;
case 3:
strm.putByte('F');
CHECK_WRITE(strm.putByte('F'));
img.convertTo(float_img, CV_32FC3);
break;
default:
CV_Error(Error::StsBadArg, "Expected 1 or 3 channel image.");
}
strm.putByte('\n');
CHECK_WRITE(strm.putByte('\n'));
write_anything(strm, float_img.cols);
strm.putByte(' ');
write_anything(strm, float_img.rows);
strm.putByte('\n');
CHECK_WRITE(write_anything(strm, float_img.cols));
CHECK_WRITE(strm.putByte(' '));
CHECK_WRITE(write_anything(strm, float_img.rows));
CHECK_WRITE(strm.putByte('\n'));
#ifdef WORDS_BIGENDIAN
write_anything(strm, 1.0);
CHECK_WRITE(write_anything(strm, 1.0));
#else
write_anything(strm, -1.0);
CHECK_WRITE(write_anything(strm, -1.0));
#endif
strm.putByte('\n');
CHECK_WRITE(strm.putByte('\n'));
// Comments are not officially supported in this file format.
// write_anything(strm, "# Generated by OpenCV " CV_VERSION "\n");
@ -248,17 +248,15 @@ bool PFMEncoder::write(const Mat& img, const std::vector<int>& params)
rgb_row[x*3+1] = bgr_row[x*3+1];
rgb_row[x*3+2] = bgr_row[x*3+0];
}
strm.putBytes( reinterpret_cast<const uchar*>(rgb_row.data()),
static_cast<int>(sizeof(float) * row_size) );
CHECK_WRITE(strm.putBytes( reinterpret_cast<const uchar*>(rgb_row.data()),
static_cast<int>(sizeof(float) * row_size) ));
} else if (float_img.channels() == 1) {
strm.putBytes(float_img.ptr(y), sizeof(float) * float_img.cols);
CHECK_WRITE(strm.putBytes(float_img.ptr(y), sizeof(float) * float_img.cols));
}
}
return true;
}
}
#endif // HAVE_IMGCODEC_PFM

@ -479,7 +479,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector<int>& params)
header_sz += sz;
}
strm.putBytes(buffer, header_sz);
CHECK_WRITE(strm.putBytes(buffer, header_sz));
for( y = 0; y < height; y++ )
{
@ -512,7 +512,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector<int>& params)
{
*ptr++ = byte;
}
strm.putBytes(buffer, (int)(ptr - buffer));
CHECK_WRITE(strm.putBytes(buffer, (int)(ptr - buffer)));
continue;
}
@ -539,7 +539,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector<int>& params)
}
}
strm.putBytes( (channels > 1 || depth > 8) ? buffer : (const char*)data, fileStep);
CHECK_WRITE(strm.putBytes( (channels > 1 || depth > 8) ? buffer : (const char*)data, fileStep));
}
else
{
@ -610,7 +610,7 @@ bool PxMEncoder::write(const Mat& img, const std::vector<int>& params)
*ptr++ = '\n';
strm.putBytes( buffer, (int)(ptr - buffer) );
CHECK_WRITE(strm.putBytes( buffer, (int)(ptr - buffer) ));
}
}

@ -410,17 +410,17 @@ bool SunRasterEncoder::write( const Mat& img, const std::vector<int>& )
if( strm.open(m_filename) )
{
strm.putBytes( fmtSignSunRas, (int)strlen(fmtSignSunRas) );
strm.putDWord( width );
strm.putDWord( height );
strm.putDWord( channels*8 );
strm.putDWord( fileStep*height );
strm.putDWord( RAS_STANDARD );
strm.putDWord( RMT_NONE );
strm.putDWord( 0 );
CHECK_WRITE(strm.putBytes( fmtSignSunRas, (int)strlen(fmtSignSunRas) ));
CHECK_WRITE(strm.putDWord( width ));
CHECK_WRITE(strm.putDWord( height ));
CHECK_WRITE(strm.putDWord( channels*8 ));
CHECK_WRITE(strm.putDWord( fileStep*height ));
CHECK_WRITE(strm.putDWord( RAS_STANDARD ));
CHECK_WRITE(strm.putDWord( RMT_NONE ));
CHECK_WRITE(strm.putDWord( 0 ));
for( y = 0; y < height; y++ )
strm.putBytes( img.ptr(y), fileStep );
CHECK_WRITE(strm.putBytes( img.ptr(y), fileStep ));
strm.close();
result = true;

@ -1100,16 +1100,6 @@ bool TiffEncoder::isFormatSupported( int depth ) const
return depth == CV_8U || depth == CV_8S || depth == CV_16U || depth == CV_16S || depth == CV_32S || depth == CV_32F || depth == CV_64F;
}
void TiffEncoder::writeTag( WLByteStream& strm, TiffTag tag,
TiffFieldType fieldType,
int count, int value )
{
strm.putWord( tag );
strm.putWord( fieldType );
strm.putDWord( count );
strm.putDWord( value );
}
class TiffEncoderBufHelper
{
public:

@ -132,10 +132,6 @@ public:
ImageEncoder newEncoder() const CV_OVERRIDE;
protected:
void writeTag( WLByteStream& strm, TiffTag tag,
TiffFieldType fieldType,
int count, int value );
bool writeLibTiff( const std::vector<Mat>& img_vec, const std::vector<int>& params );
bool write_32FC3_SGILOG(const Mat& img, void* tif);

@ -3771,10 +3771,11 @@ floating-point.
@param code color space conversion code (see #ColorConversionCodes).
@param dstCn number of channels in the destination image; if the parameter is 0, the number of the
channels is derived automatically from src and code.
@param hint Implementation modification flags. See #AlgorithmHint
@see @ref imgproc_color_conversions
*/
CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0 );
CV_EXPORTS_W void cvtColor( InputArray src, OutputArray dst, int code, int dstCn = 0, AlgorithmHint hint = cv::ALGO_HINT_DEFAULT );
/** @brief Converts an image from one color space to another where the source image is
stored in two planes.
@ -3793,8 +3794,9 @@ This function only supports YUV420 to RGB conversion as of now.
- #COLOR_YUV2RGB_NV21
- #COLOR_YUV2BGRA_NV21
- #COLOR_YUV2RGBA_NV21
@param hint Implementation modification flags. See #AlgorithmHint
*/
CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code );
CV_EXPORTS_W void cvtColorTwoPlane( InputArray src1, InputArray src2, OutputArray dst, int code, AlgorithmHint hint = cv::ALGO_HINT_DEFAULT );
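A minimal usage sketch for the new hint parameter (ALGO_HINT_DEFAULT / ALGO_HINT_APPROX are the enum values referenced elsewhere in this change; the wrapper function below is illustrative):

#include <opencv2/imgproc.hpp>

// Sketch: request the approximate (possibly not bit-exact) conversion path.
void convertApprox(const cv::Mat& bgr, cv::Mat& yuv)
{
    // dstCn = 0 keeps the default channel count; ALGO_HINT_APPROX lets the
    // dispatcher prefer a faster, approximate HAL implementation when one exists.
    cv::cvtColor(bgr, yuv, cv::COLOR_BGR2YUV, 0, cv::ALGO_HINT_APPROX);
}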
/** @brief main function for all demosaicing processes

@ -108,11 +108,19 @@ CV_EXPORTS void warpAffine(int src_type,
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[6], int interpolation, int borderType, const double borderValue[4]);
CV_EXPORTS void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw);
CV_EXPORTS void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw);
CV_EXPORTS void warpPerspective(int src_type,
const uchar * src_data, size_t src_step, int src_width, int src_height,
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
const double M[9], int interpolation, int borderType, const double borderValue[4]);
CV_EXPORTS void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw);
CV_EXPORTS void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw);
CV_EXPORTS void cvtBGRtoBGR(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,

@ -12,6 +12,12 @@
#define CV_HAL_INTER_CUBIC 2
#define CV_HAL_INTER_AREA 3
#define CV_HAL_INTER_LANCZOS4 4
#define CV_HAL_INTER_LINEAR_EXACT 5
#define CV_HAL_INTER_NEAREST_EXACT 6
#define CV_HAL_INTER_MAX 7
#define CV_HAL_WARP_FILL_OUTLIERS 8
#define CV_HAL_WARP_INVERSE_MAP 16
#define CV_HAL_WARP_RELATIVE_MAP 32
//! @}
//! @name Morphology operations

@ -168,7 +168,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
// helper function for dual-plane modes
void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int code )
void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int code, AlgorithmHint hint )
{
// only YUV420 is currently supported
switch (code)
@ -181,7 +181,7 @@ void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, in
return;
}
cvtColorTwoPlaneYUV2BGRpair(_ysrc, _uvsrc, _dst, dstChannels(code), swapBlue(code), uIndex(code));
cvtColorTwoPlaneYUV2BGRpair(_ysrc, _uvsrc, _dst, hint, dstChannels(code), swapBlue(code), uIndex(code));
}
@ -189,10 +189,13 @@ void cvtColorTwoPlane( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, in
// The main function //
//////////////////////////////////////////////////////////////////////////////////////////
void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == cv::ALGO_HINT_DEFAULT)
hint = cv::getDefaultAlgorithmHint();
CV_Assert(!_src.empty());
if(dcn <= 0)
@ -244,12 +247,12 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
case COLOR_BGR2YUV: case COLOR_RGB2YUV:
cvtColorBGR2YUV(_src, _dst, swapBlue(code), code == COLOR_BGR2YCrCb || code == COLOR_RGB2YCrCb);
cvtColorBGR2YUV(_src, _dst, hint, swapBlue(code), code == COLOR_BGR2YCrCb || code == COLOR_RGB2YCrCb);
break;
case COLOR_YCrCb2BGR: case COLOR_YCrCb2RGB:
case COLOR_YUV2BGR: case COLOR_YUV2RGB:
cvtColorYUV2BGR(_src, _dst, dcn, swapBlue(code), code == COLOR_YCrCb2BGR || code == COLOR_YCrCb2RGB);
cvtColorYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), code == COLOR_YCrCb2BGR || code == COLOR_YCrCb2RGB);
break;
case COLOR_BGR2XYZ:
@ -321,14 +324,14 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_YUV2BGRA_NV21: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV12:
// http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
// http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples
cvtColorTwoPlaneYUV2BGR(_src, _dst, dcn, swapBlue(code), uIndex(code));
cvtColorTwoPlaneYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), uIndex(code));
break;
case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12:
case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
//http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
//http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes
cvtColorThreePlaneYUV2BGR(_src, _dst, dcn, swapBlue(code), uIndex(code));
cvtColorThreePlaneYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), uIndex(code));
break;
case COLOR_YUV2GRAY_420:
@ -337,7 +340,7 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12:
case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV:
cvtColorBGR2ThreePlaneYUV(_src, _dst, swapBlue(code), uIndex(code));
cvtColorBGR2ThreePlaneYUV(_src, _dst, hint, swapBlue(code), uIndex(code));
break;
case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY:
@ -349,7 +352,7 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
int ycn = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2BGR_UYVY ||
code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0;
cvtColorOnePlaneYUV2BGR(_src, _dst, dcn, swapBlue(code), uIndex(code), ycn);
cvtColorOnePlaneYUV2BGR(_src, _dst, hint, dcn, swapBlue(code), uIndex(code), ycn);
break;
}
@ -362,7 +365,7 @@ void cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
int ycn = (code==COLOR_RGB2YUV_UYVY || code==COLOR_BGR2YUV_UYVY ||
code==COLOR_RGBA2YUV_UYVY || code==COLOR_BGRA2YUV_UYVY) ? 1 : 0;
cvtColorOnePlaneBGR2YUV(_src, _dst, swapBlue(code), uIndex(code), ycn);
cvtColorOnePlaneBGR2YUV(_src, _dst, hint, swapBlue(code), uIndex(code), ycn);
break;
}

@ -556,15 +556,15 @@ void cvtColorLuv2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bo
void cvtColorBGR2XYZ( InputArray _src, OutputArray _dst, bool swapb );
void cvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb );
void cvtColorBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, bool crcb);
void cvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb);
void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn);
void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, int uidx, int ycn);
void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx );
void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx );
void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx );
void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx);
void cvtColorBGR2YUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, bool crcb);
void cvtColorYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, bool crcb);
void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx, int ycn );
void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx, int ycn );
void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx );
void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx );
void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx );
void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx );
void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst );
void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi );

@ -18,13 +18,18 @@ namespace cv {
namespace hal {
// 8u, 16u, 32f
void cvtBGRtoYUV(const uchar * src_data, size_t src_step,
static void cvtBGRtoYUV(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
int depth, int scn, bool swapBlue, bool isCbCr)
int depth, int scn, bool swapBlue, bool isCbCr, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtBGRtoYUV, cv_hal_cvtBGRtoYUVApprox, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr);
}
CALL_HAL(cvtBGRtoYUV, cv_hal_cvtBGRtoYUV, src_data, src_step, dst_data, dst_step, width, height, depth, scn, swapBlue, isCbCr);
#if defined(HAVE_IPP)
@ -66,13 +71,18 @@ void cvtBGRtoYUV(const uchar * src_data, size_t src_step,
CV_CPU_DISPATCH_MODES_ALL);
}
void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
static void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
int depth, int dcn, bool swapBlue, bool isCbCr)
int depth, int dcn, bool swapBlue, bool isCbCr, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtYUVtoBGR, cv_hal_cvtYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr);
}
CALL_HAL(cvtYUVtoBGR, cv_hal_cvtYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, depth, dcn, swapBlue, isCbCr);
@ -115,63 +125,79 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
CV_CPU_DISPATCH_MODES_ALL);
}
// 4:2:0, two planes in one array: Y, UV interleaved
// 4:2:0, two planes: Y, UV interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step,
static void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step,
uchar * dst_data, size_t dst_step,
int dst_width, int dst_height,
int dcn, bool swapBlue, int uIdx)
int dcn, bool swapBlue, int uIdx, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGRExApprox,
y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
}
cvtTwoPlaneYUVtoBGR(
src_data, src_step, src_data + src_step * dst_height, src_step, dst_data, dst_step,
dst_width, dst_height, dcn, swapBlue, uIdx);
CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGREx,
y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx),
CV_CPU_DISPATCH_MODES_ALL);
}
// 4:2:0, two planes: Y, UV interleaved
// 4:2:0, two planes in one array: Y, UV interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step,
static void cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int dst_width, int dst_height,
int dcn, bool swapBlue, int uIdx)
int dcn, bool swapBlue, int uIdx, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
cvtTwoPlaneYUVtoBGR(y_data, src_step, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
}
CALL_HAL(cvtTwoPlaneYUVtoBGR, cv_hal_cvtTwoPlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
cvtTwoPlaneYUVtoBGR(
src_data, src_step, src_data + src_step * dst_height, src_step, dst_data, dst_step,
dst_width, dst_height, dcn, swapBlue, uIdx, hint);
}
// 4:2:0, two planes: Y, UV interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtTwoPlaneYUVtoBGR(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step,
static void cvtTwoPlaneYUVtoBGR(const uchar * y_data, const uchar * uv_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int dst_width, int dst_height,
int dcn, bool swapBlue, int uIdx)
int dcn, bool swapBlue, int uIdx, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
CALL_HAL(cvtTwoPlaneYUVtoBGREx, cv_hal_cvtTwoPlaneYUVtoBGREx,
y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
CV_CPU_DISPATCH(cvtTwoPlaneYUVtoBGR, (y_data, y_step, uv_data, uv_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx),
CV_CPU_DISPATCH_MODES_ALL);
cvtTwoPlaneYUVtoBGR(y_data, src_step, uv_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx, hint);
}
// 4:2:0, three planes in one array: Y, U, V
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
static void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int dst_width, int dst_height,
int dcn, bool swapBlue, int uIdx)
int dcn, bool swapBlue, int uIdx, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtThreePlaneYUVtoBGR, cv_hal_cvtThreePlaneYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
}
CALL_HAL(cvtThreePlaneYUVtoBGR, cv_hal_cvtThreePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx);
CV_CPU_DISPATCH(cvtThreePlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, dst_width, dst_height, dcn, swapBlue, uIdx),
@ -181,46 +207,39 @@ void cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
// 4:2:0, three planes in one array: Y, U, V
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step,
static void cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
int scn, bool swapBlue, int uIdx)
int scn, bool swapBlue, int uIdx, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtBGRtoThreePlaneYUV, cv_hal_cvtBGRtoThreePlaneYUVApprox, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx);
}
CALL_HAL(cvtBGRtoThreePlaneYUV, cv_hal_cvtBGRtoThreePlaneYUV, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx);
CV_CPU_DISPATCH(cvtBGRtoThreePlaneYUV, (src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx),
CV_CPU_DISPATCH_MODES_ALL);
}
// 4:2:0, two planes: Y, UV interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step,
uchar * y_data, uchar * uv_data, size_t dst_step,
int width, int height,
int scn, bool swapBlue, int uIdx)
{
CV_INSTRUMENT_REGION();
CALL_HAL(cvtBGRtoTwoPlaneYUV, cv_hal_cvtBGRtoTwoPlaneYUV,
src_data, src_step, y_data, dst_step, uv_data, dst_step, width, height, scn, swapBlue, uIdx);
CV_CPU_DISPATCH(cvtBGRtoTwoPlaneYUV, (src_data, src_step, y_data, uv_data, dst_step, width, height, scn, swapBlue, uIdx),
CV_CPU_DISPATCH_MODES_ALL);
}
// 4:2:2 interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
static void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
int dcn, bool swapBlue, int uIdx, int ycn)
int dcn, bool swapBlue, int uIdx, int ycn, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtOnePlaneYUVtoBGR, cv_hal_cvtOnePlaneYUVtoBGRApprox, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn);
}
CALL_HAL(cvtOnePlaneYUVtoBGR, cv_hal_cvtOnePlaneYUVtoBGR, src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn);
CV_CPU_DISPATCH(cvtOnePlaneYUVtoBGR, (src_data, src_step, dst_data, dst_step, width, height, dcn, swapBlue, uIdx, ycn),
@ -230,13 +249,18 @@ void cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
// 4:2:2 interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 14-bit fixed-point arithmetics is used
void cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step,
static void cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step,
uchar * dst_data, size_t dst_step,
int width, int height,
int scn, bool swapBlue, int uIdx, int ycn)
int scn, bool swapBlue, int uIdx, int ycn, AlgorithmHint hint)
{
CV_INSTRUMENT_REGION();
if (hint == ALGO_HINT_APPROX)
{
CALL_HAL(cvtOnePlaneBGRtoYUV, cv_hal_cvtOnePlaneBGRtoYUVApprox, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx, ycn);
}
CALL_HAL(cvtOnePlaneBGRtoYUV, cv_hal_cvtOnePlaneBGRtoYUV, src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx, ycn);
CV_CPU_DISPATCH(cvtOnePlaneBGRtoYUV, (src_data, src_step, dst_data, dst_step, width, height, scn, swapBlue, uIdx, ycn),
@ -386,43 +410,43 @@ bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx,
// HAL calls
//
void cvtColorBGR2YUV(InputArray _src, OutputArray _dst, bool swapb, bool crcb)
void cvtColorBGR2YUV(InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, bool crcb)
{
CvtHelper< Set<3, 4>, Set<3>, Set<CV_8U, CV_16U, CV_32F> > h(_src, _dst, 3);
hal::cvtBGRtoYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows,
h.depth, h.scn, swapb, crcb);
h.depth, h.scn, swapb, crcb, hint);
}
void cvtColorYUV2BGR(InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb)
void cvtColorYUV2BGR(InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, bool crcb)
{
if(dcn <= 0) dcn = 3;
CvtHelper< Set<3>, Set<3, 4>, Set<CV_8U, CV_16U, CV_32F> > h(_src, _dst, dcn);
hal::cvtYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows,
h.depth, dcn, swapb, crcb);
h.depth, dcn, swapb, crcb, hint);
}
// 4:2:2 interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn)
void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx, int ycn)
{
CvtHelper< Set<2>, Set<3, 4>, Set<CV_8U>, FROM_UYVY > h(_src, _dst, dcn);
hal::cvtOnePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows,
dcn, swapb, uidx, ycn);
dcn, swapb, uidx, ycn, hint);
}
// 4:2:2 interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 14-bit fixed-point arithmetics is used
void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, int uidx, int ycn)
void cvtColorOnePlaneBGR2YUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx, int ycn)
{
CvtHelper< Set<3, 4>, Set<2>, Set<CV_8U>, TO_UYVY > h(_src, _dst, 2);
hal::cvtOnePlaneBGRtoYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows,
h.scn, swapb, uidx, ycn);
h.scn, swapb, uidx, ycn, hint);
}
void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi )
@ -435,12 +459,12 @@ void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi )
// 4:2:0, three planes in one array: Y, U, V
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx)
void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, AlgorithmHint hint, bool swapb, int uidx)
{
CvtHelper< Set<3, 4>, Set<1>, Set<CV_8U>, TO_YUV > h(_src, _dst, 1);
hal::cvtBGRtoThreePlaneYUV(h.src.data, h.src.step, h.dst.data, h.dst.step, h.src.cols, h.src.rows,
h.scn, swapb, uidx);
h.scn, swapb, uidx, hint);
}
void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst )
@ -460,32 +484,32 @@ void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst )
// 4:2:0, three planes in one array: Y, U, V
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx)
void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx)
{
if(dcn <= 0) dcn = 3;
CvtHelper< Set<1>, Set<3, 4>, Set<CV_8U>, FROM_YUV> h(_src, _dst, dcn);
hal::cvtThreePlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows,
dcn, swapb, uidx);
dcn, swapb, uidx, hint);
}
// 4:2:0, two planes in one array: Y, UV interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
// see also: http://www.fourcc.org/yuv.php#NV21, http://www.fourcc.org/yuv.php#NV12
void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx )
void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx )
{
if(dcn <= 0) dcn = 3;
CvtHelper< Set<1>, Set<3, 4>, Set<CV_8U>, FROM_YUV> h(_src, _dst, dcn);
hal::cvtTwoPlaneYUVtoBGR(h.src.data, h.src.step, h.dst.data, h.dst.step, h.dst.cols, h.dst.rows,
dcn, swapb, uidx);
dcn, swapb, uidx, hint);
}
// 4:2:0, two planes: Y, UV interleaved
// Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
// 20-bit fixed-point arithmetics
void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx )
void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, AlgorithmHint hint, int dcn, bool swapb, int uidx )
{
int stype = _ysrc.type();
int depth = CV_MAT_DEPTH(stype);
@ -503,13 +527,13 @@ void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArr
{
hal::cvtTwoPlaneYUVtoBGR(ysrc.data, uvsrc.data, ysrc.step,
dst.data, dst.step, dst.cols, dst.rows,
dcn, swapb, uidx);
dcn, swapb, uidx, hint);
}
else
{
hal::cvtTwoPlaneYUVtoBGR(ysrc.data, ysrc.step, uvsrc.data, uvsrc.step,
dst.data, dst.step, dst.cols, dst.rows,
dcn, swapb, uidx);
dcn, swapb, uidx, hint);
}
}

@ -273,6 +273,29 @@ inline int hal_ni_resize(int src_type, const uchar *src_data, size_t src_step, i
@sa cv::warpAffine, cv::hal::warpAffine
*/
inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_warpAffineBlocklineNN computes a row of the affine transformation (nearest-neighbor variant)
@param adelta input M0 * x array
@param bdelta input M3 * x array
@param xy output (x', y') coordinates
@param X0 input M1 * y + M2 value
@param Y0 input M4 * y + M5 value
@param bw length of the row
@sa cv::warpAffineBlocklineNN, cv::hal::warpAffineBlocklineNN
*/
inline int hal_ni_warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_warpAffineBlockline computes a row of the affine transformation
@param adelta input M0 * x array
@param bdelta input M3 * x array
@param xy output (x', y') coordinates
@param alpha output least significant bits of the (x', y') coordinates for interpolation
@param X0 input M1 * y + M2 value
@param Y0 input M4 * y + M5 value
@param bw length of the row
@sa cv::warpAffineBlockline, cv::hal::warpAffineBlockline
*/
inline int hal_ni_warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_warpPerspective
@param src_type source and destination image type
@ -291,11 +314,38 @@ inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_ste
@sa cv::warpPerspective, cv::hal::warpPerspective
*/
inline int hal_ni_warpPerspective(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_warpPerspectiveBlocklineNN performs one row of the perspective transformation (nearest-neighbor path)
@param M 3x3 matrix with transform coefficients
@param xy output (x', y') coordinates
@param X0 input M0 * x0 + M1 * y + M2 value
@param Y0 input M3 * x0 + M4 * y + M5 value
@param W0 input M6 * x0 + M7 * y + M8 value
@param bw length of the row
@sa cv::warpPerspectiveBlocklineNN, cv::hal::warpPerspectiveBlocklineNN
*/
inline int hal_ni_warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_warpPerspectiveBlockline performs one row of the perspective transformation (interpolation path)
@param M 3x3 matrix with transform coefficients
@param xy output (x', y') coordinates
@param alpha output least significant bits of the (x', y') coordinates for interpolation
@param X0 input M0 * x0 + M1 * y + M2 value
@param Y0 input M3 * x0 + M4 * y + M5 value
@param W0 input M6 * x0 + M7 * y + M8 value
@param bw length of the row
@sa cv::warpPerspectiveBlockline, cv::hal::warpPerspectiveBlockline
*/
inline int hal_ni_warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
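The perspective variant differs only in the per-pixel division by W; the scalar fallback further down in imgwarp.cpp reduces to the following sketch (needs <climits> and <algorithm> in addition to the imgproc header):
// Scalar reference for one perspective interpolation blockline (mirrors the fallback loop below).
static void warpPerspectiveBlockline_ref(const double* M, short* xy, short* alpha,
                                         double X0, double Y0, double W0, int bw)
{
    for (int x1 = 0; x1 < bw; x1++)
    {
        double W = W0 + M[6]*x1;
        W = W ? cv::INTER_TAB_SIZE/W : 0;                              // perspective divide
        double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
        double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
        int X = cv::saturate_cast<int>(fX), Y = cv::saturate_cast<int>(fY);
        xy[x1*2]   = cv::saturate_cast<short>(X >> cv::INTER_BITS);
        xy[x1*2+1] = cv::saturate_cast<short>(Y >> cv::INTER_BITS);
        alpha[x1]  = (short)((Y & (cv::INTER_TAB_SIZE-1))*cv::INTER_TAB_SIZE +
                             (X & (cv::INTER_TAB_SIZE-1)));
    }
}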
//! @cond IGNORED
#define cv_hal_resize hal_ni_resize
#define cv_hal_warpAffine hal_ni_warpAffine
#define cv_hal_warpAffineBlocklineNN hal_ni_warpAffineBlocklineNN
#define cv_hal_warpAffineBlockline hal_ni_warpAffineBlockline
#define cv_hal_warpPerspective hal_ni_warpPerspective
#define cv_hal_warpPerspectiveBlocklineNN hal_ni_warpPerspectiveBlocklineNN
#define cv_hal_warpPerspectiveBlockline hal_ni_warpPerspectiveBlockline
//! @endcond
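A custom HAL takes over any of these hooks by redefining the corresponding macro in its own header. A hedged sketch (the function name is hypothetical; returning CV_HAL_ERROR_NOT_IMPLEMENTED from the hook falls back to the built-in code path):
// Hypothetical vendor HAL header, not part of this patch.
int my_warpAffineBlockline(int* adelta, int* bdelta, short* xy, short* alpha,
                           int X0, int Y0, int bw);   // vendor-provided implementation
#undef  cv_hal_warpAffineBlockline
#define cv_hal_warpAffineBlockline my_warpAffineBlockline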
/**
@ -449,6 +499,23 @@ inline int hal_ni_cvtGraytoBGR5x5(const uchar * src_data, size_t src_step, uchar
*/
inline int hal_ni_cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtBGRtoYUV, but allows approximations (not bit-exact)
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@param dst_step destination image step
@param width image width
@param height image height
@param depth image depth (one of CV_8U, CV_16U or CV_32F)
@param scn source image channels (3 or 4)
@param swapBlue if set to true B and R source channels will be swapped (treat as RGB)
@param isCbCr if set to true write output in YCbCr format
Convert from BGR, RGB, BGRA or RGBA to YUV or YCbCr.
*/
inline int hal_ni_cvtBGRtoYUVApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int scn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_cvtYUVtoBGR
@param src_data source image data
@ -465,6 +532,22 @@ inline int hal_ni_cvtBGRtoYUV(const uchar * src_data, size_t src_step, uchar * d
*/
inline int hal_ni_cvtYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtYUVtoBGR, but allows approximations (not bit-exact)
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@param dst_step destination image step
@param width image width
@param height image height
@param depth image depth (one of CV_8U, CV_16U or CV_32F)
@param dcn destination image channels (3 or 4)
@param swapBlue if set to true B and R destination channels will be swapped (write RGB)
@param isCbCr if set to true treat source as YCbCr
Convert from YUV or YCbCr to BGR, RGB, BGRA or RGBA.
*/
inline int hal_ni_cvtYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int depth, int dcn, bool swapBlue, bool isCbCr) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
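An Approx hook may trade bit-exactness for speed, but it should reject configurations it does not handle so OpenCV can fall back to its own implementation. A hypothetical skeleton (the function name is illustrative only):
// Sketch of a vendor implementation of the approximate YUV->BGR hook.
int my_cvtYUVtoBGRApprox(const uchar* src_data, size_t src_step,
                         uchar* dst_data, size_t dst_step,
                         int width, int height, int depth,
                         int dcn, bool swapBlue, bool isCbCr)
{
    if (depth != CV_8U || isCbCr)
        return CV_HAL_ERROR_NOT_IMPLEMENTED;   // defer to the OpenCV code path
    // ... fast, approximate YUV -> BGR/RGB(A) conversion ...
    return CV_HAL_ERROR_OK;
}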
/**
@brief hal_cvtBGRtoXYZ
@param src_data source image data
@ -580,6 +663,24 @@ inline int hal_ni_cvtLabtoBGR(const uchar * src_data, size_t src_step, uchar * d
*/
inline int hal_ni_cvtTwoPlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtTwoPlaneYUVtoBGR that allows approximations (not bit-exact)
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@param dst_step destination image step
@param dst_width destination image width
@param dst_height destination image height
@param dcn destination image channels (3 or 4)
@param swapBlue if set to true B and R destination channels will be swapped (write RGB)
@param uIdx U-channel index in the interleaved U/V plane (0 or 1)
Convert from YUV (YUV420sp (or NV12/NV21) - Y plane followed by interleaved U/V plane) to BGR, RGB, BGRA or RGBA.
Only for CV_8U.
Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
*/
inline int hal_ni_cvtTwoPlaneYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Extended version of hal_cvtTwoPlaneYUVtoBGR.
@param y_data source image data (Y-plane)
@ -601,6 +702,27 @@ inline int hal_ni_cvtTwoPlaneYUVtoBGREx(const uchar * y_data, size_t y_step, con
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Extended version of hal_cvtTwoPlaneYUVtoBGR that allows approximations (not bit-exact)
@param y_data source image data (Y-plane)
@param y_step source image step (Y-plane)
@param uv_data source image data (UV-plane)
@param uv_step source image step (UV-plane)
@param dst_data destination image data
@param dst_step destination image step
@param dst_width destination image width
@param dst_height destination image height
@param dcn destination image channels (3 or 4)
@param swapBlue if set to true B and R destination channels will be swapped (write RGB)
@param uIdx U-channel index in the interleaved U/V plane (0 or 1)
Convert from YUV (YUV420sp (or NV12/NV21) - Y plane followed by interleaved U/V plane) to BGR, RGB, BGRA or RGBA.
Only for CV_8U.
Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
*/
inline int hal_ni_cvtTwoPlaneYUVtoBGRExApprox(const uchar * y_data, size_t y_step, const uchar * uv_data, size_t uv_step,
uchar * dst_data, size_t dst_step, int dst_width, int dst_height,
int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_cvtBGRtoTwoPlaneYUV
@param src_data source image data
@ -640,6 +762,23 @@ inline int hal_ni_cvtBGRtoTwoPlaneYUV(const uchar * src_data, size_t src_step,
*/
inline int hal_ni_cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtThreePlaneYUVtoBGR that allows approximations (not bit-exact)
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@param dst_step destination image step
@param dst_width destination image width
@param dst_height destination image height
@param dcn destination image channels (3 or 4)
@param swapBlue if set to true B and R destination channels will be swapped (write RGB)
@param uIdx U-channel plane index (0 or 1)
Convert from YUV (YUV420p (or YV12/YV21) - Y plane followed by U and V planes) to BGR, RGB, BGRA or RGBA.
Only for CV_8U.
Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
*/
inline int hal_ni_cvtThreePlaneYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int dst_width, int dst_height, int dcn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_cvtBGRtoThreePlaneYUV
@param src_data source image data
@ -657,6 +796,24 @@ inline int hal_ni_cvtThreePlaneYUVtoBGR(const uchar * src_data, size_t src_step,
*/
inline int hal_ni_cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtBGRtoThreePlaneYUV that allows approximations (not bit-exact)
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@param dst_step destination image step
@param width image width
@param height image height
@param scn source image channels (3 or 4)
@param swapBlue if set to true B and R source channels will be swapped (treat as RGB)
@param uIdx U-channel plane index (0 or 1)
Convert from BGR, RGB, BGRA or RGBA to YUV (YUV420p (or YV12/YV21) - Y plane followed by U and V planes).
Only for CV_8U.
Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
*/
inline int hal_ni_cvtBGRtoThreePlaneYUVApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_cvtOnePlaneYUVtoBGR
@param src_data source image data
@ -675,6 +832,24 @@ inline int hal_ni_cvtBGRtoThreePlaneYUV(const uchar * src_data, size_t src_step,
*/
inline int hal_ni_cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtOnePlaneYUVtoBGR that allows approximations (not bit-exact)
@param src_data source image data
@param src_step source image step
@param dst_data destination image data
@param dst_step destination image step
@param width image width
@param height image height
@param dcn destination image channels (3 or 4)
@param swapBlue if set to true B and R destination channels will be swapped (write RGB)
@param uIdx U-channel index (0 or 1)
@param ycn Y-channel index (0 or 1)
Convert from interleaved YUV 4:2:2 (UYVY, YUY2 or YVYU) to BGR, RGB, BGRA or RGBA.
Only for CV_8U.
Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
*/
inline int hal_ni_cvtOnePlaneYUVtoBGRApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int dcn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_cvtOnePlaneBGRtoYUV
@param src_data,src_step source image data and step
@ -690,6 +865,21 @@ inline int hal_ni_cvtOnePlaneYUVtoBGR(const uchar * src_data, size_t src_step, u
*/
inline int hal_ni_cvtOnePlaneBGRtoYUV(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief Analog of hal_cvtOnePlaneBGRtoYUV that allows approximations (not bit-exact)
@param src_data,src_step source image data and step
@param dst_data,dst_step destination image data and step
@param width,height image size
@param scn source image channels (3 or 4)
@param swapBlue if set to true B and R destination channels will be swapped (write RGB)
@param uIdx U-channel index (0 or 1)
@param ycn Y-channel index (0 or 1)
Convert from BGR, RGB, BGRA or RGBA to interleaved YUV 4:2:2 (UYVY, YUY2 or YVYU).
Only for CV_8U.
Y : [16, 235]; Cb, Cr: [16, 240] centered at 128
*/
inline int hal_ni_cvtOnePlaneBGRtoYUVApprox(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int scn, bool swapBlue, int uIdx, int ycn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
/**
@brief hal_cvtRGBAtoMultipliedRGBA
@param src_data source image data
@ -725,7 +915,9 @@ inline int hal_ni_cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_ste
#define cv_hal_cvtBGR5x5toGray hal_ni_cvtBGR5x5toGray
#define cv_hal_cvtGraytoBGR5x5 hal_ni_cvtGraytoBGR5x5
#define cv_hal_cvtBGRtoYUV hal_ni_cvtBGRtoYUV
#define cv_hal_cvtBGRtoYUVApprox hal_ni_cvtBGRtoYUVApprox
#define cv_hal_cvtYUVtoBGR hal_ni_cvtYUVtoBGR
#define cv_hal_cvtYUVtoBGRApprox hal_ni_cvtYUVtoBGRApprox
#define cv_hal_cvtBGRtoXYZ hal_ni_cvtBGRtoXYZ
#define cv_hal_cvtXYZtoBGR hal_ni_cvtXYZtoBGR
#define cv_hal_cvtBGRtoHSV hal_ni_cvtBGRtoHSV
@ -733,12 +925,18 @@ inline int hal_ni_cvtMultipliedRGBAtoRGBA(const uchar * src_data, size_t src_ste
#define cv_hal_cvtBGRtoLab hal_ni_cvtBGRtoLab
#define cv_hal_cvtLabtoBGR hal_ni_cvtLabtoBGR
#define cv_hal_cvtTwoPlaneYUVtoBGR hal_ni_cvtTwoPlaneYUVtoBGR
#define cv_hal_cvtTwoPlaneYUVtoBGRApprox hal_ni_cvtTwoPlaneYUVtoBGRApprox
#define cv_hal_cvtTwoPlaneYUVtoBGREx hal_ni_cvtTwoPlaneYUVtoBGREx
#define cv_hal_cvtTwoPlaneYUVtoBGRExApprox hal_ni_cvtTwoPlaneYUVtoBGRExApprox
#define cv_hal_cvtBGRtoTwoPlaneYUV hal_ni_cvtBGRtoTwoPlaneYUV
#define cv_hal_cvtThreePlaneYUVtoBGR hal_ni_cvtThreePlaneYUVtoBGR
#define cv_hal_cvtThreePlaneYUVtoBGRApprox hal_ni_cvtThreePlaneYUVtoBGRApprox
#define cv_hal_cvtBGRtoThreePlaneYUV hal_ni_cvtBGRtoThreePlaneYUV
#define cv_hal_cvtBGRtoThreePlaneYUVApprox hal_ni_cvtBGRtoThreePlaneYUVApprox
#define cv_hal_cvtOnePlaneYUVtoBGR hal_ni_cvtOnePlaneYUVtoBGR
#define cv_hal_cvtOnePlaneYUVtoBGRApprox hal_ni_cvtOnePlaneYUVtoBGRApprox
#define cv_hal_cvtOnePlaneBGRtoYUV hal_ni_cvtOnePlaneBGRtoYUV
#define cv_hal_cvtOnePlaneBGRtoYUVApprox hal_ni_cvtOnePlaneBGRtoYUVApprox
#define cv_hal_cvtRGBAtoMultipliedRGBA hal_ni_cvtRGBAtoMultipliedRGBA
#define cv_hal_cvtMultipliedRGBAtoRGBA hal_ni_cvtMultipliedRGBAtoRGBA
//! @endcond

@ -2169,16 +2169,7 @@ public:
short *XY = __XY.data(), *A = __A.data();
const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS;
int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, x1, y1;
#if CV_TRY_AVX2
bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
#endif
#if CV_TRY_SSE4_1
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
#endif
#if CV_TRY_LASX
bool useLASX = CV_CPU_HAS_SUPPORT_LASX;
#endif
int round_delta = interpolation == INTER_NEAREST ? AB_SCALE/2 : AB_SCALE/INTER_TAB_SIZE/2, x, y, y1;
int bh0 = std::min(BLOCK_SZ/2, dst.rows);
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, dst.cols);
@ -2201,84 +2192,9 @@ public:
int Y0 = saturate_cast<int>((M[4]*(y + y1) + M[5])*AB_SCALE) + round_delta;
if( interpolation == INTER_NEAREST )
{
x1 = 0;
#if CV_TRY_SSE4_1
if( useSSE4_1 )
opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta + x, bdelta + x, xy, X0, Y0, bw);
else
#endif
{
#if CV_SIMD128
{
v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
int span = VTraits<v_uint16x8>::vlanes();
for( ; x1 <= bw - span; x1 += span )
{
v_int16x8 v_dst[2];
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset))),\
v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset + 4))))
v_dst[0] = CV_CONVERT_MAP(adelta, x+x1, v_X0);
v_dst[1] = CV_CONVERT_MAP(bdelta, x+x1, v_Y0);
#undef CV_CONVERT_MAP
v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
}
}
#endif
for( ; x1 < bw; x1++ )
{
int X = (X0 + adelta[x+x1]) >> AB_BITS;
int Y = (Y0 + bdelta[x+x1]) >> AB_BITS;
xy[x1*2] = saturate_cast<short>(X);
xy[x1*2+1] = saturate_cast<short>(Y);
}
}
}
hal::warpAffineBlocklineNN(adelta + x, bdelta + x, xy, X0, Y0, bw);
else
{
short* alpha = A + y1*bw;
x1 = 0;
#if CV_TRY_AVX2
if ( useAVX2 )
x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
#endif
#if CV_TRY_LASX
if ( useLASX )
x1 = opt_LASX::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
#endif
#if CV_SIMD128
{
v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
int span = VTraits<v_float32x4>::vlanes();
for( ; x1 <= bw - span * 2; x1 += span * 2 )
{
v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(this->adelta + x + x1)));
v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(this->bdelta + x + x1)));
v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(this->adelta + x + x1 + span)));
v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(this->bdelta + x + x1 + span)));
v_int16x8 v_xy[2];
v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);
v_int32x4 v_alpha0 = v_or(v_shl<INTER_BITS>(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask));
v_int32x4 v_alpha1 = v_or(v_shl<INTER_BITS>(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask));
v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
}
}
#endif
for( ; x1 < bw; x1++ )
{
int X = (X0 + adelta[x+x1]) >> (AB_BITS - INTER_BITS);
int Y = (Y0 + bdelta[x+x1]) >> (AB_BITS - INTER_BITS);
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
}
hal::warpAffineBlockline(adelta + x, bdelta + x, xy, A + y1*bw, X0, Y0, bw);
}
if( interpolation == INTER_NEAREST )
@ -2703,6 +2619,97 @@ void warpAffine(int src_type,
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
void warpAffineBlocklineNN(int *adelta, int *bdelta, short* xy, int X0, int Y0, int bw)
{
CALL_HAL(warpAffineBlocklineNN, cv_hal_warpAffineBlocklineNN, adelta, bdelta, xy, X0, Y0, bw);
const int AB_BITS = MAX(10, (int)INTER_BITS);
int x1 = 0;
#if CV_TRY_SSE4_1
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
if( useSSE4_1 )
opt_SSE4_1::WarpAffineInvoker_Blockline_SSE41(adelta, bdelta, xy, X0, Y0, bw);
else
#endif
{
#if CV_SIMD128
{
v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
int span = VTraits<v_uint16x8>::vlanes();
for( ; x1 <= bw - span; x1 += span )
{
v_int16x8 v_dst[2];
#define CV_CONVERT_MAP(ptr,offset,shift) v_pack(v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset))),\
v_shr<AB_BITS>(v_add(shift,v_load(ptr + offset + 4))))
v_dst[0] = CV_CONVERT_MAP(adelta, x1, v_X0);
v_dst[1] = CV_CONVERT_MAP(bdelta, x1, v_Y0);
#undef CV_CONVERT_MAP
v_store_interleave(xy + (x1 << 1), v_dst[0], v_dst[1]);
}
}
#endif
for( ; x1 < bw; x1++ )
{
int X = (X0 + adelta[x1]) >> AB_BITS;
int Y = (Y0 + bdelta[x1]) >> AB_BITS;
xy[x1*2] = saturate_cast<short>(X);
xy[x1*2+1] = saturate_cast<short>(Y);
}
}
}
void warpAffineBlockline(int *adelta, int *bdelta, short* xy, short* alpha, int X0, int Y0, int bw)
{
CALL_HAL(warpAffineBlockline, cv_hal_warpAffineBlockline, adelta, bdelta, xy, alpha, X0, Y0, bw);
const int AB_BITS = MAX(10, (int)INTER_BITS);
int x1 = 0;
#if CV_TRY_AVX2
bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
if ( useAVX2 )
x1 = opt_AVX2::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw);
#endif
#if CV_TRY_LASX
bool useLASX = CV_CPU_HAS_SUPPORT_LASX;
if ( useLASX )
x1 = opt_LASX::warpAffineBlockline(adelta, bdelta, xy, alpha, X0, Y0, bw);
#endif
{
#if CV_SIMD128
{
v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
int span = VTraits<v_float32x4>::vlanes();
for( ; x1 <= bw - span * 2; x1 += span * 2 )
{
v_int32x4 v_X0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(adelta + x1)));
v_int32x4 v_Y0 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(bdelta + x1)));
v_int32x4 v_X1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__X0, v_load(adelta + x1 + span)));
v_int32x4 v_Y1 = v_shr<AB_BITS - INTER_BITS>(v_add(v__Y0, v_load(bdelta + x1 + span)));
v_int16x8 v_xy[2];
v_xy[0] = v_pack(v_shr<INTER_BITS>(v_X0), v_shr<INTER_BITS>(v_X1));
v_xy[1] = v_pack(v_shr<INTER_BITS>(v_Y0), v_shr<INTER_BITS>(v_Y1));
v_store_interleave(xy + (x1 << 1), v_xy[0], v_xy[1]);
v_int32x4 v_alpha0 = v_or(v_shl<INTER_BITS>(v_and(v_Y0, v_mask)), v_and(v_X0, v_mask));
v_int32x4 v_alpha1 = v_or(v_shl<INTER_BITS>(v_and(v_Y1, v_mask)), v_and(v_X1, v_mask));
v_store(alpha + x1, v_pack(v_alpha0, v_alpha1));
}
}
#endif
for( ; x1 < bw; x1++ )
{
int X = (X0 + adelta[x1]) >> (AB_BITS - INTER_BITS);
int Y = (Y0 + bdelta[x1]) >> (AB_BITS - INTER_BITS);
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
}
}
} // hal::
} // cv::
@ -3105,12 +3112,6 @@ public:
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
#if CV_TRY_SSE4_1
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
if(CV_CPU_HAS_SUPPORT_SSE4_1)
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
#endif
for( y = range.start; y < range.end; y += bh0 )
{
for( x = 0; x < width; x += bw0 )
@ -3129,57 +3130,9 @@ public:
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
if( interpolation == INTER_NEAREST )
{
#if CV_TRY_SSE4_1
if (pwarp_impl_sse4)
pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
else
#endif
#if CV_SIMD128_64F
WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw);
#else
for( int x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? 1./W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1*2] = saturate_cast<short>(X);
xy[x1*2+1] = saturate_cast<short>(Y);
}
#endif
}
hal::warpPerspectiveBlocklineNN(M, xy, X0, Y0, W0, bw);
else
{
short* alpha = A + y1*bw;
#if CV_TRY_SSE4_1
if (pwarp_impl_sse4)
pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
else
#endif
#if CV_SIMD128_64F
WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw);
#else
for( int x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? INTER_TAB_SIZE/W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
#endif
}
hal::warpPerspectiveBlockline(M, xy, A + y1*bw, X0, Y0, W0, bw);
}
if( interpolation == INTER_NEAREST )
@ -3272,6 +3225,74 @@ void warpPerspective(int src_type,
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
}
void warpPerspectiveBlocklineNN(const double *M, short* xy, double X0, double Y0, double W0, int bw)
{
CALL_HAL(warpPerspectiveBlocklineNN, cv_hal_warpPerspectiveBlocklineNN, M, xy, X0, Y0, W0, bw);
#if CV_TRY_SSE4_1
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
if(CV_CPU_HAS_SUPPORT_SSE4_1)
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
if (pwarp_impl_sse4)
pwarp_impl_sse4->processNN(M, xy, X0, Y0, W0, bw);
else
#endif
{
#if CV_SIMD128_64F
WarpPerspectiveLine_ProcessNN_CV_SIMD(M, xy, X0, Y0, W0, bw);
#else
for( int x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? 1./W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1*2] = saturate_cast<short>(X);
xy[x1*2+1] = saturate_cast<short>(Y);
}
#endif
}
}
void warpPerspectiveBlockline(const double *M, short* xy, short* alpha, double X0, double Y0, double W0, int bw)
{
CALL_HAL(warpPerspectiveBlockline, cv_hal_warpPerspectiveBlockline, M, xy, alpha, X0, Y0, W0, bw);
#if CV_TRY_SSE4_1
Ptr<opt_SSE4_1::WarpPerspectiveLine_SSE4> pwarp_impl_sse4;
if(CV_CPU_HAS_SUPPORT_SSE4_1)
pwarp_impl_sse4 = opt_SSE4_1::WarpPerspectiveLine_SSE4::getImpl(M);
if (pwarp_impl_sse4)
pwarp_impl_sse4->process(M, xy, alpha, X0, Y0, W0, bw);
else
#endif
{
#if CV_SIMD128_64F
WarpPerspectiveLine_Process_CV_SIMD(M, xy, alpha, X0, Y0, W0, bw);
#else
for( int x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? INTER_TAB_SIZE/W : 0;
double fX = std::max((double)INT_MIN, std::min((double)INT_MAX, (X0 + M[0]*x1)*W));
double fY = std::max((double)INT_MIN, std::min((double)INT_MAX, (Y0 + M[3]*x1)*W));
int X = saturate_cast<int>(fX);
int Y = saturate_cast<int>(fY);
xy[x1*2] = saturate_cast<short>(X >> INTER_BITS);
xy[x1*2+1] = saturate_cast<short>(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
#endif
}
}
} // hal::
} // cv::

@ -2657,7 +2657,7 @@ TEST(Imgproc_ColorLab_Full, bitExactness)
Mat probe(256, 256, CV_8UC3), result;
rng.fill(probe, RNG::UNIFORM, 0, 255, true);
cvtColor(probe, result, codes[c]);
cvtColor(probe, result, codes[c], 0, ALGO_HINT_ACCURATE);
uint32_t h = adler32(result);
uint32_t goodHash = hashes[c*nIterations + iter];
@ -2749,7 +2749,7 @@ TEST(Imgproc_ColorLuv_Full, bitExactness)
Mat probe(256, 256, CV_8UC3), result;
rng.fill(probe, RNG::UNIFORM, 0, 255, true);
cvtColor(probe, result, codes[c]);
cvtColor(probe, result, codes[c], 0, ALGO_HINT_ACCURATE);
uint32_t h = adler32(result);
uint32_t goodHash = hashes[c*nIterations + iter];
@ -2808,7 +2808,7 @@ void runCvtColorBitExactCheck(ColorConversionCodes code, int inputType, uint32_t
Mat dst;
rng.fill(src, RNG::UNIFORM, 0, 255, true);
cv::cvtColor(src, dst, code);
cv::cvtColor(src, dst, code, 0, ALGO_HINT_ACCURATE);
uint32_t dst_hash = adler32(dst);

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if(isNodeJs) {
var Base = require("./base");

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -1,4 +1,4 @@
const isNodeJs = (typeof window) === 'undefined'? true : false;
var isNodeJs = (typeof window) === 'undefined'? true : false;
if (isNodeJs) {
var Benchmark = require('benchmark');

@ -854,7 +854,22 @@ class FuncInfo(object):
all_code_variants = []
# See https://github.com/opencv/opencv/issues/25928
# Conversion to UMat is more expensive than conversion to Mat.
# To reduce this cost, variants that convert to Mat are preferred over those that convert to UMat.
variants = []
variants_umat = []
for v in self.variants:
hasUMat = False
for a in v.args:
hasUMat = hasUMat or "UMat" in a.tp
if hasUMat :
variants_umat.append(v)
else:
variants.append(v)
variants.extend(variants_umat)
for v in variants:
code_decl = ""
code_ret = ""
code_cvt_list = []

@ -958,7 +958,7 @@ class CanUsePurePythonModuleFunction(NewOpenCVTests):
class SamplesFindFile(NewOpenCVTests):
def test_ExistedFile(self):
res = cv.samples.findFile('lena.jpg', False)
res = cv.samples.findFile('HappyFish.jpg', False)
self.assertNotEqual(res, '')
def test_MissingFile(self):

@ -1,7 +1,7 @@
# --- obsensor ---
if(NOT HAVE_OBSENSOR)
if(OBSENSOR_USE_ORBBEC_SDK)
include(${CMAKE_SOURCE_DIR}/3rdparty/orbbecsdk/orbbecsdk.cmake)
include("${OpenCV_SOURCE_DIR}/3rdparty/orbbecsdk/orbbecsdk.cmake")
download_orbbec_sdk(ORBBEC_SDK_ROOT_DIR)
message(STATUS "ORBBEC_SDK_ROOT_DIR: ${ORBBEC_SDK_ROOT_DIR}")
if(ORBBEC_SDK_ROOT_DIR)
