From 233612efd7925bd022777d297fdf65215f16dcc8 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 8 Apr 2016 16:03:51 +0300 Subject: [PATCH] Reworked HAL dft/dct interface, added replacement documentation --- modules/core/include/opencv2/core/hal/hal.hpp | 32 +- .../core/include/opencv2/core/hal/interface.h | 25 +- modules/core/src/dxt.cpp | 326 ++++++++---------- modules/core/src/hal_replacement.hpp | 105 +++++- modules/imgproc/src/templmatch.cpp | 19 +- 5 files changed, 273 insertions(+), 234 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 6b9f93dbff..5b01cbe4cd 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -187,24 +187,28 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); -struct DftContext +struct CV_EXPORTS DFT1D { - void * impl; - bool useReplacement; - DftContext() : impl(0), useReplacement(false) {} + static Ptr create(int len, int count, int depth, int flags, bool * useBuffer = 0); + virtual void apply(const uchar *src, uchar *dst) = 0; + virtual ~DFT1D() {} }; -CV_EXPORTS void dftInit2D(DftContext & c, int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows = 0); -CV_EXPORTS void dft2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); -CV_EXPORTS void dftFree2D(DftContext & c); - -CV_EXPORTS void dftInit1D(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0); -CV_EXPORTS void dft1D(const DftContext & c, const void * src, void * dst); -CV_EXPORTS 
void dftFree1D(DftContext & c); +struct CV_EXPORTS DFT2D +{ + static Ptr create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows = 0); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DFT2D() {} +}; -CV_EXPORTS void dctInit2D(DftContext & c, int width, int height, int depth, int flags); -CV_EXPORTS void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); -CV_EXPORTS void dctFree2D(DftContext & c); +struct CV_EXPORTS DCT2D +{ + static Ptr create(int width, int height, int depth, int flags); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DCT2D() {} +}; //! @} core_hal diff --git a/modules/core/include/opencv2/core/hal/interface.h b/modules/core/include/opencv2/core/hal/interface.h index 0da68f18cd..2bb7b19f21 100644 --- a/modules/core/include/opencv2/core/hal/interface.h +++ b/modules/core/include/opencv2/core/hal/interface.h @@ -11,21 +11,11 @@ #define CV_HAL_ERROR_UNKNOWN -1 //! @} - -#define CV_HAL_DFT_INVERSE 1 -#define CV_HAL_DFT_SCALE 2 -#define CV_HAL_DFT_ROWS 4 -#define CV_HAL_DFT_COMPLEX_OUTPUT 16 -#define CV_HAL_DFT_REAL_OUTPUT 32 -#define CV_HAL_DFT_TWO_STAGE 64 -#define CV_HAL_DFT_STAGE_COLS 128 -#define CV_HAL_DFT_IS_CONTINUOUS 512 -#define CV_HAL_DFT_IS_INPLACE 1024 - #ifdef __cplusplus #include #else #include +#include #endif //! @name Data types @@ -155,6 +145,19 @@ typedef signed char schar; #define CV_HAL_BORDER_ISOLATED 16 //! @} +//! @name DFT flags +//! @{ +#define CV_HAL_DFT_INVERSE 1 +#define CV_HAL_DFT_SCALE 2 +#define CV_HAL_DFT_ROWS 4 +#define CV_HAL_DFT_COMPLEX_OUTPUT 16 +#define CV_HAL_DFT_REAL_OUTPUT 32 +#define CV_HAL_DFT_TWO_STAGE 64 +#define CV_HAL_DFT_STAGE_COLS 128 +#define CV_HAL_DFT_IS_CONTINUOUS 512 +#define CV_HAL_DFT_IS_INPLACE 1024 +//! @} + //! 
@} #endif diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 1ea5496753..2cff51d5a3 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1553,7 +1553,7 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_C_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + Dft_C_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, const Dft& _ippidft, int _norm_flag, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), @@ -1617,7 +1617,7 @@ public: } private: - uchar * src; + const uchar * src; int src_step; uchar * dst; int dst_step; @@ -1634,7 +1634,7 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_R_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + Dft_R_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, const Dft& _ippidft, int _norm_flag, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), @@ -1698,7 +1698,7 @@ public: } private: - uchar * src; + const uchar * src; int src_step; uchar * dst; int dst_step; @@ -1711,7 +1711,7 @@ private: }; template -bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) +bool Dft_C_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) { bool ok; parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); @@ -1719,7 +1719,7 @@ bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int wid } template -bool Dft_R_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) +bool 
Dft_R_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) { bool ok; parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); @@ -1750,7 +1750,7 @@ private: ippiDFT_R_Func func; }; -static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) +static bool ippi_DFT_C_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -1804,7 +1804,7 @@ static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, return false; } -static bool ippi_DFT_R_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) +static bool ippi_DFT_R_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -2611,11 +2611,11 @@ inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuo return InvalidDim; } -class OcvDftImpl +class OcvDftImpl : public hal::DFT2D { protected: - hal::DftContext contextA; - hal::DftContext contextB; + Ptr contextA; + Ptr contextB; bool needBufferA; bool needBufferB; bool inv; @@ -2763,7 +2763,7 @@ public: count = height; } needBufferA = isInplace; - hal::dftInit1D(contextA, len, count, depth, f, &needBufferA); + contextA = hal::DFT1D::create(len, count, depth, f, &needBufferA); if (needBufferA) tmp_bufA.allocate(len * complex_elem_size); } @@ -2773,7 +2773,7 @@ public: count = width; f |= CV_HAL_DFT_STAGE_COLS; needBufferB = isInplace; - hal::dftInit1D(contextB, len, count, depth, f, &needBufferB); + contextB = hal::DFT1D::create(len, count, depth, f, &needBufferB); if (needBufferB) tmp_bufB.allocate(len * complex_elem_size); @@ -2783,7 +2783,7 @@ 
public: } } - void run(uchar * src, int src_step, uchar * dst, int dst_step) + void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) { #if defined USE_IPP_DFT if (useIpp) @@ -2860,17 +2860,9 @@ public: } } - void free() - { - if (useIpp) - return; - hal::dftFree1D(contextA); - hal::dftFree1D(contextB); - } - protected: - void rowDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) + void rowDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) { int len, count; if (width == 1 && !isRowTransform ) @@ -2909,7 +2901,7 @@ protected: if( needBufferA ) dptr = tmp_bufA; - hal::dft1D(contextA, sptr, dptr); + contextA->apply(sptr, dptr); if( needBufferA ) memcpy( dptr0, dptr + dptr_offset, dst_full_len ); @@ -2924,7 +2916,7 @@ protected: complementComplexOutput(depth, dst_data, dst_step, len, nz, 1); } - void colDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) + void colDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) { int len = height; int count = width; @@ -2983,8 +2975,8 @@ protected: } if( even ) - hal::dft1D(contextB, buf1, dbuf1); - hal::dft1D(contextB, buf0, dbuf0); + contextB->apply(buf1, dbuf1); + contextB->apply(buf0, dbuf0); if( stage_dst_channels == 1 ) { @@ -3032,12 +3024,12 @@ protected: if( i+1 < b ) { CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size ); - hal::dft1D(contextB, buf1, dbuf1); + contextB->apply(buf1, dbuf1); } else CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size ); - hal::dft1D(contextB, buf0, dbuf0); + contextB->apply(buf0, dbuf0); if( i+1 < b ) CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size ); @@ -3051,7 +3043,7 @@ protected: } }; -class OcvDftBasicImpl +class OcvDftBasicImpl : 
public hal::DFT1D { public: OcvDftOptions opt; @@ -3068,11 +3060,6 @@ public: { opt.factors = _factors; } - OcvDftBasicImpl & operator=(const OcvDftBasicImpl & other) - { - this->opt = other.opt; - return *this; - } void init(int len, int count, int depth, int flags, bool *needBuffer) { int prev_len = opt.n; @@ -3211,7 +3198,7 @@ public: } } - void run(const void * src, void * dst) + void apply(const uchar *src, uchar *dst) { opt.dft_func(opt, src, dst); } @@ -3219,126 +3206,113 @@ public: void free() {} }; -namespace hal { - -//================== 1D ====================== - -void dftInit1D(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer) +struct ReplacementDFT1D : public hal::DFT1D { - int res = cv_hal_dftInit1D(&context.impl, len, count, depth, flags, needBuffer); - if (res == CV_HAL_ERROR_OK) + cvhalDFT *context; + bool isInitialized; + + ReplacementDFT1D() : context(0), isInitialized(false) {} + bool init(int len, int count, int depth, int flags, bool *needBuffer) { - context.useReplacement = true; - return; + int res = cv_hal_dftInit1D(&context, len, count, depth, flags, needBuffer); + isInitialized = (res == CV_HAL_ERROR_OK); + return isInitialized; } - - context.useReplacement = false; - OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; - if (!c) + void apply(const uchar *src, uchar *dst) { - c = new OcvDftBasicImpl(); - context.impl = (void*)c; + if (isInitialized) + { + CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst); + } } - c->init(len, count, depth, flags, needBuffer); -} - -void dft1D(const DftContext & context, const void * src, void * dst) -{ - if (context.useReplacement) + ~ReplacementDFT1D() { - int res = cv_hal_dft1D(context.impl, src, dst); - if (res != CV_HAL_ERROR_OK) + if (isInitialized) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun"); + CALL_HAL(dftFree1D, cv_hal_dftFree1D, context); } - return; } - OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; - c->run(src, 
dst); -} +}; -void dftFree1D(DftContext & context) +struct ReplacementDFT2D : public hal::DFT2D { - if (context.useReplacement) + cvhalDFT *context; + bool isInitialized; + + ReplacementDFT2D() : context(0), isInitialized(false) {} + bool init(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows) + { + int res = cv_hal_dftInit2D(&context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows); + isInitialized = (res == CV_HAL_ERROR_OK); + return isInitialized; + } + void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) { - int res = cv_hal_dftFree1D(context.impl); - if (res != CV_HAL_ERROR_OK) + if (isInitialized) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree"); + CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step); } - return; } - - OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; - if (c) + ~ReplacementDFT2D() { - c->free(); - delete c; - context.impl = 0; + if (isInitialized) + { + CALL_HAL(dftFree2D, cv_hal_dftFree2D, context); + } } -} +}; +namespace hal { -//================== 2D ====================== +//================== 1D ====================== -void dftInit2D(DftContext & c, - int _width, int _height, int _depth, int _src_channels, int _dst_channels, - int flags, - int _nonzero_rows) +Ptr DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer) { - int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); - if (res == CV_HAL_ERROR_OK) { - c.useReplacement = true; - return; + ReplacementDFT1D *impl = new ReplacementDFT1D(); + if (impl->init(len, count, depth, flags, needBuffer)) + { + return Ptr(impl); + } + delete impl; + } + { + OcvDftBasicImpl *impl = new OcvDftBasicImpl(); + impl->init(len, count, depth, flags, needBuffer); + return Ptr(impl); } - c.useReplacement = false; - - if( _width == 1 && _nonzero_rows > 0 ) - CV_Error( 
CV_StsNotImplemented, - "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" - "For fast convolution/correlation use 2-column matrix or single-row matrix instead" ); - - OcvDftImpl * d = new OcvDftImpl(); - d->init(_width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); - c.impl = (void*)d; } -void dft2D(const DftContext & c, - const void * src, int src_step, void * dst, int dst_step) +//================== 2D ====================== + +Ptr DFT2D::create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows) { - if (c.useReplacement) { - int res = cv_hal_dft2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step); - if (res != CV_HAL_ERROR_OK) + ReplacementDFT2D *impl = new ReplacementDFT2D(); + if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows)) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D"); + return Ptr(impl); } - return; + delete impl; } - OcvDftImpl * d = (OcvDftImpl*)c.impl; - d->run((uchar*)src, src_step, (uchar*)dst, dst_step); -} - -void dftFree2D(DftContext & c) -{ - if (c.useReplacement) { - int res = cv_hal_dftFree2D(c.impl); - if (res != CV_HAL_ERROR_OK) + if(width == 1 && nonzero_rows > 0 ) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree2D"); + CV_Error( CV_StsNotImplemented, + "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" + "For fast convolution/correlation use 2-column matrix or single-row matrix instead" ); } - return; + OcvDftImpl *impl = new OcvDftImpl(); + impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows); + return Ptr(impl); } - OcvDftImpl * d = (OcvDftImpl*)c.impl; - d->free(); - delete d; - c.impl = 0; } } // cv::hal:: - } // cv:: @@ -3382,10 +3356,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int 
flags, int nonzero_rows ) f |= CV_HAL_DFT_SCALE; if (src.data == dst.data) f |= CV_HAL_DFT_IS_INPLACE; - hal::DftContext c; - hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); - hal::dft2D(c, src.data, (int)src.step, dst.data, (int)dst.step); - hal::dftFree2D(c); + Ptr c = hal::DFT2D::create(src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); + c->apply(src.data, src.step, dst.data, dst.step); } @@ -3607,7 +3579,7 @@ namespace cv http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/: */ template static void -DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, +DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step, const Complex* dct_wave ) { static const T sin_45 = (T)0.70710678118654752440084436210485; @@ -3650,7 +3622,7 @@ DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst template static void -IDCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, +IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step, const Complex* dct_wave) { static const T sin_45 = (T)0.70710678118654752440084436210485; @@ -3768,29 +3740,29 @@ DCTInit( int n, int elem_size, void* _wave, int inv ) } -typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, int src_step, void* dft_src, - void* dft_dst, void* dst, int dst_step, const void* dct_wave); +typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src, + void* dft_dst, void* dst, size_t dst_step, const void* dct_wave); -static void DCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, - float* dst, int dst_step, const Complexf* dct_wave) +static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, 
float* dft_src, float* dft_dst, + float* dst, size_t dst_step, const Complexf* dct_wave) { DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void IDCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, - float* dst, int dst_step, const Complexf* dct_wave) +static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst, + float* dst, size_t dst_step, const Complexf* dct_wave) { IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void DCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, - double* dst, int dst_step, const Complexd* dct_wave) +static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst, + double* dst, size_t dst_step, const Complexd* dct_wave) { DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void IDCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, - double* dst, int dst_step, const Complexd* dct_wave) +static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst, + double* dst, size_t dst_step, const Complexd* dct_wave) { IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } @@ -4058,7 +4030,7 @@ static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_s namespace cv { -class OcvDctImpl +class OcvDctImpl : public hal::DCT2D { public: OcvDftOptions opt; @@ -4110,7 +4082,7 @@ public: end_stage = 1; } } - void run(uchar * src, int src_step, uchar * dst, int dst_step) + void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) { CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform)) @@ -4183,69 +4155,65 @@ public: prev_len = len; } // otherwise reuse 
the tables calculated on the previous stage - for(int i = 0; i < count; i++ ) + for(unsigned i = 0; i < static_cast(count); i++ ) { - dct_func( opt, sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, - dptr + i*dstep0, (int)dstep1, dct_wave); + dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf, + dptr + i*dstep0, dstep1, dct_wave); } src = dst; src_step = dst_step; } - } - void free() {} }; -namespace hal { - -void dctInit2D(DftContext & c, int width, int height, int depth, int flags) +struct ReplacementDCT2D : public hal::DCT2D { - int res = cv_hal_dctInit2D(&c.impl, width, height, depth, flags); - if (res == CV_HAL_ERROR_OK) + cvhalDFT *context; + bool isInitialized; + + ReplacementDCT2D() : context(0), isInitialized(false) {} + bool init(int width, int height, int depth, int flags) { - c.useReplacement = true; - return; + int res = cv_hal_dctInit2D(&context, width, height, depth, flags); + isInitialized = (res == CV_HAL_ERROR_OK); + return isInitialized; } - c.useReplacement = false; - OcvDctImpl * impl = new OcvDctImpl(); - impl->init(width, height, depth, flags); - c.impl = impl; -} - -void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step) -{ - if (c.useReplacement) + void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { - int res = cv_hal_dct2D(c.impl, src, src_step, dst, dst_step); - if (res != CV_HAL_ERROR_OK) + if (isInitialized) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun"); + CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step); } - return; } - OcvDctImpl * impl = (OcvDctImpl*)c.impl; - impl->run((uchar*)src, src_step, (uchar*)dst, dst_step); -} + ~ReplacementDCT2D() + { + if (isInitialized) + { + CALL_HAL(dctFree2D, cv_hal_dctFree2D, context); + } + } +}; + +namespace hal { -void dctFree2D(DftContext & c) +Ptr DCT2D::create(int width, int height, int depth, int flags) { - if (c.useReplacement) { - 
int res = cv_hal_dctFree2D(c.impl); - if (res != CV_HAL_ERROR_OK) + ReplacementDCT2D *impl = new ReplacementDCT2D(); + if (impl->init(width, height, depth, flags)) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree"); + return Ptr(impl); } - return; + delete impl; + } + { + OcvDctImpl *impl = new OcvDctImpl(); + impl->init(width, height, depth, flags); + return Ptr(impl); } - OcvDctImpl * impl = (OcvDctImpl*)c.impl; - impl->free(); - delete impl; - c.impl = 0; } } // cv::hal:: - } // cv:: void cv::dct( InputArray _src0, OutputArray _dst, int flags ) @@ -4265,10 +4233,8 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) if (src.isContinuous() && dst.isContinuous()) f |= CV_HAL_DFT_IS_CONTINUOUS; - hal::DftContext c; - hal::dctInit2D(c, src.cols, src.rows, depth, f); - hal::dct2D(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step); - hal::dctFree2D(c); + Ptr c = hal::DCT2D::create(src.cols, src.rows, depth, f); + c->apply(src.data, src.step, dst.data, dst.step); } diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index bbf32f39d8..93476c4594 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -376,38 +376,109 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int #define cv_hal_merge64s hal_ni_merge64s //! @endcond -//! @} - -#if defined __GNUC__ -# pragma GCC diagnostic pop -#elif defined _MSC_VER -# pragma warning( pop ) -#endif +/** +@brief Dummy structure storing DFT/DCT context + +Users can convert this pointer to any type they want. Initialisation and destruction should be made in Init and Free function implementations correspondingly. +Example: +@code{.cpp} +int my_hal_dftInit2D(cvhalDFT **context, ...) { + *context = static_cast(new MyFilterData()); + //... 
init +} + +int my_hal_dftFree2D(cvhalDFT *context) { + MyFilterData *c = static_cast(context); + delete c; +} +@endcode + */ +struct cvhalDFT {}; -inline int hal_ni_dftInit1D(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dft1D(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftFree1D(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context double pointer to context storing all necessary data +@param len transformed array length +@param count estimated transformation count +@param depth array type (CV_32F or CV_64F) +@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, CV_HAL_DFT_SCALE, ...) +@param needBuffer pointer to boolean variable, if valid pointer provided, then variable value should be set to true to signal that additional memory buffer is needed for operations + */ +inline int hal_ni_dftInit1D(cvhalDFT **context, int len, int count, int depth, int flags, bool *needBuffer) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data +@param src source data +@param dst destination data + */ +inline int hal_ni_dft1D(cvhalDFT *context, const uchar *src, uchar *dst) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data + */ +inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @cond IGNORED #define cv_hal_dftInit1D hal_ni_dftInit1D #define cv_hal_dft1D hal_ni_dft1D #define cv_hal_dftFree1D hal_ni_dftFree1D +//! 
@endcond -inline int hal_ni_dftInit2D(void **, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dft2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context double pointer to context storing all necessary data +@param width,height image dimensions +@param depth image type (CV_32F or CV_64F) +@param src_channels number of channels in input image +@param dst_channels number of channels in output image +@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...) +@param nonzero_rows number of nonzero rows in image, can be used for optimization + */ +inline int hal_ni_dftInit2D(cvhalDFT **context, int width, int height, int depth, int src_channels, int dst_channels, int flags, int nonzero_rows) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data +@param src_data,src_step source image data and step +@param dst_data,dst_step destination image data and step + */ +inline int hal_ni_dft2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data + */ +inline int hal_ni_dftFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @cond IGNORED #define cv_hal_dftInit2D hal_ni_dftInit2D #define cv_hal_dft2D hal_ni_dft2D #define cv_hal_dftFree2D hal_ni_dftFree2D +//! @endcond +/** +@param context double pointer to context storing all necessary data +@param width,height image dimensions +@param depth image type (CV_32F or CV_64F) +@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...) 
+ */ +inline int hal_ni_dctInit2D(cvhalDFT **context, int width, int height, int depth, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data +@param src_data,src_step source image data and step +@param dst_data,dst_step destination image data and step + */ +inline int hal_ni_dct2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data + */ +inline int hal_ni_dctFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dctInit2D(void **, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dct2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dctFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } - +//! @cond IGNORED #define cv_hal_dctInit2D hal_ni_dctInit2D #define cv_hal_dct2D hal_ni_dct2D #define cv_hal_dctFree2D hal_ni_dctFree2D +//! @endcond + +//! 
@} + +#if defined __GNUC__ +# pragma GCC diagnostic pop +#elif defined _MSC_VER +# pragma warning( pop ) +#endif #include "custom_hal.hpp" diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index 4e89582798..019c41f33b 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -700,8 +700,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, buf.resize(bufSize); - hal::DftContext c; - hal::dftInit2D(c, dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows); + Ptr c = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows); // compute DFT of each template plane for( k = 0; k < tcn; k++ ) @@ -726,11 +725,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); part = Scalar::all(0); } - hal::dft2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step); + c->apply(dst.data, (int)dst.step, dst.data, (int)dst.step); } - hal::dftFree2D(c); - int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width; int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height; int tileCount = tileCountX * tileCountY; @@ -747,11 +744,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } borderType |= BORDER_ISOLATED; - hal::DftContext cF, cR; + Ptr cF, cR; int f = CV_HAL_DFT_IS_INPLACE; int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE; - hal::dftInit2D(cF, dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); - hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); + cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); + cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); // calculate correlation by blocks for( i = 0; i < tileCount; i++ ) @@ 
-791,7 +788,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); if (bsz.height == blocksize.height) - hal::dft2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, dftImg, 0, dsz.height ); @@ -800,7 +797,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); if (bsz.height == blocksize.height) - hal::dft2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); @@ -834,8 +831,6 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } } } - hal::dftFree2D(cF); - hal::dftFree2D(cR); } static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )