Merge pull request #7106 from lupustr3:pvlasov/ipp_hal_fix

pull/7101/head
Maksim Shabunin 9 years ago
commit cd943cc60d
  1. 8
      modules/core/include/opencv2/core/private.hpp
  2. 89
      modules/core/src/mathfuncs.cpp
  3. 74
      modules/core/src/mathfuncs_core.cpp

@ -382,9 +382,15 @@ static struct __IppInitializer__ __ipp_initializer__;
return __VA_ARGS__; \ return __VA_ARGS__; \
} }
#endif #endif
/* CV_IPP_RUN_FAST(func, ...)
 *
 * Early-exit helper for IPP-accelerated code paths.
 *   func - expression evaluated only when cv::ipp::useIPP() is true; a true
 *          result means the IPP call handled the work.
 *   ...  - optional value that the ENCLOSING function returns on success
 *          (leave empty inside a void function).
 *
 * On success it invokes CV_IMPL_ADD(CV_IMPL_IPP) and returns from the
 * enclosing function; otherwise execution simply continues after the macro.
 * This variant does not call setIppErrorStatus() when func is false.
 *
 * NOTE(review): the expansion is a bare `if` statement, so placing it directly
 * before an `else` would bind that `else` to this `if` - confirm call sites.
 */
#define CV_IPP_RUN_FAST(func, ...) \
if (cv::ipp::useIPP() && (func)) \
{ \
CV_IMPL_ADD(CV_IMPL_IPP); \
return __VA_ARGS__; \
}
#else #else
#define CV_IPP_RUN_(condition, func, ...) #define CV_IPP_RUN_(condition, func, ...)
/* Fallback definition for the #else branch: expands to nothing, so call sites
 * fall straight through to the code that follows the macro.
 * NOTE(review): presumably this is the build without IPP support - the
 * controlling #if condition is outside this view; confirm. */
#define CV_IPP_RUN_FAST(func, ...)
#endif #endif
#define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__) #define CV_IPP_RUN(condition, func, ...) CV_IPP_RUN_(condition, func, __VA_ARGS__)

@ -618,43 +618,6 @@ void polarToCart( InputArray src1, InputArray src2,
* E X P * * E X P *
\****************************************************************************************/ \****************************************************************************************/
#if defined(HAVE_IPP)
// Single-precision exponent: try the IPP primitive first; a negative status
// is recorded via setIppErrorStatus() and the HAL routine is used instead.
static void Exp_32f_ipp(const float *x, float *y, int n)
{
    CV_IPP_CHECK()
    {
        const bool ippSucceeded = ippsExp_32f_A21(x, y, n) >= 0;
        if (!ippSucceeded)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
    hal::exp32f(x, y, n);
}

// Double-precision counterpart of Exp_32f_ipp.
static void Exp_64f_ipp(const double *x, double *y, int n)
{
    CV_IPP_CHECK()
    {
        const bool ippSucceeded = ippsExp_64f_A50(x, y, n) >= 0;
        if (!ippSucceeded)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
    hal::exp64f(x, y, n);
}

// With IPP available the Exp_* names resolve to the wrappers above ...
#define Exp_32f Exp_32f_ipp
#define Exp_64f Exp_64f_ipp
#else
// ... otherwise they resolve directly to the HAL implementations.
#define Exp_32f hal::exp32f
#define Exp_64f hal::exp64f
#endif
void exp( InputArray _src, OutputArray _dst ) void exp( InputArray _src, OutputArray _dst )
{ {
int type = _src.type(), depth = _src.depth(), cn = _src.channels(); int type = _src.type(), depth = _src.depth(), cn = _src.channels();
@ -675,9 +638,9 @@ void exp( InputArray _src, OutputArray _dst )
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
if( depth == CV_32F ) if( depth == CV_32F )
Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len); hal::exp32f((const float*)ptrs[0], (float*)ptrs[1], len);
else else
Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len); hal::exp64f((const double*)ptrs[0], (double*)ptrs[1], len);
} }
} }
@ -686,42 +649,6 @@ void exp( InputArray _src, OutputArray _dst )
* L O G * * L O G *
\****************************************************************************************/ \****************************************************************************************/
#if defined(HAVE_IPP)
// Single-precision natural logarithm: try the IPP primitive first; a negative
// status is recorded via setIppErrorStatus() and the HAL routine is used instead.
static void Log_32f_ipp(const float *x, float *y, int n)
{
    CV_IPP_CHECK()
    {
        const bool ippSucceeded = ippsLn_32f_A21(x, y, n) >= 0;
        if (!ippSucceeded)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
    hal::log32f(x, y, n);
}

// Double-precision counterpart of Log_32f_ipp.
static void Log_64f_ipp(const double *x, double *y, int n)
{
    CV_IPP_CHECK()
    {
        const bool ippSucceeded = ippsLn_64f_A50(x, y, n) >= 0;
        if (!ippSucceeded)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
    hal::log64f(x, y, n);
}

// With IPP available the Log_* names resolve to the wrappers above ...
#define Log_32f Log_32f_ipp
#define Log_64f Log_64f_ipp
#else
// ... otherwise they resolve directly to the HAL implementations.
#define Log_32f hal::log32f
#define Log_64f hal::log64f
#endif
void log( InputArray _src, OutputArray _dst ) void log( InputArray _src, OutputArray _dst )
{ {
int type = _src.type(), depth = _src.depth(), cn = _src.channels(); int type = _src.type(), depth = _src.depth(), cn = _src.channels();
@ -742,9 +669,9 @@ void log( InputArray _src, OutputArray _dst )
for( size_t i = 0; i < it.nplanes; i++, ++it ) for( size_t i = 0; i < it.nplanes; i++, ++it )
{ {
if( depth == CV_32F ) if( depth == CV_32F )
Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len ); hal::log32f( (const float*)ptrs[0], (float*)ptrs[1], len );
else else
Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len ); hal::log64f( (const double*)ptrs[0], (double*)ptrs[1], len );
} }
} }
@ -1345,10 +1272,10 @@ void pow( InputArray _src, double power, OutputArray _dst )
if( x != x0 ) if( x != x0 )
memcpy(x, x0, bsz*esz1); memcpy(x, x0, bsz*esz1);
Log_32f(x, y, bsz); hal::log32f(x, y, bsz);
for( k = 0; k < bsz; k++ ) for( k = 0; k < bsz; k++ )
y[k] = (float)(y[k]*power); y[k] = (float)(y[k]*power);
Exp_32f(y, y, bsz); hal::exp32f(y, y, bsz);
for( k = 0; k < bsz; k++ ) for( k = 0; k < bsz; k++ )
{ {
if( x0[k] <= 0 ) if( x0[k] <= 0 )
@ -1372,10 +1299,10 @@ void pow( InputArray _src, double power, OutputArray _dst )
if( x != x0 ) if( x != x0 )
memcpy(x, x0, bsz*esz1); memcpy(x, x0, bsz*esz1);
Log_64f(x, y, bsz); hal::log64f(x, y, bsz);
for( k = 0; k < bsz; k++ ) for( k = 0; k < bsz; k++ )
y[k] *= power; y[k] *= power;
Exp_64f(y, y, bsz); hal::exp64f(y, y, bsz);
for( k = 0; k < bsz; k++ ) for( k = 0; k < bsz; k++ )
{ {

@ -44,8 +44,6 @@
using namespace std; using namespace std;
#undef HAVE_IPP
namespace { namespace {
static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI); static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
@ -224,18 +222,7 @@ void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angle
void magnitude32f(const float* x, const float* y, float* mag, int len) void magnitude32f(const float* x, const float* y, float* mag, int len)
{ {
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len); CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
#if defined HAVE_IPP CV_IPP_RUN_FAST(ippsMagnitude_32f(x, y, mag, len) >= 0);
CV_IPP_CHECK()
{
IppStatus status = ippsMagnitude_32f(x, y, mag, len);
if (status >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
#endif
int i = 0; int i = 0;
@ -261,18 +248,7 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
void magnitude64f(const double* x, const double* y, double* mag, int len) void magnitude64f(const double* x, const double* y, double* mag, int len)
{ {
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len); CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
#if defined(HAVE_IPP) CV_IPP_RUN_FAST(ippsMagnitude_64f(x, y, mag, len) >= 0);
CV_IPP_CHECK()
{
IppStatus status = ippsMagnitude_64f(x, y, mag, len);
if (status >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
#endif
int i = 0; int i = 0;
@ -299,17 +275,7 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
void invSqrt32f(const float* src, float* dst, int len) void invSqrt32f(const float* src, float* dst, int len)
{ {
CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len); CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
#if defined(HAVE_IPP) CV_IPP_RUN_FAST(ippsInvSqrt_32f_A21(src, dst, len) >= 0);
CV_IPP_CHECK()
{
if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
#endif
int i = 0; int i = 0;
@ -331,6 +297,8 @@ void invSqrt32f(const float* src, float* dst, int len)
void invSqrt64f(const double* src, double* dst, int len) void invSqrt64f(const double* src, double* dst, int len)
{ {
CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len); CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
CV_IPP_RUN_FAST(ippsInvSqrt_64f_A50(src, dst, len) >= 0);
int i = 0; int i = 0;
#if CV_SSE2 #if CV_SSE2
@ -347,17 +315,7 @@ void invSqrt64f(const double* src, double* dst, int len)
void sqrt32f(const float* src, float* dst, int len) void sqrt32f(const float* src, float* dst, int len)
{ {
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len); CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
#if defined(HAVE_IPP) CV_IPP_RUN_FAST(ippsSqrt_32f_A21(src, dst, len) >= 0);
CV_IPP_CHECK()
{
if (ippsSqrt_32f_A21(src, dst, len) >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
#endif
int i = 0; int i = 0;
@ -379,17 +337,7 @@ void sqrt32f(const float* src, float* dst, int len)
void sqrt64f(const double* src, double* dst, int len) void sqrt64f(const double* src, double* dst, int len)
{ {
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len); CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
#if defined(HAVE_IPP) CV_IPP_RUN_FAST(ippsSqrt_64f_A50(src, dst, len) >= 0);
CV_IPP_CHECK()
{
if (ippsSqrt_64f_A50(src, dst, len) >= 0)
{
CV_IMPL_ADD(CV_IMPL_IPP);
return;
}
setIppErrorStatus();
}
#endif
int i = 0; int i = 0;
@ -511,6 +459,8 @@ static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) <
void exp32f( const float *_x, float *y, int n ) void exp32f( const float *_x, float *y, int n )
{ {
CALL_HAL(exp32f, cv_hal_exp32f, _x, y, n); CALL_HAL(exp32f, cv_hal_exp32f, _x, y, n);
CV_IPP_RUN_FAST(ippsExp_32f_A21(_x, y, n) >= 0);
static const float static const float
A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0), A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0), A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
@ -711,6 +661,8 @@ void exp32f( const float *_x, float *y, int n )
void exp64f( const double *_x, double *y, int n ) void exp64f( const double *_x, double *y, int n )
{ {
CALL_HAL(exp64f, cv_hal_exp64f, _x, y, n); CALL_HAL(exp64f, cv_hal_exp64f, _x, y, n);
CV_IPP_RUN_FAST(ippsExp_64f_A50(_x, y, n) >= 0);
static const double static const double
A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0, A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0, A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
@ -1156,6 +1108,8 @@ static const double ln_2 = 0.69314718055994530941723212145818;
void log32f( const float *_x, float *y, int n ) void log32f( const float *_x, float *y, int n )
{ {
CALL_HAL(log32f, cv_hal_log32f, _x, y, n); CALL_HAL(log32f, cv_hal_log32f, _x, y, n);
CV_IPP_RUN_FAST(ippsLn_32f_A21(_x, y, n) >= 0);
static const float shift[] = { 0, -1.f/512 }; static const float shift[] = { 0, -1.f/512 };
static const float static const float
A0 = 0.3333333333333333333333333f, A0 = 0.3333333333333333333333333f,
@ -1301,6 +1255,8 @@ void log32f( const float *_x, float *y, int n )
void log64f( const double *x, double *y, int n ) void log64f( const double *x, double *y, int n )
{ {
CALL_HAL(log64f, cv_hal_log64f, x, y, n); CALL_HAL(log64f, cv_hal_log64f, x, y, n);
CV_IPP_RUN_FAST(ippsLn_64f_A50(x, y, n) >= 0);
static const double shift[] = { 0, -1./512 }; static const double shift[] = { 0, -1./512 };
static const double static const double
A7 = 1.0, A7 = 1.0,

Loading…
Cancel
Save