Merge pull request #2268 from ilya-lavrenov:tapi_pow

pull/2252/merge
Andrey Pavlenko 11 years ago committed by OpenCV Buildbot
commit 49db511882
  1. 54
      modules/core/src/mathfuncs.cpp
  2. 7
      modules/core/src/opencl/arithm.cl
  3. 16
      modules/core/test/ocl/test_arithm.cpp

@ -2033,17 +2033,17 @@ static IPowFunc ipowTab[] =
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
static bool ocl_pow(InputArray _src, double power, OutputArray _dst) static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
bool is_ipower, int ipower)
{ {
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( !(_src.dims() <= 2 && (depth == CV_32F || depth == CV_64F)) || if (depth == CV_64F && !doubleSupport)
(depth == CV_64F && !doubleSupport) )
return false; return false;
bool issqrt = std::abs(power - 0.5) < DBL_EPSILON; bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
const char * const op = issqrt ? "OP_SQRT" : "OP_POW"; const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
@ -2060,6 +2060,8 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst)
if (issqrt) if (issqrt)
k.args(srcarg, dstarg); k.args(srcarg, dstarg);
else if (is_ipower)
k.args(srcarg, dstarg, ipower);
else else
{ {
if (depth == CV_32F) if (depth == CV_32F)
@ -2076,39 +2078,35 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst)
void pow( InputArray _src, double power, OutputArray _dst ) void pow( InputArray _src, double power, OutputArray _dst )
{ {
CV_OCL_RUN(_dst.isUMat(), bool is_ipower = false, same = false;
ocl_pow(_src, power, _dst)) int type = _src.type(), depth = CV_MAT_DEPTH(type),
cn = CV_MAT_CN(type), ipower = cvRound(power);
Mat src = _src.getMat();
int type = src.type(), depth = src.depth(), cn = src.channels();
_dst.create( src.dims, src.size, type );
Mat dst = _dst.getMat();
int ipower = cvRound(power);
bool is_ipower = false;
if( fabs(ipower - power) < DBL_EPSILON ) if( fabs(ipower - power) < DBL_EPSILON )
{ {
if( ipower < 0 ) if( ipower < 0 )
{ {
divide( 1., src, dst ); divide( 1., _src, _dst );
if( ipower == -1 ) if( ipower == -1 )
return; return;
ipower = -ipower; ipower = -ipower;
src = dst; same = true;
} }
switch( ipower ) switch( ipower )
{ {
case 0: case 0:
dst = Scalar::all(1); _dst.createSameSize(_src, type);
_dst.setTo(Scalar::all(1));
return; return;
case 1: case 1:
src.copyTo(dst); _src.copyTo(_dst);
return; return;
case 2: case 2:
multiply(src, src, dst); if (same)
multiply(_dst, _dst, _dst);
else
multiply(_src, _src, _dst);
return; return;
default: default:
is_ipower = true; is_ipower = true;
@ -2117,6 +2115,22 @@ void pow( InputArray _src, double power, OutputArray _dst )
else else
CV_Assert( depth == CV_32F || depth == CV_64F ); CV_Assert( depth == CV_32F || depth == CV_64F );
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower))
Mat src, dst;
if (same)
{
dst = _dst.getMat();
src = dst;
}
else
{
src = _src.getMat();
_dst.create( src.dims, src.size, type );
dst = _dst.getMat();
}
const Mat* arrays[] = {&src, &dst, 0}; const Mat* arrays[] = {&src, &dst, 0};
uchar* ptrs[2]; uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs); NAryMatIterator it(arrays, ptrs);

@ -211,6 +211,11 @@
#elif defined OP_POW #elif defined OP_POW
#define PROCESS_ELEM dstelem = pow(srcelem1, srcelem2) #define PROCESS_ELEM dstelem = pow(srcelem1, srcelem2)
#elif defined OP_POWN
#undef workT
#define workT int
#define PROCESS_ELEM dstelem = pown(srcelem1, srcelem2)
#elif defined OP_SQRT #elif defined OP_SQRT
#define PROCESS_ELEM dstelem = sqrt(srcelem1) #define PROCESS_ELEM dstelem = sqrt(srcelem1)
@ -293,7 +298,7 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \ defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
defined OP_MUL || defined OP_DIV defined OP_MUL || defined OP_DIV || defined OP_POWN
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT srcelem2 #define EXTRA_PARAMS , workT srcelem2
#endif #endif

@ -773,16 +773,18 @@ typedef ArithmTestBase Pow;
OCL_TEST_P(Pow, Mat) OCL_TEST_P(Pow, Mat)
{ {
static const double pows[] = { -4, -1, -2.5, 0, 1, 2, 3.7, 4 };
for (int j = 0; j < test_loop_times; j++) for (int j = 0; j < test_loop_times; j++)
{ for (int k = 0, size = sizeof(pows) / sizeof(double); k < size; ++k)
generateTestData(); {
double p = 4.5; generateTestData();
OCL_OFF(cv::pow(src1_roi, p, dst1_roi)); OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi));
OCL_ON(cv::pow(usrc1_roi, p, udst1_roi)); OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi));
Near(1); Near(1);
} }
} }
//////////////////////////////// AddWeighted ///////////////////////////////////////////////// //////////////////////////////// AddWeighted /////////////////////////////////////////////////

Loading…
Cancel
Save