Merge pull request #1948 from ilya-lavrenov:tapi_norm

pull/1883/merge
Andrey Pavlenko 11 years ago committed by OpenCV Buildbot
commit 0dfa188ca5
  1. 2
      modules/core/src/ocl.cpp
  2. 21
      modules/core/src/opencl/arithm.cl
  3. 120
      modules/core/src/stat.cpp
  4. 119
      modules/core/test/ocl/test_arithm.cpp

@ -2379,7 +2379,7 @@ struct Program::Impl
size_t retsz = 0;
retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
if( retval >= 0 && retsz > 0 )
if( retval >= 0 && retsz > 1 )
{
AutoBuffer<char> bufbuf(retsz + 16);
char* buf = bufbuf;

@ -58,10 +58,10 @@
*/
#ifdef DOUBLE_SUPPORT
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define CV_EPSILON DBL_EPSILON
#define CV_PI M_PI
@ -76,12 +76,18 @@
#ifndef workT
#ifndef srcT1
#define srcT1 dstT
#endif
#ifndef srcT2
#define srcT2 dstT
#endif
#define workT dstT
#define srcelem1 *(__global dstT*)(srcptr1 + src1_index)
#define srcelem2 *(__global dstT*)(srcptr2 + src2_index)
#define srcelem1 *(__global srcT1*)(srcptr1 + src1_index)
#define srcelem2 *(__global srcT2*)(srcptr2 + src2_index)
#ifndef convertToDT
#define convertToDT noconvert
#endif
#else
@ -160,6 +166,11 @@
#elif defined OP_MAG
#define PROCESS_ELEM dstelem = hypot(srcelem1, srcelem2)
#elif defined OP_ABS_NOSAT
#define PROCESS_ELEM \
dstT v = convertToDT(srcelem1); \
dstelem = v >= 0 ? v : -v
#elif defined OP_PHASE_RADIANS
#define PROCESS_ELEM \
workT tmp = atan2(srcelem2, srcelem1); \

@ -1760,15 +1760,76 @@ static NormDiffFunc getNormDiffFunc(int normType, int depth)
}
double cv::norm( InputArray _src, int normType, InputArray _mask )
namespace cv {

// OpenCL path for the single-array cv::norm().
//
// _src      input array; it is read through getUMat().
// normType  norm selector; only its low three bits are examined here and only
//           NORM_INF, NORM_L1 and NORM_L2 are handled.
// result    receives the computed norm when the function returns true.
//
// Returns false when this path cannot produce a result (unsupported norm type,
// CV_64F data on a device without double support, kernel creation or launch
// failure, or a failed ocl_sum reduction), so that the caller can fall back to
// its non-OpenCL implementation.
static bool ocl_norm( InputArray _src, int normType, double & result )
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    normType &= 7;
    if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) ||
         (!doubleSupport && depth == CV_64F))
        return false;

    UMat src = _src.getUMat();

    if (normType == NORM_INF)
    {
        UMat abssrc;

        if (depth != CV_8U && depth != CV_16U)
        {
            // Signed and floating-point data: take |x| on the device first,
            // writing into a buffer of depth max(CV_32S, depth).
            int wdepth = std::max(CV_32S, depth);
            char cvt[50];

            ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc,
                             format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s -D convertToDT=%s%s",
                                    ocl::typeToStr(wdepth), ocl::typeToStr(depth),
                                    ocl::convertTypeStr(depth, wdepth, 1, cvt),
                                    doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
            if (kabs.empty())
                return false;

            abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn));
            kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn));

            // One work item per scalar element: channels are folded into the width.
            size_t globalsize[2] = { (size_t)(src.cols * cn), (size_t)src.rows };
            if (!kabs.run(2, globalsize, NULL, false))
                return false;
        }
        else
            abssrc = src; // CV_8U / CV_16U values are already non-negative

        // The infinity norm is the maximum of the absolute values over all channels.
        cv::minMaxIdx(abssrc.reshape(1), NULL, &result);
    }
    else if (normType == NORM_L1 || normType == NORM_L2)
    {
        Scalar s;
        bool unstype = depth == CV_8U || depth == CV_16U;

        // Do not report success when the reduction itself failed: 's' would not
        // hold a valid sum and the caller would return a meaningless norm.
        if (!ocl_sum(src.reshape(1), s, normType == NORM_L2 ?
                     OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS) ))
            return false;

        result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]);
    }

    return true;
}

}
double cv::norm( InputArray _src, int normType, InputArray _mask )
{
normType &= NORM_TYPE_MASK;
CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
double _result = 0;
if (ocl::useOpenCL() && _mask.empty() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _result))
return _result;
Mat src = _src.getMat(), mask = _mask.getMat();
int depth = src.depth(), cn = src.channels();
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
size_t total_size = src.total();
@ -2047,9 +2108,56 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
return result.d;
}
namespace cv {

// OpenCL path for the two-array cv::norm().
//
// Runs the OP_ABSDIFF kernel to build |src1 - src2| in a buffer of depth
// max(CV_32S, depth), then takes the single-array norm of that buffer. When
// NORM_RELATIVE is set in normType the value is additionally divided by
// norm(src2) + DBL_EPSILON.
//
// Returns false (leaving the work to the caller) for norm types other than
// NORM_INF / NORM_L1 / NORM_L2, for CV_64F data on a device without double
// support, and when the kernel cannot be created or launched.
static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, double & result )
{
    const int type = _src1.type();
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    const bool haveDouble = ocl::Device::getDefault().doubleFPConfig() > 0;

    // Split the "relative" flag from the base norm type.
    const bool isRelative = (normType & NORM_RELATIVE) != 0;
    normType &= ~NORM_RELATIVE;

    if (normType != NORM_INF && normType != NORM_L1 && normType != NORM_L2)
        return false;
    if (depth == CV_64F && !haveDouble)
        return false;

    const int workDepth = std::max(CV_32S, depth);
    char convBuf[50];

    ocl::Kernel absdiffKernel("KF", ocl::core::arithm_oclsrc,
                              format("-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
                                     " -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT%s",
                                     ocl::typeToStr(workDepth), ocl::typeToStr(depth),
                                     ocl::convertTypeStr(depth, workDepth, 1, convBuf),
                                     haveDouble ? " -D DOUBLE_SUPPORT" : ""));
    if (absdiffKernel.empty())
        return false;

    UMat src1 = _src1.getUMat();
    UMat src2 = _src2.getUMat();
    UMat diff(src1.size(), CV_MAKE_TYPE(workDepth, cn));

    absdiffKernel.args(ocl::KernelArg::ReadOnlyNoSize(src1),
                       ocl::KernelArg::ReadOnlyNoSize(src2),
                       ocl::KernelArg::WriteOnly(diff, cn));

    // One work item per scalar element: channels are folded into the width.
    size_t gridSize[2] = { (size_t)(diff.cols * cn), (size_t)diff.rows };
    if (!absdiffKernel.run(2, gridSize, NULL, false))
        return false;

    result = cv::norm(diff, normType);
    if (isRelative)
    {
        const double denom = cv::norm(src2, normType) + DBL_EPSILON;
        result /= denom;
    }

    return true;
}

}
double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
{
CV_Assert( _src1.size() == _src2.size() && _src1.type() == _src2.type() );
double _result = 0;
if (ocl::useOpenCL() && _mask.empty() && _src1.isUMat() && _src2.isUMat() &&
_src1.dims() <= 2 && _src2.dims() <= 2 && ocl_norm(_src1, _src2, normType, _result))
return _result;
if( normType & CV_RELATIVE )
{
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
@ -2135,7 +2243,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
int depth = src1.depth(), cn = src1.channels();
CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
CV_Assert( src1.size == src2.size );
normType &= 7;
CV_Assert( normType == NORM_INF || normType == NORM_L1 ||

@ -795,8 +795,8 @@ struct RepeatTestCase :
{
const int type = CV_MAKE_TYPE(depth, cn);
nx = 2;//randomInt(1, 4);
ny = 2;//randomInt(1, 4);
nx = randomInt(1, 4);
ny = randomInt(1, 4);
Size srcRoiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@ -813,7 +813,7 @@ struct RepeatTestCase :
typedef RepeatTestCase Repeat;
OCL_TEST_P(Repeat, DISABLED_Mat)
OCL_TEST_P(Repeat, Mat)
{
for (int i = 0; i < test_loop_times; ++i)
{
@ -1004,6 +1004,108 @@ OCL_TEST_P(Flip, BOTH)
}
}
//////////////////////////////// Norm /////////////////////////////////////////////////
// Predicate for EXPECT_PRED3: true when |actual - expected| is smaller than
// eps times the magnitude of 'actual'.
//
// actual    reference value; its magnitude scales the tolerance.
// expected  value being compared against the reference.
// eps       relative tolerance.
//
// The magnitude of 'actual' is used as the scale, so a negative reference no
// longer turns the quotient negative (which accepted any difference), and two
// identical values, including 0 and 0, compare as equal instead of giving 0/0.
static bool relativeError(double actual, double expected, double eps)
{
    const double diff = std::abs(actual - expected);

    // Exact match: avoid 0/0 when both values are zero.
    if (diff == 0.0)
        return eps > 0.0;

    return diff / std::abs(actual) < eps;
}
// The Norm tests reuse the ArithmTestBase fixture.
typedef ArithmTestBase Norm;

// Single-array NORM_INF: the value obtained under OCL_OFF on the Mat ROI and
// the value obtained under OCL_ON on the UMat ROI must agree within 0.1.
OCL_TEST_P(Norm, NORM_INF_1arg)
{
    const int normType = NORM_INF;

    for (int iter = 0; iter < test_loop_times; ++iter)
    {
        generateTestData();

        OCL_OFF(const double cpuRes = cv::norm(src1_roi, normType));
        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, normType));

        EXPECT_NEAR(cpuRes, gpuRes, 0.1);
    }
}
// Single-array NORM_L1: the OCL_OFF and OCL_ON results must satisfy the
// relativeError predicate with a tolerance of 1e-6.
OCL_TEST_P(Norm, NORM_L1_1arg)
{
    const int normType = NORM_L1;

    for (int iter = 0; iter < test_loop_times; ++iter)
    {
        generateTestData();

        OCL_OFF(const double cpuRes = cv::norm(src1_roi, normType));
        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, normType));

        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
    }
}
// Single-array NORM_L2: the OCL_OFF and OCL_ON results must satisfy the
// relativeError predicate with a tolerance of 1e-6.
OCL_TEST_P(Norm, NORM_L2_1arg)
{
    const int normType = NORM_L2;

    for (int iter = 0; iter < test_loop_times; ++iter)
    {
        generateTestData();

        OCL_OFF(const double cpuRes = cv::norm(src1_roi, normType));
        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, normType));

        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
    }
}
// Two-array NORM_INF, run first without and then with NORM_RELATIVE: the
// OCL_OFF and OCL_ON results must agree within 0.1.
OCL_TEST_P(Norm, NORM_INF_2args)
{
    for (int pass = 0; pass < 2; ++pass)
    {
        const bool relative = pass == 1;

        for (int iter = 0; iter < test_loop_times; ++iter)
        {
            generateTestData();

            const int type = NORM_INF | (relative ? NORM_RELATIVE : 0);

            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type));
            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type));

            EXPECT_NEAR(cpuRes, gpuRes, 0.1);
        }
    }
}
// Two-array NORM_L1, run first without and then with NORM_RELATIVE: the
// OCL_OFF and OCL_ON results must satisfy relativeError with tolerance 1e-6.
OCL_TEST_P(Norm, NORM_L1_2args)
{
    for (int pass = 0; pass < 2; ++pass)
    {
        const bool relative = pass == 1;

        for (int iter = 0; iter < test_loop_times; ++iter)
        {
            generateTestData();

            const int type = NORM_L1 | (relative ? NORM_RELATIVE : 0);

            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type));
            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type));

            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
        }
    }
}
// Two-array NORM_L2, run first without and then with NORM_RELATIVE: the
// OCL_OFF and OCL_ON results must satisfy relativeError with tolerance 1e-6.
OCL_TEST_P(Norm, NORM_L2_2args)
{
    for (int pass = 0; pass < 2; ++pass)
    {
        const bool relative = pass == 1;

        for (int iter = 0; iter < test_loop_times; ++iter)
        {
            generateTestData();

            const int type = NORM_L2 | (relative ? NORM_RELATIVE : 0);

            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type));
            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type));

            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
        }
    }
}
//////////////////////////////////////// Instantiation /////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@ -1017,10 +1119,10 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHA
OCL_INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
//OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
@ -1033,7 +1135,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(::testing::Values(CV_32F, CV_64
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F), Values(1, 2, 3, 4), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
} } // namespace cvtest::ocl

Loading…
Cancel
Save