fixed and extended ocl::norm

pull/1519/head
Ilya Lavrenov 11 years ago
parent a562b5c985
commit 065b40c6c3
  1. 120
      modules/ocl/src/arithm.cpp
  2. 93
      modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
  3. 67
      modules/ocl/test/test_arithm.cpp

@ -64,6 +64,7 @@ namespace cv
{
//////////////////////////////// OpenCL kernel strings /////////////////////
extern const char *arithm_absdiff_nonsaturate;
extern const char *arithm_nonzero;
extern const char *arithm_sum;
extern const char *arithm_minMax;
@ -435,14 +436,12 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
static sumFunc functab[3] =
{
arithmetic_sum<int>,
arithmetic_sum<float>,
arithmetic_sum<double>,
arithmetic_sum<double>
};
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
int ddepth = std::max(src.depth(), CV_32S);
if (!hasDouble && ddepth == CV_64F)
ddepth = CV_32F;
int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
sumFunc func = functab[ddepth - CV_32S];
return func(src, SQR_SUM, ddepth);
@ -595,57 +594,102 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons
double cv::ocl::norm(const oclMat &src1, int normType)
{
return norm(src1, oclMat(src1.size(), src1.type(), Scalar::all(0)), normType);
CV_Assert((normType & NORM_RELATIVE) == 0);
return norm(src1, oclMat(), normType);
}
static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & src2, oclMat & diff)
{
CV_Assert(src1.step % src1.elemSize() == 0 && (src2.empty() || src2.step % src2.elemSize() == 0));
Context *clCxt = src1.clCxt;
int ddepth = CV_64F;
diff.create(src1.size(), CV_MAKE_TYPE(ddepth, src1.channels()));
int oclChannels = src1.oclchannels(), sdepth = src1.depth();
int src1step1 = src1.step / src1.elemSize(), src1offset1 = src1.offset / src1.elemSize();
int src2step1 = src2.step / src2.elemSize(), src2offset1 = src2.offset / src2.elemSize();
int diffstep1 = diff.step / diff.elemSize(), diffoffset1 = diff.offset / diff.elemSize();
string kernelName = "arithm_absdiff_nonsaturate";
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { diff.cols, diff.rows, 1 };
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char * const channelMap[] = { "", "", "2", "4", "4" };
std::string buildOptions = format("-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s",
typeMap[sdepth], channelMap[oclChannels],
typeMap[ddepth], channelMap[oclChannels],
typeMap[ddepth], channelMap[oclChannels]);
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
if (!src2.empty())
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
kernelName += "_binary";
}
args.push_back( make_pair( sizeof(cl_mem), (void *)&diff.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&diffstep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&diffoffset1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
openCLExecuteKernel(clCxt, &arithm_absdiff_nonsaturate,
kernelName, globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
}
double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
{
CV_Assert(!src1.empty());
CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size()));
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
{
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
}
bool isRelative = (normType & NORM_RELATIVE) != 0;
normType &= 7;
CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2));
int channels = src1.oclchannels(), i = 0, *p;
normType &= NORM_TYPE_MASK;
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
Scalar s;
int cn = src1.channels();
double r = 0;
oclMat gm1(src1.size(), src1.type());
int min_int = (normType == NORM_INF ? CL_INT_MIN : 0);
Mat m(1, 1, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(min_int));
oclMat gm2(m), emptyMat;
switch(normType)
oclMat diff;
arithm_absdiff_nonsaturate_run(src1, src2, diff);
switch (normType)
{
case NORM_INF:
// arithmetic_run(src1, src2, gm1, "arithm_op_absdiff");
//arithmetic_minMax_run(gm1,emptyMat, gm2,"arithm_op_max");
m = (gm2);
p = (int *)m.data;
r = -std::numeric_limits<double>::max();
for (i = 0; i < channels; i++)
{
r = std::max(r, (double)p[i]);
}
diff = diff.reshape(1);
minMax(diff, NULL, &r);
break;
case NORM_L1:
//arithmetic_run(src1, src2, gm1, "arithm_op_absdiff");
//arithmetic_sum_run(gm1, gm2,"arithm_op_sum");
m = (gm2);
p = (int *)m.data;
for (i = 0; i < channels; i++)
{
r = r + (double)p[i];
}
s = sum(diff);
for (int i = 0; i < cn; ++i)
r += s[i];
break;
case NORM_L2:
//arithmetic_run(src1, src2, gm1, "arithm_op_absdiff");
//arithmetic_sum_run(gm1, gm2,"arithm_op_squares_sum");
m = (gm2);
p = (int *)m.data;
for (i = 0; i < channels; i++)
{
r = r + (double)p[i];
}
s = sqrSum(diff);
for (int i = 0; i < cn; ++i)
r += s[i];
r = std::sqrt(r);
break;
}
if (isRelative)
r = r / norm(src2, normType);
return r;
}

@ -0,0 +1,93 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif
#endif
__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
__global srcT *src2, int src2_step, int src2_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dstT t0 = convertToDstT(src1[src1_index]);
dstT t1 = convertToDstT(src2[src2_index]);
dstT t2 = t0 - t1;
dst[dst_index] = t2 >= 0 ? t2 : -t2;
}
}
__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
dstT t0 = convertToDstT(src1[src1_index]);
dst[dst_index] = t0 >= 0 ? t0 : -t0;
}
}

@ -220,8 +220,8 @@ PARAM_TEST_CASE(ArithmTestBase, int, int, bool)
cv::RNG &rng = TS::ptr()->get_rng();
src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -15440, 14450, false);
src1 = randomMat(rng, randomSize(MIN_VALUE, MAX_VALUE), type, 2, 11, false);
src2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, -1540, 1740, false);
dst1 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
dst2 = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), type, 5, 16, false);
mask = randomMat(rng, !use_roi ? src1.size() : randomSize(MIN_VALUE, MAX_VALUE), CV_8UC1, 0, 2, false);
@ -1440,7 +1440,7 @@ TEST_P(SetIdentity, Mat)
}
}
//////////////////////////////// setIdentity /////////////////////////////////////////////////
//////////////////////////////// meanStdDev /////////////////////////////////////////////////
typedef ArithmTestBase MeanStdDev;
@ -1458,12 +1458,70 @@ TEST_P(MeanStdDev, Mat)
for (int i = 0; i < 4; ++i)
{
EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 1e-5);
EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1);
EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1);
}
}
}
//////////////////////////////// Norm /////////////////////////////////////////////////
typedef ArithmTestBase Norm;
TEST_P(Norm, NORM_INF)
{
for (int relative = 0; relative < 2; ++relative)
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int type = NORM_INF;
if (relative == 1)
type |= NORM_RELATIVE;
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
}
}
TEST_P(Norm, NORM_L1)
{
for (int relative = 0; relative < 2; ++relative)
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int type = NORM_L1;
if (relative == 1)
type |= NORM_RELATIVE;
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
}
}
TEST_P(Norm, NORM_L2)
{
for (int relative = 0; relative < 2; ++relative)
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
int type = NORM_L2;
if (relative == 1)
type |= NORM_RELATIVE;
const double cpuRes = cv::norm(src1_roi, src2_roi, type);
const double gpuRes = cv::ocl::norm(gsrc1, gsrc2, type);
EXPECT_NEAR(cpuRes, gpuRes, 0.1);
}
}
//////////////////////////////////////// Instantiation /////////////////////////////////////////
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool(), Bool()));
@ -1495,5 +1553,6 @@ INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32F, CV_64F), testing::Ra
INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, SetIdentity, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, MeanStdDev, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(testing::Range(CV_8U, CV_USRTYPE1), testing::Range(1, 5), Bool()));
#endif // HAVE_OPENCL

Loading…
Cancel
Save