fixed ocl::phase

pull/1519/head
Ilya Lavrenov 12 years ago
parent b864f48274
commit 7edcefb2be
  1. 48
      modules/ocl/src/arithm.cpp
  2. 129
      modules/ocl/src/opencl/arithm_phase.cl
  3. 22
      modules/ocl/test/test_arithm.cpp

@ -461,8 +461,8 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0));
oclMat dst1(m1), dst2(m2); oclMat dst1(m1), dst2(m2);
//arithmetic_sum_run(src, dst1,"arithm_op_sum"); // arithmetic_sum_run(src, dst1, "arithm_op_sum");
//arithmetic_sum_run(src, dst2,"arithm_op_squares_sum"); // arithmetic_sum_run(src, dst2, "arithm_op_squares_sum");
m1 = (Mat)dst1; m1 = (Mat)dst1;
m2 = (Mat)dst2; m2 = (Mat)dst2;
@ -558,7 +558,6 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
} }
} }
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{ {
oclMat buf; oclMat buf;
@ -928,47 +927,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
return; return;
} }
CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows);
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt; Context *clCxt = src1.clCxt;
int channels = dst.oclchannels(); int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels();
int depth = dst.depth(); int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1();
int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
size_t vector_length = 1; int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 }; size_t localThreads[3] = { 64, 4, 1 };
size_t globalThreads[3] = { cols, dst.rows, 1 }; size_t globalThreads[3] = { cols1, dst.rows, 1 };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args; vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
} }
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees) void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
{ {
CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0);
Angle.create(x.size(), x.type()); Angle.create(x.size(), x.type());
string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians"; arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase);
if (angleInDegrees)
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
else
arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
@ -1540,7 +1530,7 @@ oclMatExpr::operator oclMat() const
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
#define TILE_DIM (32) #define TILE_DIM (32)
#define BLOCK_ROWS (256/TILE_DIM) #define BLOCK_ROWS (256 / TILE_DIM)
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
{ {

@ -45,110 +45,125 @@
// //
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64:enable #pragma OPENCL EXTENSION cl_khr_fp64:enable
#elif defined (cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#endif #endif
#endif
#define CV_PI 3.1415926535898 #define CV_PI 3.1415926535898
#define CV_2PI 2*3.1415926535898
/**************************************phase inradians**************************************/ /**************************************phase inradians**************************************/
__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index)); float data1 = src1[src1_index];
float data2 = *((__global float *)((__global char *)src2 + src2_index)); float data2 = src2[src2_index];
float tmp = atan2(data2,data1); float tmp = atan2(data2, data1);
*((__global float *)((__global char *)dst + dst_index)) = tmp; if (tmp < 0)
} tmp += CV_2PI;
dst[dst_index] = tmp;
}
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step1, int dst_offset1,
int rows, int cols, int dst_step1) int cols, int rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index)); double data1 = src1[src1_index];
double data2 = *((__global double *)((__global char *)src2 + src2_index)); double data2 = src2[src2_index];
double tmp = atan2(data2, data1);
*((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); if (tmp < 0)
} tmp += CV_2PI;
dst[dst_index] = tmp;
}
} }
#endif #endif
/**************************************phase indegrees**************************************/ /**************************************phase indegrees**************************************/
__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
{
__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
__global float *src2, int src2_step1, int src2_offset1,
__global float *dst, int dst_step1, int dst_offset1,
int cols, int rows)
{
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
float data1 = *((__global float *)((__global char *)src1 + src1_index)); float data1 = src1[src1_index];
float data2 = *((__global float *)((__global char *)src2 + src2_index)); float data2 = src2[src2_index];
float tmp = atan2(data2,data1); float tmp = atan2(data2, data1);
float tmp_data = 180*tmp/CV_PI; tmp = 180 * tmp / CV_PI;
*((__global float *)((__global char *)dst + dst_index)) = tmp_data; if (tmp < 0)
} tmp += 360;
dst[dst_index] = tmp;
}
} }
#if defined (DOUBLE_SUPPORT) #if defined (DOUBLE_SUPPORT)
__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
__global double *src2, int src2_step, int src2_offset, __global double *src2, int src2_step1, int src2_offset1,
__global double *dst, int dst_step, int dst_offset, __global double *dst, int dst_step1, int dst_offset1,
int rows, int cols, int dst_step1) int cols, int rows)
{ {
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < cols && y < rows) if (x < cols && y < rows)
{ {
int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); int dst_index = mad24(y, dst_step1, x + dst_offset1);
double data1 = *((__global double *)((__global char *)src1 + src1_index)); double data1 = src1[src1_index];
double data2 = *((__global double *)((__global char *)src2 + src2_index)); double data2 = src2[src2_index];
double tmp = atan2(data2,data1); double tmp = atan2(src2[src2_index], src1[src1_index]);
double tmp_data = 180*tmp/CV_PI;
*((__global double *)((__global char *)dst + dst_index)) = tmp_data; tmp = 180 * tmp / CV_PI;
} if (tmp < 0)
tmp += 360;
dst[dst_index] = tmp;
}
} }
#endif #endif

@ -464,7 +464,6 @@ TEST_P(Mul, Scalar)
} }
} }
TEST_P(Mul, Mat_Scalar) TEST_P(Mul, Mat_Scalar)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
@ -507,7 +506,6 @@ TEST_P(Div, Scalar)
} }
} }
TEST_P(Div, Mat_Scalar) TEST_P(Div, Mat_Scalar)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
@ -1173,17 +1171,27 @@ TEST_P(CountNonZero, MAT)
typedef ArithmTestBase Phase; typedef ArithmTestBase Phase;
TEST_P(Phase, DISABLED_Mat) TEST_P(Phase, angleInDegrees)
{ {
for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++)
{
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < LOOP_TIMES; j++)
{ {
random_roi(); random_roi();
cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false); cv::phase(src1_roi, src2_roi, dst1_roi, true);
cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false); cv::ocl::phase(gsrc1, gsrc2, gdst1, true);
Near(1e-2); Near(1e-2);
} }
}
TEST_P(Phase, angleInRadians)
{
for (int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::phase(src1_roi, src2_roi, dst1_roi);
cv::ocl::phase(gsrc1, gsrc2, gdst1);
Near(1e-2);
} }
} }

Loading…
Cancel
Save