Added ocl::pyrUp to T-API

pull/1995/head
Alexander Karsakov 11 years ago
parent ac230cd2ae
commit febe528363
  1. 1021
      modules/imgproc/src/opencl/pyr_down.cl
  2. 159
      modules/imgproc/src/opencl/pyr_up.cl
  3. 82
      modules/imgproc/src/pyramids.cpp
  4. 89
      modules/imgproc/test/ocl/test_pyramids.cpp

File diff suppressed because it is too large Load Diff

@ -0,0 +1,159 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////
//////////////////////// Generic PyrUp //////////////////////////////
///////////////////////////////////////////////////////////////////////
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define noconvert
__kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int lsizex = get_local_size(0);
const int lsizey = get_local_size(1);
const int tidx = get_local_id(0);
const int tidy = get_local_id(1);
__local FT s_srcPatch[10][10];
__local FT s_dstPatch[20][16];
__global T * dstData = (__global T *)(dst + dst_offset);
__global const T * srcData = (__global const T *)(src + src_offset);
if( tidx < 10 && tidy < 10 )
{
int srcx = mad24((int)get_group_id(0), lsizex>>1, tidx) - 1;
int srcy = mad24((int)get_group_id(1), lsizey>>1, tidy) - 1;
srcx = abs(srcx);
srcx = min(src_cols - 1, srcx);
srcy = abs(srcy);
srcy = min(src_rows - 1, srcy);
s_srcPatch[tidy][tidx] = convertToFT(srcData[srcx + srcy * src_step / (int) sizeof(T)]);
}
barrier(CLK_LOCAL_MEM_FENCE);
FT sum = 0.f;
const FT evenFlag = (FT)((tidx & 1) == 0);
const FT oddFlag = (FT)((tidx & 1) != 0);
const bool eveny = ((tidy & 1) == 0);
const FT co1 = 0.375f;
const FT co2 = 0.25f;
const FT co3 = 0.0625f;
if(eveny)
{
sum = ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + tidy][tidx] = sum;
if (tidy < 2)
{
sum = 0;
if (eveny)
{
sum = (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[tidy][tidx] = sum;
}
if (tidy > 13)
{
sum = 0;
if (eveny)
{
sum = (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1) * s_srcPatch[lsizey-7][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + tidy][tidx] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = co3 * s_dstPatch[2 + tidy - 2][tidx];
sum = sum + co2 * s_dstPatch[2 + tidy - 1][tidx];
sum = sum + co1 * s_dstPatch[2 + tidy ][tidx];
sum = sum + co2 * s_dstPatch[2 + tidy + 1][tidx];
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dst_cols) && (y < dst_rows))
dstData[x + y * dst_step / (int)sizeof(T)] = convertToT(4.0f * sum);
}

@ -401,23 +401,25 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
typedef void (*PyrFunc)(const Mat&, Mat&, int); typedef void (*PyrFunc)(const Mat&, Mat&, int);
}
namespace cv
{
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType) static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{ {
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if (depth != CV_8U && depth != CV_16U && depth != CV_16S && depth != CV_32F) if (((channels != 1) && (channels != 2) && (channels != 4))
|| (borderType != BORDER_DEFAULT))
return false; return false;
if (channels != 1 && channels != 3 && channels != 4)
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ((depth == CV_64F) && !(doubleSupport))
return false; return false;
double doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
Size ssize = _src.size();
Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
CV_Assert( ssize.width > 0 && ssize.height > 0 &&
std::abs(dsize.width*2 - ssize.width) <= 2 &&
std::abs(dsize.height*2 - ssize.height) <= 2 );
UMat src = _src.getUMat(); UMat src = _src.getUMat();
Size dsize = cv::Size((src.rows + 1) / 2, (src.cols + 1) / 2);
_dst.create( dsize, src.type() ); _dst.create( dsize, src.type() );
UMat dst = _dst.getUMat(); UMat dst = _dst.getUMat();
@ -425,31 +427,73 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
ocl::ProgramSource2 program = ocl::imgproc::pyr_down_oclsrc; ocl::ProgramSource2 program = ocl::imgproc::pyr_down_oclsrc;
ocl::Kernel k; ocl::Kernel k;
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
char cvt[2][50]; char cvt[2][50];
k.create(kernelName, program, k.create(kernelName, program,
format("-D T=%s -D cn=%d -D convertToT=%s%s -D convertToFT=%s%s", format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
ocl::typeToStr(type), channels, ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(CV_32F, depth, channels, cvt[0]), ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, CV_32F, channels, cvt[1]), ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "")); doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst)); k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
size_t localThreads[2] = { 256, 1 }; size_t localThreads[2] = { 256, 1 };
size_t globalThreads[2] = { src.cols, dst.rows }; size_t globalThreads[2] = { src.cols, dst.rows };
return k.run(2, globalThreads, localThreads, false); return k.run(2, globalThreads, localThreads, false);
} }
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if (((channels != 1) && (channels != 2) && (channels != 4))
|| (borderType != BORDER_DEFAULT))
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ((depth == CV_64F) && !(doubleSupport))
return false;
Size ssize = _src.size();
if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
return false;
UMat src = _src.getUMat();
Size dsize = Size(ssize.width * 2, ssize.height * 2);
_dst.create( dsize, src.type() );
UMat dst = _dst.getUMat();
const char * const kernelName = "pyrUp";
ocl::ProgramSource2 program = ocl::imgproc::pyr_up_oclsrc;
ocl::Kernel k;
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
char cvt[2][50];
k.create(kernelName, program,
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
size_t globalThreads[2] = {dst.cols, dst.rows};
size_t localThreads[2] = {16, 16};
return k.run(2, globalThreads, localThreads, false);
}
} }
void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType ) void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{ {
if (ocl::useOpenCL() && _dst.isUMat() && if (ocl::useOpenCL() && _dst.isUMat() &&
ocl_pyrDown(_src, _dst, _dsz, borderType)) ocl_pyrDown(_src, _dst, _dsz, borderType))
return; return;
Mat src = _src.getMat(); Mat src = _src.getMat();
Size dsz = _dsz == Size() ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz; Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
_dst.create( dsz, src.type() ); _dst.create( dsz, src.type() );
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();
@ -478,8 +522,12 @@ void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borde
void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType ) void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{ {
if (ocl::useOpenCL() && _dst.isUMat() &&
ocl_pyrUp(_src, _dst, _dsz, borderType))
return;
Mat src = _src.getMat(); Mat src = _src.getMat();
Size dsz = _dsz == Size() ? Size(src.cols*2, src.rows*2) : _dsz; Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
_dst.create( dsz, src.type() ); _dst.create( dsz, src.type() );
Mat dst = _dst.getMat(); Mat dst = _dst.getMat();

@ -45,73 +45,98 @@
#include "test_precomp.hpp" #include "test_precomp.hpp"
#include <iomanip> #include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
using namespace cv; namespace cvtest {
using namespace testing; namespace ocl {
using namespace std;
PARAM_TEST_CASE(PyrBase, MatDepth, Channels) PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
{ {
int depth; int depth, channels;
int channels; bool use_roi;
Mat dst_cpu; TEST_DECLARE_INPUT_PARAMETER(src)
ocl::oclMat gdst; TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp() virtual void SetUp()
{ {
depth = GET_PARAM(0); depth = GET_PARAM(0);
channels = GET_PARAM(1); channels = GET_PARAM(1);
use_roi = GET_PARAM(2);
}
void generateTestData(Size src_roiSize, Size dst_roiSize)
{
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, src_roiSize, srcBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dst_roiSize, dstBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE);
UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
void Near(double threshold = 0.0)
{
OCL_EXPECT_MATS_NEAR(dst, threshold);
} }
}; };
/////////////////////// PyrDown ////////////////////////// /////////////////////// PyrDown //////////////////////////
typedef PyrBase PyrDown; typedef PyrTestBase PyrDown;
OCL_TEST_P(PyrDown, Mat) OCL_TEST_P(PyrDown, Mat)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < test_loop_times; j++)
{ {
Size size(MWIDTH, MHEIGHT); Size src_roiSize = randomSize(1, MAX_VALUE);
Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255); Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2),
ocl::oclMat gsrc(src); randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2));
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
generateTestData(src_roiSize, dst_roiSize);
pyrDown(src, dst_cpu); OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize));
ocl::pyrDown(gsrc, gdst); OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize));
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), depth == CV_32F ? 1e-4f : 1.0f); Near(depth == CV_32F ? 1e-4f : 1.0f);
} }
} }
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F), Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 3, 4))); Values(1, 2, 4),
Bool()
));
/////////////////////// PyrUp ////////////////////////// /////////////////////// PyrUp //////////////////////////
typedef PyrBase PyrUp; typedef PyrTestBase PyrUp;
OCL_TEST_P(PyrUp, Accuracy) OCL_TEST_P(PyrUp, Mat)
{ {
for (int j = 0; j < LOOP_TIMES; j++) for (int j = 0; j < test_loop_times; j++)
{ {
Size size(MWIDTH, MHEIGHT); Size src_roiSize = randomSize(1, MAX_VALUE);
Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255); Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
ocl::oclMat gsrc(src); generateTestData(src_roiSize, dst_roiSize);
pyrUp(src, dst_cpu); OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize));
ocl::pyrUp(gsrc, gdst); OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize));
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (depth == CV_32F ? 1e-4f : 1.0)); Near(depth == CV_32F ? 1e-4f : 1.0f);
} }
} }
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 4),
Bool()
));
} } // namespace cvtest::ocl
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F),
Values(1, 3, 4)));
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

Loading…
Cancel
Save