parent
064d022a4b
commit
0db69fbe19
16 changed files with 4085 additions and 1 deletions
@ -0,0 +1,98 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Nathan, liujun@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
#if !defined (HAVE_OPENCL) |
||||||
|
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, |
||||||
|
oclMat& result){throw_nogpu();} |
||||||
|
#else |
||||||
|
namespace cv
|
||||||
|
{ |
||||||
|
namespace ocl
|
||||||
|
{ |
||||||
|
////////////////////////////////////OpenCL kernel strings//////////////////////////
|
||||||
|
extern const char *blend_linear; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, |
||||||
|
oclMat& result) |
||||||
|
{ |
||||||
|
cv::ocl::Context *ctx = img1.clCxt; |
||||||
|
assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); |
||||||
|
int channels = img1.channels(); |
||||||
|
int depth = img1.depth(); |
||||||
|
int rows = img1.rows; |
||||||
|
int cols = img1.cols; |
||||||
|
int istep = img1.step; |
||||||
|
int wstep = weights1.step; |
||||||
|
size_t globalSize[] = {cols * channels, rows, 1}; |
||||||
|
size_t localSize[] = {16, 16, 1}; |
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
|
||||||
|
if(globalSize[0]!=0) |
||||||
|
{ |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); |
||||||
|
std::string kernelName = "BlendLinear"; |
||||||
|
|
||||||
|
openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); |
||||||
|
} |
||||||
|
} |
||||||
|
#endif |
@ -0,0 +1,91 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Chunpeng Zhang, chunpeng@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include <iomanip> |
||||||
|
#include "precomp.hpp" |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
|
||||||
|
#if !defined(HAVE_OPENCL) |
||||||
|
|
||||||
|
void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); } |
||||||
|
|
||||||
|
#else /*!HAVE_OPENCL */ |
||||||
|
|
||||||
|
namespace cv
|
||||||
|
{
|
||||||
|
namespace ocl |
||||||
|
{ |
||||||
|
extern const char* imgproc_columnsum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void cv::ocl::columnSum(const oclMat& src,oclMat& dst) |
||||||
|
{ |
||||||
|
CV_Assert(src.type() == CV_32FC1 && dst.type() == CV_32FC1 && src.size() == dst.size()); |
||||||
|
|
||||||
|
Context *clCxt = src.clCxt;
|
||||||
|
|
||||||
|
const std::string kernelName = "columnSum"; |
||||||
|
|
||||||
|
std::vector< pair<size_t, const void *> > args; |
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
|
||||||
|
|
||||||
|
size_t globalThreads[3] = {dst.cols, dst.rows, 1};
|
||||||
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
|
|
||||||
|
openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); |
||||||
|
|
||||||
|
} |
||||||
|
#endif |
@ -0,0 +1,196 @@ |
|||||||
|
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||||
|
// |
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||||
|
// |
||||||
|
// By downloading, copying, installing or using the software you agree to this license. |
||||||
|
// If you do not agree to this license, do not download, install, |
||||||
|
// copy or use the software. |
||||||
|
// |
||||||
|
// |
||||||
|
// License Agreement |
||||||
|
// For Open Source Computer Vision Library |
||||||
|
// |
||||||
|
// Copyright (C) 2010-2012, MulticoreWare Inc., all rights reserved. |
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||||
|
// Third party copyrights are property of their respective owners. |
||||||
|
// |
||||||
|
// @Authors |
||||||
|
// Liu Liujun, liujun@multicorewareinc.com |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without modification, |
||||||
|
// are permitted provided that the following conditions are met: |
||||||
|
// |
||||||
|
// * Redistribution's of source code must retain the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer. |
||||||
|
// |
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer in the documentation |
||||||
|
// and/or other GpuMaterials provided with the distribution. |
||||||
|
// |
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products |
||||||
|
// derived from this software without specific prior written permission. |
||||||
|
// |
||||||
|
// This software is provided by the copyright holders and contributors as is and |
||||||
|
// any express or implied warranties, including, but not limited to, the implied |
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||||
|
// indirect, incidental, special, exemplary, or consequential damages |
||||||
|
// (including, but not limited to, procurement of substitute goods or services; |
||||||
|
// loss of use, data, or profits; or business interruption) however caused |
||||||
|
// and on any theory of liability, whether in contract, strict liability, |
||||||
|
// or tort (including negligence or otherwise) arising in any way out of |
||||||
|
// the use of this software, even if advised of the possibility of such damage. |
||||||
|
// |
||||||
|
//M*/ |
||||||
|
__kernel void BlendLinear_C1_D0( |
||||||
|
__global uchar *dst, |
||||||
|
__global uchar *img1, |
||||||
|
__global uchar *img2, |
||||||
|
__global float *weight1, |
||||||
|
__global float *weight2, |
||||||
|
int rows, |
||||||
|
int cols, |
||||||
|
int istep, |
||||||
|
int wstep |
||||||
|
) |
||||||
|
{ |
||||||
|
int idx = get_global_id(0); |
||||||
|
int idy = get_global_id(1); |
||||||
|
if (idx < cols && idy < rows) |
||||||
|
{ |
||||||
|
int pos = idy * istep + idx; |
||||||
|
int wpos = idy * (wstep /sizeof(float)) + idx; |
||||||
|
float w1 = weight1[wpos]; |
||||||
|
float w2 = weight2[wpos]; |
||||||
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void BlendLinear_C3_D0( |
||||||
|
__global uchar *dst, |
||||||
|
__global uchar *img1, |
||||||
|
__global uchar *img2, |
||||||
|
__global float *weight1, |
||||||
|
__global float *weight2, |
||||||
|
int rows, |
||||||
|
int cols, |
||||||
|
int istep, |
||||||
|
int wstep |
||||||
|
) |
||||||
|
{ |
||||||
|
int idx = get_global_id(0); |
||||||
|
int idy = get_global_id(1); |
||||||
|
int x = idx / 3; |
||||||
|
int y = idy; |
||||||
|
if (x < cols && y < rows) |
||||||
|
{ |
||||||
|
int pos = idy * istep + idx; |
||||||
|
int wpos = idy * (wstep /sizeof(float)) + x; |
||||||
|
float w1 = weight1[wpos]; |
||||||
|
float w2 = weight2[wpos]; |
||||||
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void BlendLinear_C4_D0( |
||||||
|
__global uchar *dst, |
||||||
|
__global uchar *img1, |
||||||
|
__global uchar *img2, |
||||||
|
__global float *weight1, |
||||||
|
__global float *weight2, |
||||||
|
int rows, |
||||||
|
int cols, |
||||||
|
int istep, |
||||||
|
int wstep |
||||||
|
) |
||||||
|
{ |
||||||
|
int idx = get_global_id(0); |
||||||
|
int idy = get_global_id(1); |
||||||
|
int x = idx / 4; |
||||||
|
int y = idy; |
||||||
|
if (x < cols && y < rows) |
||||||
|
{ |
||||||
|
int pos = idy * istep + idx; |
||||||
|
int wpos = idy * (wstep /sizeof(float)) + x; |
||||||
|
float w1 = weight1[wpos]; |
||||||
|
float w2 = weight2[wpos]; |
||||||
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void BlendLinear_C1_D5( |
||||||
|
__global float *dst, |
||||||
|
__global float *img1, |
||||||
|
__global float *img2, |
||||||
|
__global float *weight1, |
||||||
|
__global float *weight2, |
||||||
|
int rows, |
||||||
|
int cols, |
||||||
|
int istep, |
||||||
|
int wstep |
||||||
|
) |
||||||
|
{ |
||||||
|
int idx = get_global_id(0); |
||||||
|
int idy = get_global_id(1); |
||||||
|
if (idx < cols && idy < rows) |
||||||
|
{ |
||||||
|
int pos = idy * (istep / sizeof(float)) + idx; |
||||||
|
int wpos = idy * (wstep /sizeof(float)) + idx; |
||||||
|
float w1 = weight1[wpos]; |
||||||
|
float w2 = weight2[wpos]; |
||||||
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void BlendLinear_C3_D5( |
||||||
|
__global float *dst, |
||||||
|
__global float *img1, |
||||||
|
__global float *img2, |
||||||
|
__global float *weight1, |
||||||
|
__global float *weight2, |
||||||
|
int rows, |
||||||
|
int cols, |
||||||
|
int istep, |
||||||
|
int wstep |
||||||
|
) |
||||||
|
{ |
||||||
|
int idx = get_global_id(0); |
||||||
|
int idy = get_global_id(1); |
||||||
|
int x = idx / 3; |
||||||
|
int y = idy; |
||||||
|
if (x < cols && y < rows) |
||||||
|
{ |
||||||
|
int pos = idy * (istep / sizeof(float)) + idx; |
||||||
|
int wpos = idy * (wstep /sizeof(float)) + x; |
||||||
|
float w1 = weight1[wpos]; |
||||||
|
float w2 = weight2[wpos]; |
||||||
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void BlendLinear_C4_D5( |
||||||
|
__global float *dst, |
||||||
|
__global float *img1, |
||||||
|
__global float *img2, |
||||||
|
__global float *weight1, |
||||||
|
__global float *weight2, |
||||||
|
int rows, |
||||||
|
int cols, |
||||||
|
int istep, |
||||||
|
int wstep |
||||||
|
) |
||||||
|
{ |
||||||
|
int idx = get_global_id(0); |
||||||
|
int idy = get_global_id(1); |
||||||
|
int x = idx / 4; |
||||||
|
int y = idy; |
||||||
|
if (x < cols && y < rows) |
||||||
|
{ |
||||||
|
int pos = idy * (istep / sizeof(float)) + idx; |
||||||
|
int wpos = idy * (wstep /sizeof(float)) + x; |
||||||
|
float w1 = weight1[wpos]; |
||||||
|
float w2 = weight2[wpos]; |
||||||
|
dst[pos] = (img1[pos] * w1 + img2[pos] * w2) / (w1 + w2 + 1e-5f); |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,80 @@ |
|||||||
|
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||||
|
// |
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||||
|
// |
||||||
|
// By downloading, copying, installing or using the software you agree to this license. |
||||||
|
// If you do not agree to this license, do not download, install, |
||||||
|
// copy or use the software. |
||||||
|
// |
||||||
|
// |
||||||
|
// License Agreement |
||||||
|
// For Open Source Computer Vision Library |
||||||
|
// |
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||||
|
// Third party copyrights are property of their respective owners. |
||||||
|
// |
||||||
|
// @Authors |
||||||
|
// Chunpeng Zhang chunpeng@multicorewareinc.com |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without modification, |
||||||
|
// are permitted provided that the following conditions are met: |
||||||
|
// |
||||||
|
// * Redistribution's of source code must retain the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer. |
||||||
|
// |
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer in the documentation |
||||||
|
// and/or other oclMaterials provided with the distribution. |
||||||
|
// |
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products |
||||||
|
// derived from this software without specific prior written permission. |
||||||
|
// |
||||||
|
// This software is provided by the copyright holders and contributors as is and |
||||||
|
// any express or implied warranties, including, but not limited to, the implied |
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||||
|
// indirect, incidental, special, exemplary, or consequential damages |
||||||
|
// (including, but not limited to, procurement of substitute goods or services; |
||||||
|
// loss of use, data, or profits; or business interruption) however caused |
||||||
|
// and on any theory of liability, whether in contract, strict liability, |
||||||
|
// or tort (including negligence or otherwise) arising in any way out of |
||||||
|
// the use of this software, even if advised of the possibility of such damage. |
||||||
|
// |
||||||
|
//M*/ |
||||||
|
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||||
|
#if defined (__ATI__) |
||||||
|
#pragma OPENCL EXTENSION cl_amd_fp64:enable |
||||||
|
|
||||||
|
#elif defined (__NVIDIA__) |
||||||
|
#pragma OPENCL EXTENSION cl_khr_fp64:enable |
||||||
|
#endif |
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////// |
||||||
|
///////////////////////// columnSum //////////////////////////////// |
||||||
|
//////////////////////////////////////////////////////////////////// |
||||||
|
/// CV_32FC1 |
||||||
|
__kernel void columnSum_C1_D5(__global float* src,__global float* dst,int srcCols,int srcRows,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
|
||||||
|
srcStep >>= 2; |
||||||
|
dstStep >>= 2; |
||||||
|
|
||||||
|
if (x < srcCols) |
||||||
|
{ |
||||||
|
int srcIdx = x ; |
||||||
|
int dstIdx = x ; |
||||||
|
|
||||||
|
float sum = 0; |
||||||
|
|
||||||
|
for (int y = 0; y < srcRows; ++y) |
||||||
|
{ |
||||||
|
sum += src[srcIdx]; |
||||||
|
dst[dstIdx] = sum; |
||||||
|
srcIdx += srcStep; |
||||||
|
dstIdx += dstStep; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,824 @@ |
|||||||
|
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||||
|
// |
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||||
|
// |
||||||
|
// By downloading, copying, installing or using the software you agree to this license. |
||||||
|
// If you do not agree to this license, do not download, install, |
||||||
|
// copy or use the software. |
||||||
|
// |
||||||
|
// |
||||||
|
// License Agreement |
||||||
|
// For Open Source Computer Vision Library |
||||||
|
// |
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||||
|
// Third party copyrights are property of their respective owners. |
||||||
|
// |
||||||
|
// @Authors |
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without modification, |
||||||
|
// are permitted provided that the following conditions are met: |
||||||
|
// |
||||||
|
// * Redistribution's of source code must retain the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer. |
||||||
|
// |
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer in the documentation |
||||||
|
// and/or other oclMaterials provided with the distribution. |
||||||
|
// |
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products |
||||||
|
// derived from this software without specific prior written permission. |
||||||
|
// |
||||||
|
// This software is provided by the copyright holders and contributors as is and |
||||||
|
// any express or implied warranties, including, but not limited to, the implied |
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||||
|
// indirect, incidental, special, exemplary, or consequential damages |
||||||
|
// (including, but not limited to, procurement of substitute goods or services; |
||||||
|
// loss of use, data, or profits; or business interruption) however caused |
||||||
|
// and on any theory of liability, whether in contract, strict liability, |
||||||
|
// or tort (including negligence or otherwise) arising in any way out of |
||||||
|
// the use of this software, even if advised of the possibility of such damage. |
||||||
|
// |
||||||
|
//M*/ |
||||||
|
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||||
|
|
||||||
|
#if defined (__ATI__) |
||||||
|
#pragma OPENCL EXTENSION cl_amd_fp64:enable |
||||||
|
|
||||||
|
#elif defined (__NVIDIA__) |
||||||
|
#pragma OPENCL EXTENSION cl_khr_fp64:enable |
||||||
|
#endif |
||||||
|
|
||||||
|
#if !defined(USE_SQR_INTEGRAL) && (defined (__ATI__) || defined (__NVIDIA__)) |
||||||
|
#define TYPE_IMAGE_SQSUM double |
||||||
|
#else |
||||||
|
#define TYPE_IMAGE_SQSUM ulong |
||||||
|
#endif |
||||||
|
|
||||||
|
////////////////////////////////////////////////// |
||||||
|
// utilities |
||||||
|
#define SQSUMS_PTR(ox, oy) mad24(gidy + oy, img_sqsums_step, gidx + img_sqsums_offset + ox) |
||||||
|
#define SUMS_PTR(ox, oy) mad24(gidy + oy, img_sums_step, gidx + img_sums_offset + ox) |
||||||
|
// normAcc* are accurate normalization routines which make GPU matchTemplate |
||||||
|
// consistent with CPU one |
||||||
|
float normAcc(float num, float denum) |
||||||
|
{ |
||||||
|
if(fabs(num) < denum) |
||||||
|
{ |
||||||
|
return num / denum; |
||||||
|
} |
||||||
|
if(fabs(num) < denum * 1.125f) |
||||||
|
{ |
||||||
|
return num > 0 ? 1 : -1; |
||||||
|
} |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
float normAcc_SQDIFF(float num, float denum) |
||||||
|
{ |
||||||
|
if(fabs(num) < denum) |
||||||
|
{ |
||||||
|
return num / denum; |
||||||
|
} |
||||||
|
if(fabs(num) < denum * 1.125f) |
||||||
|
{ |
||||||
|
return num > 0 ? 1 : -1; |
||||||
|
} |
||||||
|
return 1; |
||||||
|
} |
||||||
|
////////////////////////////////////////////////////////////////////// |
||||||
|
// normalize |
||||||
|
|
||||||
|
__kernel |
||||||
|
void normalizeKernel_C1_D0 |
||||||
|
( |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums, |
||||||
|
__global float * res, |
||||||
|
ulong tpl_sqsum, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int img_sqsums_offset, |
||||||
|
int img_sqsums_step, |
||||||
|
int res_offset, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
img_sqsums_step /= sizeof(*img_sqsums); |
||||||
|
img_sqsums_offset /= sizeof(*img_sqsums); |
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float image_sqsum_ = (float)( |
||||||
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); |
||||||
|
res[res_idx] = normAcc(res[res_idx], sqrt(image_sqsum_ * tpl_sqsum)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Prepared_SQDIFF_C1_D0 |
||||||
|
( |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums, |
||||||
|
__global float * res, |
||||||
|
ulong tpl_sqsum, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int img_sqsums_offset, |
||||||
|
int img_sqsums_step, |
||||||
|
int res_offset, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
img_sqsums_step /= sizeof(*img_sqsums); |
||||||
|
img_sqsums_offset /= sizeof(*img_sqsums); |
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float image_sqsum_ = (float)( |
||||||
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); |
||||||
|
res[res_idx] = image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Prepared_SQDIFF_NORMED_C1_D0 |
||||||
|
( |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums, |
||||||
|
__global float * res, |
||||||
|
ulong tpl_sqsum, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int img_sqsums_offset, |
||||||
|
int img_sqsums_step, |
||||||
|
int res_offset, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
img_sqsums_step /= sizeof(*img_sqsums); |
||||||
|
img_sqsums_offset /= sizeof(*img_sqsums); |
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float image_sqsum_ = (float)( |
||||||
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); |
||||||
|
res[res_idx] = normAcc_SQDIFF(image_sqsum_ - 2.f * res[res_idx] + tpl_sqsum, |
||||||
|
sqrt(image_sqsum_ * tpl_sqsum)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////////////////////// |
||||||
|
// SQDIFF |
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_SQDIFF_C1_D0 |
||||||
|
( |
||||||
|
__global const uchar * img, |
||||||
|
__global const uchar * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
int delta; |
||||||
|
int sum = 0; |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
delta = img_ptr[j] - tpl_ptr[j]; |
||||||
|
sum = mad24(delta, delta, sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_SQDIFF_C1_D5 |
||||||
|
( |
||||||
|
__global const float * img, |
||||||
|
__global const float * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
float delta; |
||||||
|
float sum = 0; |
||||||
|
img_step /= sizeof(*img); |
||||||
|
img_offset /= sizeof(*img); |
||||||
|
tpl_step /= sizeof(*tpl); |
||||||
|
tpl_offset /= sizeof(*tpl); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
delta = img_ptr[j] - tpl_ptr[j]; |
||||||
|
sum = mad(delta, delta, sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_SQDIFF_C4_D0 |
||||||
|
( |
||||||
|
__global const uchar4 * img, |
||||||
|
__global const uchar4 * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
int4 delta; |
||||||
|
int4 sum = (int4)(0, 0, 0, 0); |
||||||
|
img_step /= sizeof(*img); |
||||||
|
img_offset /= sizeof(*img); |
||||||
|
tpl_step /= sizeof(*tpl); |
||||||
|
tpl_offset /= sizeof(*tpl); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect |
||||||
|
delta.x = img_ptr[j].x - tpl_ptr[j].x; |
||||||
|
delta.y = img_ptr[j].y - tpl_ptr[j].y; |
||||||
|
delta.z = img_ptr[j].z - tpl_ptr[j].z; |
||||||
|
delta.w = img_ptr[j].w - tpl_ptr[j].w; |
||||||
|
sum = mad24(delta, delta, sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_SQDIFF_C4_D5 |
||||||
|
( |
||||||
|
__global const float4 * img, |
||||||
|
__global const float4 * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
float4 delta; |
||||||
|
float4 sum = (float4)(0, 0, 0, 0); |
||||||
|
img_step /= sizeof(*img); |
||||||
|
img_offset /= sizeof(*img); |
||||||
|
tpl_step /= sizeof(*tpl); |
||||||
|
tpl_offset /= sizeof(*tpl); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
//delta = convert_int4(img_ptr[j] - tpl_ptr[j]); // this alternative is incorrect |
||||||
|
delta.x = img_ptr[j].x - tpl_ptr[j].x; |
||||||
|
delta.y = img_ptr[j].y - tpl_ptr[j].y; |
||||||
|
delta.z = img_ptr[j].z - tpl_ptr[j].z; |
||||||
|
delta.w = img_ptr[j].w - tpl_ptr[j].w; |
||||||
|
sum = mad(delta, delta, sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////////////////////// |
||||||
|
// CCORR |
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_CCORR_C1_D0 |
||||||
|
( |
||||||
|
__global const uchar * img, |
||||||
|
__global const uchar * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
int sum = 0; |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const uchar * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const uchar * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
sum = mad24(img_ptr[j], tpl_ptr[j], sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_CCORR_C1_D5 |
||||||
|
( |
||||||
|
__global const float * img, |
||||||
|
__global const float * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
float sum = 0; |
||||||
|
img_step /= sizeof(*img); |
||||||
|
img_offset /= sizeof(*img); |
||||||
|
tpl_step /= sizeof(*tpl); |
||||||
|
tpl_offset /= sizeof(*tpl); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const float * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const float * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
sum = mad(img_ptr[j], tpl_ptr[j], sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_CCORR_C4_D0 |
||||||
|
( |
||||||
|
__global const uchar4 * img, |
||||||
|
__global const uchar4 * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
int4 sum = (int4)(0, 0, 0, 0); |
||||||
|
img_step /= sizeof(*img); |
||||||
|
img_offset /= sizeof(*img); |
||||||
|
tpl_step /= sizeof(*tpl); |
||||||
|
tpl_offset /= sizeof(*tpl); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const uchar4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const uchar4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
sum = mad24(convert_int4(img_ptr[j]), convert_int4(tpl_ptr[j]), sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Naive_CCORR_C4_D5 |
||||||
|
( |
||||||
|
__global const float4 * img, |
||||||
|
__global const float4 * tpl, |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int img_offset, |
||||||
|
int tpl_offset, |
||||||
|
int res_offset, |
||||||
|
int img_step, |
||||||
|
int tpl_step, |
||||||
|
int res_step |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
int i,j; |
||||||
|
float4 sum = (float4)(0, 0, 0, 0); |
||||||
|
img_step /= sizeof(*img); |
||||||
|
img_offset /= sizeof(*img); |
||||||
|
tpl_step /= sizeof(*tpl); |
||||||
|
tpl_offset /= sizeof(*tpl); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
for(i = 0; i < tpl_rows; i ++) |
||||||
|
{ |
||||||
|
// get specific rows of img data |
||||||
|
__global const float4 * img_ptr = img + mad24(gidy + i, img_step, gidx + img_offset); |
||||||
|
__global const float4 * tpl_ptr = tpl + mad24(i, tpl_step, tpl_offset); |
||||||
|
for(j = 0; j < tpl_cols; j ++) |
||||||
|
{ |
||||||
|
sum = mad(convert_float4(img_ptr[j]), convert_float4(tpl_ptr[j]), sum); |
||||||
|
} |
||||||
|
} |
||||||
|
res[res_idx] = sum.x + sum.y + sum.z + sum.w; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////////////////////// |
||||||
|
// CCOFF |
||||||
|
__kernel |
||||||
|
void matchTemplate_Prepared_CCOFF_C1_D0 |
||||||
|
( |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int res_offset, |
||||||
|
int res_step, |
||||||
|
__global const uint * img_sums, |
||||||
|
int img_sums_offset, |
||||||
|
int img_sums_step, |
||||||
|
float tpl_sum |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
img_sums_offset /= sizeof(*img_sums); |
||||||
|
img_sums_step /= sizeof(*img_sums); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float sum = (float)( |
||||||
|
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)])); |
||||||
|
res[res_idx] -= sum * tpl_sum; |
||||||
|
} |
||||||
|
} |
||||||
|
__kernel |
||||||
|
void matchTemplate_Prepared_CCOFF_C4_D0 |
||||||
|
( |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int res_offset, |
||||||
|
int res_step, |
||||||
|
__global const uint * img_sums_c0, |
||||||
|
__global const uint * img_sums_c1, |
||||||
|
__global const uint * img_sums_c2, |
||||||
|
__global const uint * img_sums_c3, |
||||||
|
int img_sums_offset, |
||||||
|
int img_sums_step, |
||||||
|
float tpl_sum_c0, |
||||||
|
float tpl_sum_c1, |
||||||
|
float tpl_sum_c2, |
||||||
|
float tpl_sum_c3 |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
img_sums_offset /= sizeof(*img_sums_c0); |
||||||
|
img_sums_step /= sizeof(*img_sums_c0); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float ccorr = res[res_idx]; |
||||||
|
ccorr -= tpl_sum_c0*(float)( |
||||||
|
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)])); |
||||||
|
ccorr -= tpl_sum_c1*(float)( |
||||||
|
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)])); |
||||||
|
ccorr -= tpl_sum_c2*(float)( |
||||||
|
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)])); |
||||||
|
ccorr -= tpl_sum_c3*(float)( |
||||||
|
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)])); |
||||||
|
res[res_idx] = ccorr; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel |
||||||
|
void matchTemplate_Prepared_CCOFF_NORMED_C1_D0 |
||||||
|
( |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int res_offset, |
||||||
|
int res_step, |
||||||
|
float weight, |
||||||
|
__global const uint * img_sums, |
||||||
|
int img_sums_offset, |
||||||
|
int img_sums_step, |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums, |
||||||
|
int img_sqsums_offset, |
||||||
|
int img_sqsums_step, |
||||||
|
float tpl_sum, |
||||||
|
float tpl_sqsum |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
img_sqsums_step /= sizeof(*img_sqsums); |
||||||
|
img_sqsums_offset /= sizeof(*img_sqsums); |
||||||
|
img_sums_offset /= sizeof(*img_sums); |
||||||
|
img_sums_step /= sizeof(*img_sums); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float image_sum_ = (float)( |
||||||
|
(img_sums[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums[SUMS_PTR(0, tpl_rows)] - img_sums[SUMS_PTR(0, 0)])); |
||||||
|
|
||||||
|
float image_sqsum_ = (float)( |
||||||
|
(img_sqsums[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums[SQSUMS_PTR(0, tpl_rows)] - img_sqsums[SQSUMS_PTR(0, 0)])); |
||||||
|
res[res_idx] = normAcc(res[res_idx] - image_sum_ * tpl_sum, |
||||||
|
sqrt(tpl_sqsum * (image_sqsum_ - weight * image_sum_ * image_sum_))); |
||||||
|
} |
||||||
|
} |
||||||
|
__kernel |
||||||
|
void matchTemplate_Prepared_CCOFF_NORMED_C4_D0 |
||||||
|
( |
||||||
|
__global float * res, |
||||||
|
int img_rows, |
||||||
|
int img_cols, |
||||||
|
int tpl_rows, |
||||||
|
int tpl_cols, |
||||||
|
int res_rows, |
||||||
|
int res_cols, |
||||||
|
int res_offset, |
||||||
|
int res_step, |
||||||
|
float weight, |
||||||
|
__global const uint * img_sums_c0, |
||||||
|
__global const uint * img_sums_c1, |
||||||
|
__global const uint * img_sums_c2, |
||||||
|
__global const uint * img_sums_c3, |
||||||
|
int img_sums_offset, |
||||||
|
int img_sums_step, |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums_c0, |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums_c1, |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums_c2, |
||||||
|
__global const TYPE_IMAGE_SQSUM * img_sqsums_c3, |
||||||
|
int img_sqsums_offset, |
||||||
|
int img_sqsums_step, |
||||||
|
float tpl_sum_c0, |
||||||
|
float tpl_sum_c1, |
||||||
|
float tpl_sum_c2, |
||||||
|
float tpl_sum_c3, |
||||||
|
float tpl_sqsum |
||||||
|
) |
||||||
|
{ |
||||||
|
int gidx = get_global_id(0); |
||||||
|
int gidy = get_global_id(1); |
||||||
|
|
||||||
|
img_sqsums_step /= sizeof(*img_sqsums_c0); |
||||||
|
img_sqsums_offset /= sizeof(*img_sqsums_c0); |
||||||
|
img_sums_offset /= sizeof(*img_sums_c0); |
||||||
|
img_sums_step /= sizeof(*img_sums_c0); |
||||||
|
res_step /= sizeof(*res); |
||||||
|
res_offset /= sizeof(*res); |
||||||
|
|
||||||
|
int res_idx = mad24(gidy, res_step, res_offset + gidx); |
||||||
|
|
||||||
|
if(gidx < res_cols && gidy < res_rows) |
||||||
|
{ |
||||||
|
float image_sum_c0 = (float)( |
||||||
|
(img_sums_c0[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c0[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c0[SUMS_PTR(0, tpl_rows)] - img_sums_c0[SUMS_PTR(0, 0)])); |
||||||
|
float image_sum_c1 = (float)( |
||||||
|
(img_sums_c1[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c1[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c1[SUMS_PTR(0, tpl_rows)] - img_sums_c1[SUMS_PTR(0, 0)])); |
||||||
|
float image_sum_c2 = (float)( |
||||||
|
(img_sums_c2[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c2[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c2[SUMS_PTR(0, tpl_rows)] - img_sums_c2[SUMS_PTR(0, 0)])); |
||||||
|
float image_sum_c3 = (float)( |
||||||
|
(img_sums_c3[SUMS_PTR(tpl_cols, tpl_rows)] - img_sums_c3[SUMS_PTR(tpl_cols, 0)]) |
||||||
|
- (img_sums_c3[SUMS_PTR(0, tpl_rows)] - img_sums_c3[SUMS_PTR(0, 0)])); |
||||||
|
|
||||||
|
float image_sqsum_c0 = (float)( |
||||||
|
(img_sqsums_c0[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums_c0[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c0[SQSUMS_PTR(0, 0)])); |
||||||
|
float image_sqsum_c1 = (float)( |
||||||
|
(img_sqsums_c1[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums_c1[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c1[SQSUMS_PTR(0, 0)])); |
||||||
|
float image_sqsum_c2 = (float)( |
||||||
|
(img_sqsums_c2[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums_c2[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c2[SQSUMS_PTR(0, 0)])); |
||||||
|
float image_sqsum_c3 = (float)( |
||||||
|
(img_sqsums_c3[SQSUMS_PTR(tpl_cols, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(tpl_cols, 0)]) - |
||||||
|
(img_sqsums_c3[SQSUMS_PTR(0, tpl_rows)] - img_sqsums_c3[SQSUMS_PTR(0, 0)])); |
||||||
|
|
||||||
|
float num = res[res_idx] - |
||||||
|
image_sum_c0 * tpl_sum_c0 - |
||||||
|
image_sum_c1 * tpl_sum_c1 - |
||||||
|
image_sum_c2 * tpl_sum_c2 - |
||||||
|
image_sum_c3 * tpl_sum_c3; |
||||||
|
float denum = sqrt( tpl_sqsum * ( |
||||||
|
image_sqsum_c0 - weight * image_sum_c0 * image_sum_c0 + |
||||||
|
image_sqsum_c1 - weight * image_sum_c1 * image_sum_c1 + |
||||||
|
image_sqsum_c2 - weight * image_sum_c2 * image_sum_c2 + |
||||||
|
image_sqsum_c3 - weight * image_sum_c0 * image_sum_c3) |
||||||
|
); |
||||||
|
res[res_idx] = normAcc(num, denum); |
||||||
|
} |
||||||
|
} |
||||||
|
|
@ -0,0 +1,500 @@ |
|||||||
|
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||||
|
// |
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||||
|
// |
||||||
|
// By downloading, copying, installing or using the software you agree to this license. |
||||||
|
// If you do not agree to this license, do not download, install, |
||||||
|
// copy or use the software. |
||||||
|
// |
||||||
|
// |
||||||
|
// License Agreement |
||||||
|
// For Open Source Computer Vision Library |
||||||
|
// |
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||||
|
// Third party copyrights are property of their respective owners. |
||||||
|
// |
||||||
|
// @Authors |
||||||
|
// Dachuan Zhao, dachuan@multicorewareinc.com |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without modification, |
||||||
|
// are permitted provided that the following conditions are met: |
||||||
|
// |
||||||
|
// * Redistribution's of source code must retain the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer. |
||||||
|
// |
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer in the documentation |
||||||
|
// and/or other oclMaterials provided with the distribution. |
||||||
|
// |
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products |
||||||
|
// derived from this software without specific prior written permission. |
||||||
|
// |
||||||
|
// This software is provided by the copyright holders and contributors as is and |
||||||
|
// any express or implied warranties, including, but not limited to, the implied |
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||||
|
// indirect, incidental, special, exemplary, or consequential damages |
||||||
|
// (including, but not limited to, procurement of substitute goods or services; |
||||||
|
// loss of use, data, or profits; or business interruption) however caused |
||||||
|
// and on any theory of liability, whether in contract, strict liability, |
||||||
|
// or tort (including negligence or otherwise) arising in any way out of |
||||||
|
// the use of this software, even if advised of the possibility of such damage. |
||||||
|
// |
||||||
|
//M*/ |
||||||
|
|
||||||
|
#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||||
|
|
||||||
|
|
||||||
|
uchar round_uchar_uchar(uchar v) |
||||||
|
{ |
||||||
|
return v; |
||||||
|
} |
||||||
|
|
||||||
|
uchar round_uchar_int(int v) |
||||||
|
{ |
||||||
|
return (uchar)((uint)v <= 255 ? v : v > 0 ? 255 : 0); |
||||||
|
} |
||||||
|
|
||||||
|
uchar round_uchar_float(float v) |
||||||
|
{ |
||||||
|
if(v - convert_int_sat_rte(v) > 1e-6 || v - convert_int_sat_rte(v) < -1e-6) |
||||||
|
{ |
||||||
|
if(((int)v + 1) - (v + 0.5f) < 1e-6 && ((int)v + 1) - (v + 0.5f) > -1e-6) |
||||||
|
{ |
||||||
|
v = (int)v + 0.51f; |
||||||
|
} |
||||||
|
} |
||||||
|
int iv = convert_int_sat_rte(v); |
||||||
|
return round_uchar_int(iv); |
||||||
|
} |
||||||
|
|
||||||
|
uchar4 round_uchar4_uchar4(uchar4 v) |
||||||
|
{ |
||||||
|
return v; |
||||||
|
} |
||||||
|
|
||||||
|
uchar4 round_uchar4_int4(int4 v) |
||||||
|
{ |
||||||
|
uchar4 result; |
||||||
|
result.x = (uchar)(v.x <= 255 ? v.x : v.x > 0 ? 255 : 0); |
||||||
|
result.y = (uchar)(v.y <= 255 ? v.y : v.y > 0 ? 255 : 0); |
||||||
|
result.z = (uchar)(v.z <= 255 ? v.z : v.z > 0 ? 255 : 0); |
||||||
|
result.w = (uchar)(v.w <= 255 ? v.w : v.w > 0 ? 255 : 0); |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
uchar4 round_uchar4_float4(float4 v) |
||||||
|
{ |
||||||
|
if(v.x - convert_int_sat_rte(v.x) > 1e-6 || v.x - convert_int_sat_rte(v.x) < -1e-6) |
||||||
|
{ |
||||||
|
if(((int)(v.x) + 1) - (v.x + 0.5f) < 1e-6 && ((int)(v.x) + 1) - (v.x + 0.5f) > -1e-6) |
||||||
|
{ |
||||||
|
v.x = (int)(v.x) + 0.51f; |
||||||
|
} |
||||||
|
} |
||||||
|
if(v.y - convert_int_sat_rte(v.y) > 1e-6 || v.y - convert_int_sat_rte(v.y) < -1e-6) |
||||||
|
{ |
||||||
|
if(((int)(v.y) + 1) - (v.y + 0.5f) < 1e-6 && ((int)(v.y) + 1) - (v.y + 0.5f) > -1e-6) |
||||||
|
{ |
||||||
|
v.y = (int)(v.y) + 0.51f; |
||||||
|
} |
||||||
|
} |
||||||
|
if(v.z - convert_int_sat_rte(v.z) > 1e-6 || v.z - convert_int_sat_rte(v.z) < -1e-6) |
||||||
|
{ |
||||||
|
if(((int)(v.z) + 1) - (v.z + 0.5f) < 1e-6 && ((int)(v.z) + 1) - (v.z + 0.5f) > -1e-6) |
||||||
|
{ |
||||||
|
v.z = (int)(v.z) + 0.51f; |
||||||
|
} |
||||||
|
} |
||||||
|
if(v.w - convert_int_sat_rte(v.w) > 1e-6 || v.w - convert_int_sat_rte(v.w) < -1e-6) |
||||||
|
{ |
||||||
|
if(((int)(v.w) + 1) - (v.w + 0.5f) < 1e-6 && ((int)(v.w) + 1) - (v.w + 0.5f) > -1e-6) |
||||||
|
{ |
||||||
|
v.w = (int)(v.w) + 0.51f; |
||||||
|
} |
||||||
|
} |
||||||
|
int4 iv = convert_int4_sat_rte(v); |
||||||
|
return round_uchar4_int4(iv); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
int idx_row_low(int y, int last_row) |
||||||
|
{ |
||||||
|
if(y < 0) |
||||||
|
{ |
||||||
|
y = -y; |
||||||
|
} |
||||||
|
return y % (last_row + 1); |
||||||
|
} |
||||||
|
|
||||||
|
int idx_row_high(int y, int last_row) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
int j; |
||||||
|
if(last_row - y < 0) |
||||||
|
{ |
||||||
|
i = (y - last_row); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
i = (last_row - y); |
||||||
|
} |
||||||
|
if(last_row - i < 0) |
||||||
|
{ |
||||||
|
j = i - last_row; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
j = last_row - i; |
||||||
|
} |
||||||
|
return j % (last_row + 1); |
||||||
|
} |
||||||
|
|
||||||
|
int idx_row(int y, int last_row) |
||||||
|
{ |
||||||
|
return idx_row_low(idx_row_high(y, last_row), last_row); |
||||||
|
} |
||||||
|
|
||||||
|
int idx_col_low(int x, int last_col) |
||||||
|
{ |
||||||
|
if(x < 0) |
||||||
|
{ |
||||||
|
x = -x; |
||||||
|
} |
||||||
|
return x % (last_col + 1); |
||||||
|
} |
||||||
|
|
||||||
|
int idx_col_high(int x, int last_col) |
||||||
|
{ |
||||||
|
int i; |
||||||
|
int j; |
||||||
|
if(last_col - x < 0) |
||||||
|
{ |
||||||
|
i = (x - last_col); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
i = (last_col - x); |
||||||
|
} |
||||||
|
if(last_col - i < 0) |
||||||
|
{ |
||||||
|
j = i - last_col; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
j = last_col - i; |
||||||
|
} |
||||||
|
return j % (last_col + 1); |
||||||
|
} |
||||||
|
|
||||||
|
int idx_col(int x, int last_col) |
||||||
|
{ |
||||||
|
return idx_col_low(idx_col_high(x, last_col), last_col); |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols) |
||||||
|
{ |
||||||
|
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); |
||||||
|
const int y = get_group_id(1); |
||||||
|
|
||||||
|
__local float smem[256 + 4]; |
||||||
|
|
||||||
|
float sum; |
||||||
|
|
||||||
|
const int src_y = 2*y; |
||||||
|
const int last_row = srcRows - 1; |
||||||
|
const int last_col = srcCols - 1; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]); |
||||||
|
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]); |
||||||
|
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]); |
||||||
|
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]); |
||||||
|
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]); |
||||||
|
|
||||||
|
smem[2 + get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(0) < 2) |
||||||
|
{ |
||||||
|
const int left_x = x - 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]); |
||||||
|
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]); |
||||||
|
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]); |
||||||
|
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]); |
||||||
|
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]); |
||||||
|
|
||||||
|
smem[get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(0) > 253) |
||||||
|
{ |
||||||
|
const int right_x = x + 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]); |
||||||
|
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]); |
||||||
|
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]); |
||||||
|
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]); |
||||||
|
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]); |
||||||
|
|
||||||
|
smem[4 + get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
if (get_local_id(0) < 128) |
||||||
|
{ |
||||||
|
const int tid2 = get_local_id(0) * 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * smem[2 + tid2 - 2]; |
||||||
|
sum = sum + 0.25f * smem[2 + tid2 - 1]; |
||||||
|
sum = sum + 0.375f * smem[2 + tid2 ]; |
||||||
|
sum = sum + 0.25f * smem[2 + tid2 + 1]; |
||||||
|
sum = sum + 0.0625f * smem[2 + tid2 + 2]; |
||||||
|
|
||||||
|
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; |
||||||
|
|
||||||
|
if (dst_x < dstCols) |
||||||
|
dst[y * dstStep + dst_x] = round_uchar_float(sum); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar4 *dst, int dstStep, int dstOffset, int dstCols) |
||||||
|
{ |
||||||
|
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); |
||||||
|
const int y = get_group_id(1); |
||||||
|
|
||||||
|
__local float4 smem[256 + 4]; |
||||||
|
|
||||||
|
float4 sum; |
||||||
|
|
||||||
|
const int src_y = 2*y; |
||||||
|
const int last_row = srcRows - 1; |
||||||
|
const int last_col = srcCols - 1; |
||||||
|
|
||||||
|
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); |
||||||
|
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); |
||||||
|
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)])); |
||||||
|
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)])); |
||||||
|
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)])); |
||||||
|
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)])); |
||||||
|
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)])); |
||||||
|
|
||||||
|
smem[2 + get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(0) < 2) |
||||||
|
{ |
||||||
|
const int left_x = x - 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); |
||||||
|
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); |
||||||
|
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)])); |
||||||
|
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); |
||||||
|
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)])); |
||||||
|
|
||||||
|
smem[get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(0) > 253) |
||||||
|
{ |
||||||
|
const int right_x = x + 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); |
||||||
|
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); |
||||||
|
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)])); |
||||||
|
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); |
||||||
|
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)])); |
||||||
|
|
||||||
|
smem[4 + get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
if (get_local_id(0) < 128) |
||||||
|
{ |
||||||
|
const int tid2 = get_local_id(0) * 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * smem[2 + tid2 - 2]; |
||||||
|
sum = sum + co2 * smem[2 + tid2 - 1]; |
||||||
|
sum = sum + co1 * smem[2 + tid2 ]; |
||||||
|
sum = sum + co2 * smem[2 + tid2 + 1]; |
||||||
|
sum = sum + co3 * smem[2 + tid2 + 2]; |
||||||
|
|
||||||
|
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; |
||||||
|
|
||||||
|
if (dst_x < dstCols) |
||||||
|
dst[y * dstStep / 4 + dst_x] = round_uchar4_float4(sum); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void pyrDown_C1_D5(__global float * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float *dst, int dstStep, int dstOffset, int dstCols) |
||||||
|
{ |
||||||
|
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); |
||||||
|
const int y = get_group_id(1); |
||||||
|
|
||||||
|
__local float smem[256 + 4]; |
||||||
|
|
||||||
|
float sum; |
||||||
|
|
||||||
|
const int src_y = 2*y; |
||||||
|
const int last_row = srcRows - 1; |
||||||
|
const int last_col = srcCols - 1; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]; |
||||||
|
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]; |
||||||
|
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]; |
||||||
|
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]; |
||||||
|
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]; |
||||||
|
|
||||||
|
smem[2 + get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(0) < 2) |
||||||
|
{ |
||||||
|
const int left_x = x - 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]; |
||||||
|
|
||||||
|
smem[get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(0) > 253) |
||||||
|
{ |
||||||
|
const int right_x = x + 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + 0.375f * ((__global float*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + 0.25f * ((__global float*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + 0.0625f * ((__global float*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]; |
||||||
|
|
||||||
|
smem[4 + get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
if (get_local_id(0) < 128) |
||||||
|
{ |
||||||
|
const int tid2 = get_local_id(0) * 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + 0.0625f * smem[2 + tid2 - 2]; |
||||||
|
sum = sum + 0.25f * smem[2 + tid2 - 1]; |
||||||
|
sum = sum + 0.375f * smem[2 + tid2 ]; |
||||||
|
sum = sum + 0.25f * smem[2 + tid2 + 1]; |
||||||
|
sum = sum + 0.0625f * smem[2 + tid2 + 2]; |
||||||
|
|
||||||
|
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; |
||||||
|
|
||||||
|
if (dst_x < dstCols) |
||||||
|
dst[y * dstStep / 4 + dst_x] = sum; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void pyrDown_C4_D5(__global float4 * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global float4 *dst, int dstStep, int dstOffset, int dstCols) |
||||||
|
{ |
||||||
|
const int x = get_group_id(0) * get_local_size(0) + get_local_id(0); |
||||||
|
const int y = get_group_id(1); |
||||||
|
|
||||||
|
__local float4 smem[256 + 4]; |
||||||
|
|
||||||
|
float4 sum; |
||||||
|
|
||||||
|
const int src_y = 2*y; |
||||||
|
const int last_row = srcRows - 1; |
||||||
|
const int last_col = srcCols - 1; |
||||||
|
|
||||||
|
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); |
||||||
|
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); |
||||||
|
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]; |
||||||
|
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]; |
||||||
|
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]; |
||||||
|
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]; |
||||||
|
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]; |
||||||
|
|
||||||
|
smem[2 + get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(0) < 2) |
||||||
|
{ |
||||||
|
const int left_x = x - 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; |
||||||
|
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]; |
||||||
|
|
||||||
|
smem[get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(0) > 253) |
||||||
|
{ |
||||||
|
const int right_x = x + 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + co1 * ((__global float4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + co2 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; |
||||||
|
sum = sum + co3 * ((__global float4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]; |
||||||
|
|
||||||
|
smem[4 + get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
if (get_local_id(0) < 128) |
||||||
|
{ |
||||||
|
const int tid2 = get_local_id(0) * 2; |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
sum = sum + co3 * smem[2 + tid2 - 2]; |
||||||
|
sum = sum + co2 * smem[2 + tid2 - 1]; |
||||||
|
sum = sum + co1 * smem[2 + tid2 ]; |
||||||
|
sum = sum + co2 * smem[2 + tid2 + 1]; |
||||||
|
sum = sum + co3 * smem[2 + tid2 + 2]; |
||||||
|
|
||||||
|
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; |
||||||
|
|
||||||
|
if (dst_x < dstCols) |
||||||
|
dst[y * dstStep / 16 + dst_x] = sum; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,750 @@ |
|||||||
|
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||||
|
// |
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||||
|
// |
||||||
|
// By downloading, copying, installing or using the software you agree to this license. |
||||||
|
// If you do not agree to this license, do not download, install, |
||||||
|
// copy or use the software. |
||||||
|
// |
||||||
|
// |
||||||
|
// License Agreement |
||||||
|
// For Open Source Computer Vision Library |
||||||
|
// |
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. |
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||||
|
// Third party copyrights are property of their respective owners. |
||||||
|
// |
||||||
|
// @Authors |
||||||
|
// Zhang Chunpeng chunpeng@multicorewareinc.com |
||||||
|
// |
||||||
|
// Redistribution and use in source and binary forms, with or without modification, |
||||||
|
// are permitted provided that the following conditions are met: |
||||||
|
// |
||||||
|
// * Redistribution's of source code must retain the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer. |
||||||
|
// |
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||||
|
// this list of conditions and the following disclaimer in the documentation |
||||||
|
// and/or other oclMaterials provided with the distribution. |
||||||
|
// |
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products |
||||||
|
// derived from this software without specific prior written permission. |
||||||
|
// |
||||||
|
// This software is provided by the copyright holders and contributors as is and |
||||||
|
// any express or implied warranties, including, but not limited to, the implied |
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||||
|
// indirect, incidental, special, exemplary, or consequential damages |
||||||
|
// (including, but not limited to, procurement of substitute goods or services; |
||||||
|
// loss of use, data, or profits; or business interruption) however caused |
||||||
|
// and on any theory of liability, whether in contract, strict liability, |
||||||
|
// or tort (including negligence or otherwise) arising in any way out of |
||||||
|
// the use of this software, even if advised of the possibility of such damage. |
||||||
|
// |
||||||
|
//M*/ |
||||||
|
|
||||||
|
//#pragma OPENCL EXTENSION cl_amd_printf : enable |
||||||
|
|
||||||
|
uchar get_valid_uchar(uchar data) |
||||||
|
{ |
||||||
|
return (uchar)(data <= 255 ? data : data > 0 ? 255 : 0); |
||||||
|
} |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
////////////////////////// CV_8UC1 ////////////////////////////////// |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
__kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst, |
||||||
|
int srcRows,int dstRows,int srcCols,int dstCols, |
||||||
|
int srcOffset,int dstOffset,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
const int y = get_global_id(1); |
||||||
|
|
||||||
|
__local float s_srcPatch[10][10]; |
||||||
|
__local float s_dstPatch[20][16]; |
||||||
|
|
||||||
|
|
||||||
|
if( get_local_id(0) < 10 && get_local_id(1) < 10 ) |
||||||
|
{ |
||||||
|
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; |
||||||
|
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; |
||||||
|
|
||||||
|
srcx = abs(srcx); |
||||||
|
srcx = min(srcCols - 1,srcx); |
||||||
|
|
||||||
|
srcy = abs(srcy); |
||||||
|
srcy = min(srcRows -1 ,srcy); |
||||||
|
|
||||||
|
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
float sum = 0; |
||||||
|
const int evenFlag = (int)((get_local_id(0) & 1) == 0); |
||||||
|
const int oddFlag = (int)((get_local_id(0) & 1) != 0); |
||||||
|
const bool eveny = ((get_local_id(1) & 1) == 0); |
||||||
|
const int tidx = get_local_id(0); |
||||||
|
|
||||||
|
if(eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(1) < 2) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(1) > 13) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
const int tidy = get_local_id(1); |
||||||
|
|
||||||
|
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; |
||||||
|
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; |
||||||
|
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; |
||||||
|
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; |
||||||
|
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; |
||||||
|
|
||||||
|
if ((x < dstCols) && (y < dstRows)) |
||||||
|
dst[x + y * dstStep] = (float)(4.0f * sum); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
////////////////////////// CV_16UC1 ///////////////////////////////// |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
__kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst, |
||||||
|
int srcRows,int dstRows,int srcCols,int dstCols, |
||||||
|
int srcOffset,int dstOffset,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
const int y = get_global_id(1); |
||||||
|
|
||||||
|
__local float s_srcPatch[10][10]; |
||||||
|
__local float s_dstPatch[20][16]; |
||||||
|
|
||||||
|
srcStep = srcStep >> 1; |
||||||
|
dstStep = dstStep >> 1; |
||||||
|
srcOffset = srcOffset >> 1; |
||||||
|
dstOffset = dstOffset >> 1; |
||||||
|
|
||||||
|
|
||||||
|
if( get_local_id(0) < 10 && get_local_id(1) < 10 ) |
||||||
|
{ |
||||||
|
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; |
||||||
|
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; |
||||||
|
|
||||||
|
srcx = abs(srcx); |
||||||
|
srcx = min(srcCols - 1,srcx); |
||||||
|
|
||||||
|
srcy = abs(srcy); |
||||||
|
srcy = min(srcRows -1 ,srcy); |
||||||
|
|
||||||
|
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
float sum = 0; |
||||||
|
|
||||||
|
const int evenFlag = (int)((get_local_id(0) & 1) == 0); |
||||||
|
const int oddFlag = (int)((get_local_id(0) & 1) != 0); |
||||||
|
const bool eveny = ((get_local_id(1) & 1) == 0); |
||||||
|
const int tidx = get_local_id(0); |
||||||
|
|
||||||
|
if(eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(1) < 2) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(1) > 13) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
const int tidy = get_local_id(1); |
||||||
|
|
||||||
|
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; |
||||||
|
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; |
||||||
|
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; |
||||||
|
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; |
||||||
|
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; |
||||||
|
|
||||||
|
if ((x < dstCols) && (y < dstRows)) |
||||||
|
dst[x + y * dstStep] = (float)(4.0f * sum); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
////////////////////////// CV_32FC1 ///////////////////////////////// |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
__kernel void pyrUp_C1_D5(__global float* src,__global float* dst, |
||||||
|
int srcRows,int dstRows,int srcCols,int dstCols, |
||||||
|
int srcOffset,int dstOffset,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
const int y = get_global_id(1); |
||||||
|
|
||||||
|
__local float s_srcPatch[10][10]; |
||||||
|
__local float s_dstPatch[20][16]; |
||||||
|
|
||||||
|
srcOffset = srcOffset >> 2; |
||||||
|
dstOffset = dstOffset >> 2; |
||||||
|
srcStep = srcStep >> 2; |
||||||
|
dstStep = dstStep >> 2; |
||||||
|
|
||||||
|
|
||||||
|
if( get_local_id(0) < 10 && get_local_id(1) < 10 ) |
||||||
|
{ |
||||||
|
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; |
||||||
|
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; |
||||||
|
|
||||||
|
srcx = abs(srcx); |
||||||
|
srcx = min(srcCols - 1,srcx); |
||||||
|
|
||||||
|
srcy = abs(srcy); |
||||||
|
srcy = min(srcRows -1 ,srcy); |
||||||
|
|
||||||
|
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float)(src[srcx + srcy * srcStep]); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
float sum = 0; |
||||||
|
const int evenFlag = (int)((get_local_id(0) & 1) == 0); |
||||||
|
const int oddFlag = (int)((get_local_id(0) & 1) != 0); |
||||||
|
const bool eveny = ((get_local_id(1) & 1) == 0); |
||||||
|
const int tidx = get_local_id(0); |
||||||
|
|
||||||
|
if(eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(1) < 2) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(1) > 13) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx - 2) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx - 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.375f ) * s_srcPatch[9][1 + ((tidx ) >> 1)]; |
||||||
|
sum = sum + ( oddFlag * 0.25f ) * s_srcPatch[9][1 + ((tidx + 1) >> 1)]; |
||||||
|
sum = sum + (evenFlag * 0.0625f) * s_srcPatch[9][1 + ((tidx + 2) >> 1)]; |
||||||
|
} |
||||||
|
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
const int tidy = get_local_id(1); |
||||||
|
|
||||||
|
sum = sum + 0.0625f * s_dstPatch[2 + tidy - 2][get_local_id(0)]; |
||||||
|
sum = sum + 0.25f * s_dstPatch[2 + tidy - 1][get_local_id(0)]; |
||||||
|
sum = sum + 0.375f * s_dstPatch[2 + tidy ][get_local_id(0)]; |
||||||
|
sum = sum + 0.25f * s_dstPatch[2 + tidy + 1][get_local_id(0)]; |
||||||
|
sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; |
||||||
|
|
||||||
|
if ((x < dstCols) && (y < dstRows)) |
||||||
|
dst[x + y * dstStep] = (float)(4.0f * sum); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
////////////////////////// CV_8UC4 ////////////////////////////////// |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
float4 covert_uchar4_to_float4(uchar4 data) |
||||||
|
{ |
||||||
|
float4 f4Data = {0,0,0,0}; |
||||||
|
|
||||||
|
f4Data.x = (float)data.x; |
||||||
|
f4Data.y = (float)data.y; |
||||||
|
f4Data.z = (float)data.z; |
||||||
|
f4Data.w = (float)data.w; |
||||||
|
|
||||||
|
return f4Data; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
uchar4 convert_float4_to_uchar4(float4 data) |
||||||
|
{ |
||||||
|
uchar4 u4Data; |
||||||
|
|
||||||
|
u4Data.x = get_valid_uchar(data.x); |
||||||
|
u4Data.y = get_valid_uchar(data.y); |
||||||
|
u4Data.z = get_valid_uchar(data.z); |
||||||
|
u4Data.w = get_valid_uchar(data.w); |
||||||
|
|
||||||
|
return u4Data; |
||||||
|
} |
||||||
|
|
||||||
|
float4 int_x_float4(int leftOpr,float4 rightOpr) |
||||||
|
{ |
||||||
|
float4 result = {0,0,0,0}; |
||||||
|
|
||||||
|
result.x = rightOpr.x * leftOpr; |
||||||
|
result.y = rightOpr.y * leftOpr; |
||||||
|
result.z = rightOpr.z * leftOpr; |
||||||
|
result.w = rightOpr.w * leftOpr; |
||||||
|
|
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
float4 float4_x_float4(float4 leftOpr,float4 rightOpr) |
||||||
|
{ |
||||||
|
float4 result; |
||||||
|
|
||||||
|
result.x = leftOpr.x * rightOpr.x; |
||||||
|
result.y = leftOpr.y * rightOpr.y; |
||||||
|
result.z = leftOpr.z * rightOpr.z; |
||||||
|
result.w = leftOpr.w * rightOpr.w; |
||||||
|
|
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
__kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, |
||||||
|
int srcRows,int dstRows,int srcCols,int dstCols, |
||||||
|
int srcOffset,int dstOffset,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
const int y = get_global_id(1); |
||||||
|
|
||||||
|
__local float4 s_srcPatch[10][10]; |
||||||
|
__local float4 s_dstPatch[20][16]; |
||||||
|
|
||||||
|
srcOffset >>= 2; |
||||||
|
dstOffset >>= 2; |
||||||
|
srcStep >>= 2; |
||||||
|
dstStep >>= 2; |
||||||
|
|
||||||
|
|
||||||
|
if( get_local_id(0) < 10 && get_local_id(1) < 10 ) |
||||||
|
{ |
||||||
|
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; |
||||||
|
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; |
||||||
|
|
||||||
|
srcx = abs(srcx); |
||||||
|
srcx = min(srcCols - 1,srcx); |
||||||
|
|
||||||
|
srcy = abs(srcy); |
||||||
|
srcy = min(srcRows -1 ,srcy); |
||||||
|
|
||||||
|
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]); |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
float4 sum = (float4)(0,0,0,0); |
||||||
|
|
||||||
|
const int evenFlag = (int)((get_local_id(0) & 1) == 0); |
||||||
|
const int oddFlag = (int)((get_local_id(0) & 1) != 0); |
||||||
|
const bool eveny = ((get_local_id(1) & 1) == 0); |
||||||
|
const int tidx = get_local_id(0); |
||||||
|
|
||||||
|
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); |
||||||
|
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); |
||||||
|
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); |
||||||
|
|
||||||
|
|
||||||
|
if(eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(1) < 2) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(1) > 13) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); |
||||||
|
|
||||||
|
} |
||||||
|
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
const int tidy = get_local_id(1); |
||||||
|
|
||||||
|
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); |
||||||
|
|
||||||
|
if ((x < dstCols) && (y < dstRows)) |
||||||
|
{ |
||||||
|
dst[x + y * dstStep] = convert_float4_to_uchar4(int_x_float4(4.0f,sum)); |
||||||
|
} |
||||||
|
} |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
////////////////////////// CV_16UC4 ////////////////////////////////// |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
float4 covert_ushort4_to_float4(ushort4 data) |
||||||
|
{ |
||||||
|
float4 f4Data = {0,0,0,0}; |
||||||
|
|
||||||
|
f4Data.x = (float)data.x; |
||||||
|
f4Data.y = (float)data.y; |
||||||
|
f4Data.z = (float)data.z; |
||||||
|
f4Data.w = (float)data.w; |
||||||
|
|
||||||
|
return f4Data; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
ushort4 convert_float4_to_ushort4(float4 data) |
||||||
|
{ |
||||||
|
ushort4 u4Data; |
||||||
|
|
||||||
|
u4Data.x = (float)data.x; |
||||||
|
u4Data.y = (float)data.y; |
||||||
|
u4Data.z = (float)data.z; |
||||||
|
u4Data.w = (float)data.w; |
||||||
|
|
||||||
|
return u4Data; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, |
||||||
|
int srcRows,int dstRows,int srcCols,int dstCols, |
||||||
|
int srcOffset,int dstOffset,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
const int y = get_global_id(1); |
||||||
|
|
||||||
|
__local float4 s_srcPatch[10][10]; |
||||||
|
__local float4 s_dstPatch[20][16]; |
||||||
|
|
||||||
|
srcOffset >>= 3; |
||||||
|
dstOffset >>= 3; |
||||||
|
srcStep >>= 3; |
||||||
|
dstStep >>= 3; |
||||||
|
|
||||||
|
|
||||||
|
if( get_local_id(0) < 10 && get_local_id(1) < 10 ) |
||||||
|
{ |
||||||
|
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; |
||||||
|
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; |
||||||
|
|
||||||
|
srcx = abs(srcx); |
||||||
|
srcx = min(srcCols - 1,srcx); |
||||||
|
|
||||||
|
srcy = abs(srcy); |
||||||
|
srcy = min(srcRows -1 ,srcy); |
||||||
|
|
||||||
|
s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]); |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
float4 sum = (float4)(0,0,0,0); |
||||||
|
|
||||||
|
const int evenFlag = (int)((get_local_id(0) & 1) == 0); |
||||||
|
const int oddFlag = (int)((get_local_id(0) & 1) != 0); |
||||||
|
const bool eveny = ((get_local_id(1) & 1) == 0); |
||||||
|
const int tidx = get_local_id(0); |
||||||
|
|
||||||
|
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); |
||||||
|
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); |
||||||
|
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); |
||||||
|
|
||||||
|
|
||||||
|
if(eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(1) < 2) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(1) > 13) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); |
||||||
|
|
||||||
|
} |
||||||
|
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
const int tidy = get_local_id(1); |
||||||
|
|
||||||
|
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); |
||||||
|
|
||||||
|
if ((x < dstCols) && (y < dstRows)) |
||||||
|
{ |
||||||
|
dst[x + y * dstStep] = convert_float4_to_ushort4(int_x_float4(4.0f,sum)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
////////////////////////// CV_32FC4 ////////////////////////////////// |
||||||
|
/////////////////////////////////////////////////////////////////////// |
||||||
|
__kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, |
||||||
|
int srcRows,int dstRows,int srcCols,int dstCols, |
||||||
|
int srcOffset,int dstOffset,int srcStep,int dstStep) |
||||||
|
{ |
||||||
|
const int x = get_global_id(0); |
||||||
|
const int y = get_global_id(1); |
||||||
|
|
||||||
|
__local float4 s_srcPatch[10][10]; |
||||||
|
__local float4 s_dstPatch[20][16]; |
||||||
|
|
||||||
|
srcOffset >>= 4; |
||||||
|
dstOffset >>= 4; |
||||||
|
srcStep >>= 4; |
||||||
|
dstStep >>= 4; |
||||||
|
|
||||||
|
|
||||||
|
if( get_local_id(0) < 10 && get_local_id(1) < 10 ) |
||||||
|
{ |
||||||
|
int srcx = (int)(get_group_id(0) * get_local_size(0) / 2 + get_local_id(0)) - 1; |
||||||
|
int srcy = (int)(get_group_id(1) * get_local_size(1) / 2 + get_local_id(1)) - 1; |
||||||
|
|
||||||
|
srcx = abs(srcx); |
||||||
|
srcx = min(srcCols - 1,srcx); |
||||||
|
|
||||||
|
srcy = abs(srcy); |
||||||
|
srcy = min(srcRows -1 ,srcy); |
||||||
|
|
||||||
|
s_srcPatch[get_local_id(1)][get_local_id(0)] = (float4)(src[srcx + srcy * srcStep]); |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
float4 sum = (float4)(0,0,0,0); |
||||||
|
|
||||||
|
const int evenFlag = (int)((get_local_id(0) & 1) == 0); |
||||||
|
const int oddFlag = (int)((get_local_id(0) & 1) != 0); |
||||||
|
const bool eveny = ((get_local_id(1) & 1) == 0); |
||||||
|
const int tidx = get_local_id(0); |
||||||
|
|
||||||
|
float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); |
||||||
|
float4 co2 = (float4)(0.25f, 0.25f, 0.25f, 0.25f); |
||||||
|
float4 co3 = (float4)(0.0625f, 0.0625f, 0.0625f, 0.0625f); |
||||||
|
|
||||||
|
|
||||||
|
if(eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co1 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( evenFlag, co3 ) , s_srcPatch[1 + (get_local_id(1) >> 1)][1 + ((tidx + 2) >> 1)]); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[2 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
|
||||||
|
if (get_local_id(1) < 2) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1 ) , s_srcPatch[0][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2 ) , s_srcPatch[0][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[0][1 + ((tidx + 2) >> 1)]); |
||||||
|
} |
||||||
|
|
||||||
|
s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
if (get_local_id(1) > 13) |
||||||
|
{ |
||||||
|
sum = 0; |
||||||
|
|
||||||
|
if (eveny) |
||||||
|
{ |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx - 2) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx - 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co1) , s_srcPatch[9][1 + ((tidx ) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4( oddFlag , co2) , s_srcPatch[9][1 + ((tidx + 1) >> 1)]); |
||||||
|
sum = sum + float4_x_float4(int_x_float4(evenFlag , co3) , s_srcPatch[9][1 + ((tidx + 2) >> 1)]); |
||||||
|
|
||||||
|
} |
||||||
|
s_dstPatch[4 + get_local_id(1)][get_local_id(0)] = sum; |
||||||
|
} |
||||||
|
|
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
sum = 0; |
||||||
|
|
||||||
|
const int tidy = get_local_id(1); |
||||||
|
|
||||||
|
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy - 2][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy - 1][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co1 , s_dstPatch[2 + tidy ][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co2 , s_dstPatch[2 + tidy + 1][get_local_id(0)]); |
||||||
|
sum = sum + float4_x_float4(co3 , s_dstPatch[2 + tidy + 2][get_local_id(0)]); |
||||||
|
|
||||||
|
if ((x < dstCols) && (y < dstRows)) |
||||||
|
{ |
||||||
|
dst[x + y * dstStep] = 4.0f * sum; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,560 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <iomanip> |
||||||
|
#include "precomp.hpp" |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
#define EXT_FP64 0 |
||||||
|
|
||||||
|
#if !defined (HAVE_OPENCL) |
||||||
|
void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } |
||||||
|
#else |
||||||
|
//helper routines
|
||||||
|
namespace cv |
||||||
|
{ |
||||||
|
namespace ocl |
||||||
|
{ |
||||||
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||||
|
extern const char *match_template; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
namespace cv { namespace ocl |
||||||
|
{ |
||||||
|
void matchTemplate_SQDIFF( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); |
||||||
|
|
||||||
|
void matchTemplate_SQDIFF_NORMED( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); |
||||||
|
|
||||||
|
void matchTemplate_CCORR( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); |
||||||
|
|
||||||
|
void matchTemplate_CCORR_NORMED( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); |
||||||
|
|
||||||
|
void matchTemplate_CCOFF( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); |
||||||
|
|
||||||
|
void matchTemplate_CCOFF_NORMED( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); |
||||||
|
|
||||||
|
|
||||||
|
void matchTemplateNaive_SQDIFF( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, int cn); |
||||||
|
|
||||||
|
void matchTemplateNaive_CCORR( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, int cn); |
||||||
|
|
||||||
|
// Evaluates optimal template's area threshold. If
|
||||||
|
// template's area is less than the threshold, we use naive match
|
||||||
|
// template version, otherwise FFT-based (if available)
|
||||||
|
int getTemplateThreshold(int method, int depth) |
||||||
|
{ |
||||||
|
switch (method) |
||||||
|
{ |
||||||
|
case CV_TM_CCORR:
|
||||||
|
if (depth == CV_32F) return 250; |
||||||
|
if (depth == CV_8U) return 300; |
||||||
|
break; |
||||||
|
case CV_TM_SQDIFF: |
||||||
|
if (depth == CV_32F) return MAXSHORT; // do naive SQDIFF for CV_32F
|
||||||
|
if (depth == CV_8U) return 300; |
||||||
|
break; |
||||||
|
} |
||||||
|
CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode"); |
||||||
|
return 0; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// SQDIFF
|
||||||
|
void matchTemplate_SQDIFF( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) |
||||||
|
{ |
||||||
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); |
||||||
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) |
||||||
|
{ |
||||||
|
matchTemplateNaive_SQDIFF(image, templ, result, image.channels()); |
||||||
|
return; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
// TODO
|
||||||
|
CV_Error(CV_StsBadArg, "Not supported yet for this size template"); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void matchTemplate_SQDIFF_NORMED( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) |
||||||
|
{ |
||||||
|
matchTemplate_CCORR(image,templ,result,buf); |
||||||
|
buf.image_sums.resize(1); |
||||||
|
buf.image_sqsums.resize(1); |
||||||
|
|
||||||
|
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); |
||||||
|
|
||||||
|
#if EXT_FP64 && SQRSUM_FIXED |
||||||
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; |
||||||
|
#else |
||||||
|
Mat sqr_mat = templ.reshape(1); |
||||||
|
unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0]; |
||||||
|
#endif |
||||||
|
|
||||||
|
Context *clCxt = image.clCxt; |
||||||
|
string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; |
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); |
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1}; |
||||||
|
size_t localThreads[3] = {32, 8, 1}; |
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); |
||||||
|
} |
||||||
|
|
||||||
|
void matchTemplateNaive_SQDIFF( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, int cn) |
||||||
|
{ |
||||||
|
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) |
||||||
|
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); |
||||||
|
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); |
||||||
|
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); |
||||||
|
|
||||||
|
Context *clCxt = image.clCxt; |
||||||
|
string kernelName = "matchTemplate_Naive_SQDIFF"; |
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); |
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1}; |
||||||
|
size_t localThreads[3] = {32, 8, 1}; |
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); |
||||||
|
} |
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// CCORR
|
||||||
|
void matchTemplate_CCORR( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) |
||||||
|
{ |
||||||
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); |
||||||
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) |
||||||
|
{ |
||||||
|
matchTemplateNaive_CCORR(image, templ, result, image.channels()); |
||||||
|
return; |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
CV_Error(CV_StsBadArg, "Not supported yet for this size template"); |
||||||
|
if(image.depth() == CV_8U && templ.depth() == CV_8U) |
||||||
|
{ |
||||||
|
image.convertTo(buf.imagef, CV_32F); |
||||||
|
templ.convertTo(buf.templf, CV_32F); |
||||||
|
} |
||||||
|
CV_Assert(image.channels() == 1); |
||||||
|
oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels())); |
||||||
|
filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0)); |
||||||
|
result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1)); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void matchTemplate_CCORR_NORMED( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) |
||||||
|
{ |
||||||
|
matchTemplate_CCORR(image,templ,result,buf); |
||||||
|
buf.image_sums.resize(1); |
||||||
|
buf.image_sqsums.resize(1); |
||||||
|
|
||||||
|
integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); |
||||||
|
#if EXT_FP64 && SQRSUM_FIXED |
||||||
|
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; |
||||||
|
#elif EXT_FP64 |
||||||
|
oclMat templ_c1 = templ.reshape(1); |
||||||
|
multiply(templ_c1, templ_c1, templ_c1); |
||||||
|
unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0]; |
||||||
|
#else |
||||||
|
Mat m_templ_c1 = templ.reshape(1); |
||||||
|
multiply(m_templ_c1, m_templ_c1, m_templ_c1); |
||||||
|
unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0]; |
||||||
|
#endif |
||||||
|
Context *clCxt = image.clCxt; |
||||||
|
string kernelName = "normalizeKernel"; |
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); |
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1}; |
||||||
|
size_t localThreads[3] = {32, 8, 1}; |
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); |
||||||
|
} |
||||||
|
|
||||||
|
void matchTemplateNaive_CCORR( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, int cn) |
||||||
|
{ |
||||||
|
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) |
||||||
|
|| (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F); |
||||||
|
CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); |
||||||
|
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); |
||||||
|
|
||||||
|
Context *clCxt = image.clCxt; |
||||||
|
string kernelName = "matchTemplate_Naive_CCORR"; |
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
|
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); |
||||||
|
|
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1}; |
||||||
|
size_t localThreads[3] = {32, 8, 1}; |
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); |
||||||
|
} |
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// CCOFF
|
||||||
|
void matchTemplate_CCOFF( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) |
||||||
|
{ |
||||||
|
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U); |
||||||
|
|
||||||
|
matchTemplate_CCORR(image,templ,result,buf); |
||||||
|
|
||||||
|
Context *clCxt = image.clCxt; |
||||||
|
string kernelName; |
||||||
|
|
||||||
|
kernelName = "matchTemplate_Prepared_CCOFF"; |
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1}; |
||||||
|
size_t localThreads[3] = {32, 8, 1}; |
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); |
||||||
|
// to be continued in the following section
|
||||||
|
if(image.channels() == 1) |
||||||
|
{ |
||||||
|
buf.image_sums.resize(1); |
||||||
|
// FIXME: temp fix for incorrect integral kernel
|
||||||
|
oclMat tmp_oclmat; |
||||||
|
integral(image, buf.image_sums[0], tmp_oclmat); |
||||||
|
|
||||||
|
float templ_sum = 0; |
||||||
|
#if EXT_FP64 |
||||||
|
templ_sum = (float)sum(templ)[0] / templ.size().area(); |
||||||
|
#else |
||||||
|
Mat o_templ = templ; |
||||||
|
templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
|
||||||
|
#endif |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
Vec4f templ_sum = Vec4f::all(0); |
||||||
|
#if EXT_FP64 |
||||||
|
split(image,buf.images); |
||||||
|
templ_sum = sum(templ) / templ.size().area(); |
||||||
|
#else |
||||||
|
// temp fix for non-double supported machine
|
||||||
|
Mat o_templ = templ, o_image = image; |
||||||
|
vector<Mat> o_mat_vector; |
||||||
|
o_mat_vector.resize(image.channels()); |
||||||
|
buf.images.resize(image.channels()); |
||||||
|
split(o_image, o_mat_vector); |
||||||
|
for(int i = 0; i < o_mat_vector.size(); i ++) |
||||||
|
{ |
||||||
|
buf.images[i] = oclMat(o_mat_vector[i]); |
||||||
|
} |
||||||
|
templ_sum = sum(o_templ) / templ.size().area(); |
||||||
|
#endif |
||||||
|
buf.image_sums.resize(buf.images.size()); |
||||||
|
|
||||||
|
for(int i = 0; i < image.channels(); i ++) |
||||||
|
{ |
||||||
|
// FIXME: temp fix for incorrect integral kernel
|
||||||
|
oclMat omat_temp; |
||||||
|
integral(buf.images[i], buf.image_sums[i], omat_temp); |
||||||
|
} |
||||||
|
switch(image.channels()) |
||||||
|
{ |
||||||
|
case 4: |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); |
||||||
|
break; |
||||||
|
default: |
||||||
|
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); |
||||||
|
} |
||||||
|
|
||||||
|
void matchTemplate_CCOFF_NORMED( |
||||||
|
const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) |
||||||
|
{ |
||||||
|
image.convertTo(buf.imagef, CV_32F); |
||||||
|
templ.convertTo(buf.templf, CV_32F); |
||||||
|
|
||||||
|
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf); |
||||||
|
float scale = 1.f/templ.size().area(); |
||||||
|
|
||||||
|
Context *clCxt = image.clCxt; |
||||||
|
string kernelName; |
||||||
|
|
||||||
|
kernelName = "matchTemplate_Prepared_CCOFF_NORMED"; |
||||||
|
size_t globalThreads[3] = {result.cols, result.rows, 1}; |
||||||
|
size_t localThreads[3] = {32, 8, 1}; |
||||||
|
|
||||||
|
vector< pair<size_t, const void *> > args; |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
|
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&scale) ); |
||||||
|
// to be continued in the following section
|
||||||
|
if(image.channels() == 1) |
||||||
|
{ |
||||||
|
buf.image_sums.resize(1); |
||||||
|
buf.image_sqsums.resize(1); |
||||||
|
integral(image, buf.image_sums[0], buf.image_sqsums[0]); |
||||||
|
float templ_sum = 0; |
||||||
|
float templ_sqsum = 0; |
||||||
|
#if EXT_FP64 |
||||||
|
templ_sum = (float)sum(templ)[0]; |
||||||
|
#if SQRSUM_FIXED |
||||||
|
templ_sqsum = sqrSum(templ); |
||||||
|
#else |
||||||
|
oclMat templ_sqr = templ; |
||||||
|
multiply(templ,templ, templ_sqr); |
||||||
|
templ_sqsum = sum(templ_sqr)[0]; |
||||||
|
#endif //SQRSUM_FIXED
|
||||||
|
templ_sqsum -= scale * templ_sum * templ_sum; |
||||||
|
templ_sum *= scale; |
||||||
|
#else |
||||||
|
// temp fix for non-double supported machine
|
||||||
|
Mat o_templ = templ; |
||||||
|
templ_sum = (float)sum(o_templ)[0];
|
||||||
|
templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum; |
||||||
|
templ_sum *= scale; |
||||||
|
#endif |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) ); |
||||||
|
} |
||||||
|
else |
||||||
|
{ |
||||||
|
Vec4f templ_sum = Vec4f::all(0); |
||||||
|
Vec4f templ_sqsum = Vec4f::all(0); |
||||||
|
#if EXT_FP64 |
||||||
|
split(image,buf.images); |
||||||
|
templ_sum = sum(templ); |
||||||
|
#if SQRSUM_FIXED |
||||||
|
templ_sqsum = sqrSum(templ); |
||||||
|
#else |
||||||
|
oclMat templ_sqr = templ; |
||||||
|
multiply(templ,templ, templ_sqr); |
||||||
|
templ_sqsum = sum(templ_sqr); |
||||||
|
#endif //SQRSUM_FIXED
|
||||||
|
templ_sqsum -= scale * templ_sum * templ_sum; |
||||||
|
|
||||||
|
#else |
||||||
|
// temp fix for non-double supported machine
|
||||||
|
Mat o_templ = templ, o_image = image; |
||||||
|
|
||||||
|
vector<Mat> o_mat_vector; |
||||||
|
o_mat_vector.resize(image.channels()); |
||||||
|
buf.images.resize(image.channels()); |
||||||
|
split(o_image, o_mat_vector); |
||||||
|
for(int i = 0; i < o_mat_vector.size(); i ++) |
||||||
|
{ |
||||||
|
buf.images[i] = oclMat(o_mat_vector[i]); |
||||||
|
} |
||||||
|
templ_sum = sum(o_templ); |
||||||
|
templ_sqsum = sum(o_templ.mul(o_templ)); |
||||||
|
#endif |
||||||
|
float templ_sqsum_sum = 0; |
||||||
|
for(int i = 0; i < image.channels(); i ++) |
||||||
|
{ |
||||||
|
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; |
||||||
|
} |
||||||
|
templ_sum *= scale; |
||||||
|
buf.image_sums.resize(buf.images.size()); |
||||||
|
buf.image_sqsums.resize(buf.images.size()); |
||||||
|
|
||||||
|
for(int i = 0; i < image.channels(); i ++) |
||||||
|
{ |
||||||
|
integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]); |
||||||
|
} |
||||||
|
|
||||||
|
switch(image.channels()) |
||||||
|
{ |
||||||
|
case 4: |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); |
||||||
|
args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) ); |
||||||
|
break; |
||||||
|
default: |
||||||
|
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); |
||||||
|
} |
||||||
|
|
||||||
|
}/*ocl*/} /*cv*/ |
||||||
|
|
||||||
|
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method) |
||||||
|
{ |
||||||
|
MatchTemplateBuf buf; |
||||||
|
matchTemplate(image,templ, result, method,buf); |
||||||
|
} |
||||||
|
void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf) |
||||||
|
{ |
||||||
|
CV_Assert(image.type() == templ.type()); |
||||||
|
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows); |
||||||
|
|
||||||
|
typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&); |
||||||
|
|
||||||
|
const Caller callers[] = {
|
||||||
|
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
|
||||||
|
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
|
||||||
|
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED |
||||||
|
}; |
||||||
|
|
||||||
|
Caller caller = callers[method]; |
||||||
|
CV_Assert(caller); |
||||||
|
caller(image, templ, result, buf); |
||||||
|
} |
||||||
|
#endif //
|
@ -0,0 +1,115 @@ |
|||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
using std::cout; |
||||||
|
using std::endl; |
||||||
|
|
||||||
|
namespace cv |
||||||
|
{ |
||||||
|
namespace ocl |
||||||
|
{ |
||||||
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||||
|
extern const char *pyr_down; |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
/////////////////////// add subtract multiply divide /////////////////////////
|
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
template<typename T> |
||||||
|
void pyrdown_run(const oclMat &src, const oclMat &dst) |
||||||
|
{ |
||||||
|
CV_Assert(src.cols / 2 == dst.cols && src.rows / 2 == dst.rows); |
||||||
|
|
||||||
|
CV_Assert(src.type() == dst.type()); |
||||||
|
CV_Assert(src.depth() != CV_8S); |
||||||
|
|
||||||
|
Context *clCxt = src.clCxt; |
||||||
|
//int channels = dst.channels();
|
||||||
|
//int depth = dst.depth();
|
||||||
|
|
||||||
|
string kernelName = "pyrDown"; |
||||||
|
|
||||||
|
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
|
||||||
|
// {4, 0, 4, 4, 1, 1, 1},
|
||||||
|
// {4, 0, 4, 4, 1, 1, 1},
|
||||||
|
// {4, 0, 4, 4, 1, 1, 1}
|
||||||
|
//};
|
||||||
|
|
||||||
|
//size_t vector_length = vector_lengths[channels-1][depth];
|
||||||
|
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
|
||||||
|
|
||||||
|
size_t localThreads[3] = { 256, 1, 1 }; |
||||||
|
size_t globalThreads[3] = { src.cols, dst.rows, 1}; |
||||||
|
|
||||||
|
//int dst_step1 = dst.cols * dst.elemSize();
|
||||||
|
vector<pair<size_t , const void *> > args; |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); |
||||||
|
|
||||||
|
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); |
||||||
|
} |
||||||
|
void pyrdown_run(const oclMat &src, const oclMat &dst) |
||||||
|
{ |
||||||
|
switch(src.depth()) |
||||||
|
{ |
||||||
|
case 0: |
||||||
|
pyrdown_run<unsigned char>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
case 1: |
||||||
|
pyrdown_run<char>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
case 2: |
||||||
|
pyrdown_run<unsigned short>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
case 3: |
||||||
|
pyrdown_run<short>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
case 4: |
||||||
|
pyrdown_run<int>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
case 5: |
||||||
|
pyrdown_run<float>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
case 6: |
||||||
|
pyrdown_run<double>(src, dst); |
||||||
|
break; |
||||||
|
|
||||||
|
default: |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
// pyrDown
|
||||||
|
|
||||||
|
void cv::ocl::pyrDown(const oclMat& src, oclMat& dst) |
||||||
|
{ |
||||||
|
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); |
||||||
|
|
||||||
|
//src.step = src.rows;
|
||||||
|
|
||||||
|
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); |
||||||
|
|
||||||
|
//dst.step = dst.rows;
|
||||||
|
|
||||||
|
pyrdown_run(src, dst); |
||||||
|
} |
||||||
|
|
@ -0,0 +1,88 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
/* Haar features calculation */ |
||||||
|
//#define EMU
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
#ifndef HAVE_OPENCL |
||||||
|
void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); } |
||||||
|
#else |
||||||
|
|
||||||
|
namespace cv { namespace ocl
|
||||||
|
{
|
||||||
|
extern const char *pyr_up; |
||||||
|
void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst) |
||||||
|
{ |
||||||
|
dst.create(src.rows * 2, src.cols * 2, src.type()); |
||||||
|
Context *clCxt = src.clCxt; |
||||||
|
|
||||||
|
const std::string kernelName = "pyrUp"; |
||||||
|
|
||||||
|
std::vector< pair<size_t, const void *> > args; |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); |
||||||
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); |
||||||
|
|
||||||
|
size_t globalThreads[3] = {dst.cols, dst.rows, 1}; |
||||||
|
size_t localThreads[3] = {16, 16, 1}; |
||||||
|
|
||||||
|
openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); |
||||||
|
} |
||||||
|
}}; |
||||||
|
#endif // HAVE_OPENCL
|
@ -0,0 +1,82 @@ |
|||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace cvtest; |
||||||
|
using namespace testing; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
template <typename T> |
||||||
|
void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold) |
||||||
|
{ |
||||||
|
result_gold.create(img1.size(), img1.type()); |
||||||
|
|
||||||
|
int cn = img1.channels(); |
||||||
|
|
||||||
|
for (int y = 0; y < img1.rows; ++y) |
||||||
|
{ |
||||||
|
const float* weights1_row = weights1.ptr<float>(y); |
||||||
|
const float* weights2_row = weights2.ptr<float>(y); |
||||||
|
const T* img1_row = img1.ptr<T>(y); |
||||||
|
const T* img2_row = img2.ptr<T>(y); |
||||||
|
T* result_gold_row = result_gold.ptr<T>(y); |
||||||
|
|
||||||
|
for (int x = 0; x < img1.cols * cn; ++x) |
||||||
|
{ |
||||||
|
float w1 = weights1_row[x / cn]; |
||||||
|
float w2 = weights2_row[x / cn]; |
||||||
|
result_gold_row[x] = static_cast<T>((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
PARAM_TEST_CASE(Blend, cv::Size, MatType/*, UseRoi*/) |
||||||
|
{ |
||||||
|
//cv::gpu::DeviceInfo devInfo;
|
||||||
|
cv::Size size; |
||||||
|
int type; |
||||||
|
bool useRoi; |
||||||
|
|
||||||
|
virtual void SetUp() |
||||||
|
{ |
||||||
|
//devInfo = GET_PARAM(0);
|
||||||
|
size = GET_PARAM(0); |
||||||
|
type = GET_PARAM(1); |
||||||
|
/*useRoi = GET_PARAM(3);*/ |
||||||
|
|
||||||
|
//cv::gpu::setDevice(devInfo.deviceID());
|
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST_P(Blend, Accuracy) |
||||||
|
{ |
||||||
|
int depth = CV_MAT_DEPTH(type); |
||||||
|
|
||||||
|
cv::Mat img1 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); |
||||||
|
cv::Mat img2 = randomMat(size, type, 0.0, depth == CV_8U ? 255.0 : 1.0); |
||||||
|
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); |
||||||
|
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); |
||||||
|
|
||||||
|
cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F); |
||||||
|
cv::ocl::oclMat dst(size, type); |
||||||
|
gimg1.upload(img1); |
||||||
|
gimg2.upload(img2); |
||||||
|
gweights1.upload(weights1); |
||||||
|
gweights2.upload(weights2); |
||||||
|
cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); |
||||||
|
cv::Mat result; |
||||||
|
cv::Mat result_gold; |
||||||
|
dst.download(result); |
||||||
|
if (depth == CV_8U) |
||||||
|
blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold); |
||||||
|
else |
||||||
|
blendLinearGold<float>(img1, img2, weights1, weights2, result_gold); |
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(result_gold, result, CV_MAT_DEPTH(type) == CV_8U ? 1 : 1e-5f, NULL) |
||||||
|
} |
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( |
||||||
|
DIFFERENT_SIZES, |
||||||
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) |
||||||
|
)); |
@ -0,0 +1,105 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Chunpeng Zhang chunpeng@multicorewareinc.com
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// ColumnSum
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL |
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
// ColumnSum
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(ColumnSum, cv::Size, bool ) |
||||||
|
{ |
||||||
|
cv::Size size; |
||||||
|
cv::Mat src; |
||||||
|
bool useRoi; |
||||||
|
|
||||||
|
virtual void SetUp() |
||||||
|
{ |
||||||
|
size = GET_PARAM(0); |
||||||
|
useRoi = GET_PARAM(1); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST_P(ColumnSum, Accuracy) |
||||||
|
{ |
||||||
|
cv::Mat src = randomMat(size, CV_32FC1); |
||||||
|
//cv::Mat src(size,CV_32FC1);
|
||||||
|
|
||||||
|
//cv::ocl::oclMat d_dst = ::createMat(size,src.type(),useRoi);
|
||||||
|
cv::ocl::oclMat d_dst = loadMat(src,useRoi); |
||||||
|
|
||||||
|
cv::ocl::columnSum(loadMat(src,useRoi),d_dst); |
||||||
|
|
||||||
|
cv::Mat dst(d_dst); |
||||||
|
|
||||||
|
for (int j = 0; j < src.cols; ++j) |
||||||
|
{ |
||||||
|
float gold = src.at<float>(0, j); |
||||||
|
float res = dst.at<float>(0, j); |
||||||
|
ASSERT_NEAR(res, gold, 1e-5); |
||||||
|
} |
||||||
|
|
||||||
|
for (int i = 1; i < src.rows; ++i) |
||||||
|
{ |
||||||
|
for (int j = 0; j < src.cols; ++j) |
||||||
|
{ |
||||||
|
float gold = src.at<float>(i, j) += src.at<float>(i - 1, j); |
||||||
|
float res = dst.at<float>(i, j); |
||||||
|
ASSERT_NEAR(res, gold, 1e-5); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine( |
||||||
|
DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true)))); |
||||||
|
|
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,166 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors as is and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#define PERF_TEST 0 |
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// MatchTemplate
|
||||||
|
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED)) |
||||||
|
|
||||||
|
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); |
||||||
|
|
||||||
|
const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; |
||||||
|
|
||||||
|
PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMethod) |
||||||
|
{ |
||||||
|
cv::Size size; |
||||||
|
cv::Size templ_size; |
||||||
|
int cn; |
||||||
|
int method; |
||||||
|
|
||||||
|
virtual void SetUp() |
||||||
|
{ |
||||||
|
size = GET_PARAM(0); |
||||||
|
templ_size = GET_PARAM(1); |
||||||
|
cn = GET_PARAM(2); |
||||||
|
method = GET_PARAM(3); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST_P(MatchTemplate8U, Accuracy) |
||||||
|
{ |
||||||
|
|
||||||
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; |
||||||
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; |
||||||
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; |
||||||
|
std::cout << "Channels: " << cn << std::endl; |
||||||
|
|
||||||
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); |
||||||
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); |
||||||
|
|
||||||
|
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); |
||||||
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); |
||||||
|
|
||||||
|
cv::Mat dst_gold; |
||||||
|
cv::matchTemplate(image, templ, dst_gold, method); |
||||||
|
|
||||||
|
char sss [100] = ""; |
||||||
|
|
||||||
|
cv::Mat mat_dst; |
||||||
|
dst.download(mat_dst); |
||||||
|
|
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); |
||||||
|
|
||||||
|
#if PERF_TEST |
||||||
|
{ |
||||||
|
P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); |
||||||
|
P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); |
||||||
|
} |
||||||
|
#endif // PERF_TEST
|
||||||
|
} |
||||||
|
|
||||||
|
PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod) |
||||||
|
{ |
||||||
|
cv::Size size; |
||||||
|
cv::Size templ_size; |
||||||
|
int cn; |
||||||
|
int method; |
||||||
|
|
||||||
|
virtual void SetUp() |
||||||
|
{ |
||||||
|
size = GET_PARAM(0); |
||||||
|
templ_size = GET_PARAM(1); |
||||||
|
cn = GET_PARAM(2); |
||||||
|
method = GET_PARAM(3); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST_P(MatchTemplate32F, Accuracy) |
||||||
|
{ |
||||||
|
cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); |
||||||
|
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); |
||||||
|
|
||||||
|
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); |
||||||
|
cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); |
||||||
|
|
||||||
|
cv::Mat dst_gold; |
||||||
|
cv::matchTemplate(image, templ, dst_gold, method); |
||||||
|
|
||||||
|
char sss [100] = ""; |
||||||
|
|
||||||
|
cv::Mat mat_dst; |
||||||
|
dst.download(mat_dst); |
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); |
||||||
|
|
||||||
|
#if PERF_TEST |
||||||
|
{ |
||||||
|
std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; |
||||||
|
std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; |
||||||
|
std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; |
||||||
|
std::cout << "Channels: " << cn << std::endl; |
||||||
|
P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); |
||||||
|
P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); |
||||||
|
} |
||||||
|
#endif // PERF_TEST
|
||||||
|
} |
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
|
||||||
|
testing::Combine( |
||||||
|
DIFFERENT_SIZES, |
||||||
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), |
||||||
|
testing::Values(Channels(1), Channels(4)), |
||||||
|
ALL_TEMPLATE_METHODS |
||||||
|
) |
||||||
|
); |
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( |
||||||
|
DIFFERENT_SIZES, |
||||||
|
testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), |
||||||
|
testing::Values(Channels(1), Channels(4)), |
||||||
|
testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); |
||||||
|
|
@ -0,0 +1,295 @@ |
|||||||
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
//#define PRINT_CPU_TIME 1000
|
||||||
|
//#define PRINT_TIME
|
||||||
|
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include <iomanip> |
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL |
||||||
|
|
||||||
|
using namespace cv; |
||||||
|
using namespace cv::ocl; |
||||||
|
using namespace cvtest; |
||||||
|
using namespace testing; |
||||||
|
using namespace std; |
||||||
|
|
||||||
|
PARAM_TEST_CASE(PyrDown, MatType, bool) |
||||||
|
{ |
||||||
|
int type; |
||||||
|
cv::Scalar val; |
||||||
|
|
||||||
|
//src mat
|
||||||
|
cv::Mat mat1; |
||||||
|
cv::Mat mat2; |
||||||
|
cv::Mat mask; |
||||||
|
cv::Mat dst; |
||||||
|
cv::Mat dst1; //bak, for two outputs
|
||||||
|
|
||||||
|
// set up roi
|
||||||
|
int roicols; |
||||||
|
int roirows; |
||||||
|
int src1x; |
||||||
|
int src1y; |
||||||
|
int src2x; |
||||||
|
int src2y; |
||||||
|
int dstx; |
||||||
|
int dsty; |
||||||
|
int maskx; |
||||||
|
int masky; |
||||||
|
|
||||||
|
|
||||||
|
//src mat with roi
|
||||||
|
cv::Mat mat1_roi; |
||||||
|
cv::Mat mat2_roi; |
||||||
|
cv::Mat mask_roi; |
||||||
|
cv::Mat dst_roi; |
||||||
|
cv::Mat dst1_roi; //bak
|
||||||
|
std::vector<cv::ocl::Info> oclinfo; |
||||||
|
//ocl dst mat for testing
|
||||||
|
cv::ocl::oclMat gdst_whole; |
||||||
|
cv::ocl::oclMat gdst1_whole; //bak
|
||||||
|
|
||||||
|
//ocl mat with roi
|
||||||
|
cv::ocl::oclMat gmat1; |
||||||
|
cv::ocl::oclMat gmat2; |
||||||
|
cv::ocl::oclMat gdst; |
||||||
|
cv::ocl::oclMat gdst1; //bak
|
||||||
|
cv::ocl::oclMat gmask; |
||||||
|
|
||||||
|
virtual void SetUp() |
||||||
|
{ |
||||||
|
type = GET_PARAM(0); |
||||||
|
|
||||||
|
cv::RNG &rng = TS::ptr()->get_rng(); |
||||||
|
|
||||||
|
cv::Size size(MWIDTH, MHEIGHT); |
||||||
|
|
||||||
|
mat1 = randomMat(rng, size, type, 5, 16, false); |
||||||
|
mat2 = randomMat(rng, size, type, 5, 16, false); |
||||||
|
dst = randomMat(rng, size, type, 5, 16, false); |
||||||
|
dst1 = randomMat(rng, size, type, 5, 16, false); |
||||||
|
mask = randomMat(rng, size, CV_8UC1, 0, 2, false); |
||||||
|
|
||||||
|
cv::threshold(mask, mask, 0.5, 255., CV_8UC1); |
||||||
|
|
||||||
|
val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); |
||||||
|
|
||||||
|
//int devnums = getDevice(oclinfo);
|
||||||
|
//CV_Assert(devnums > 0);
|
||||||
|
//if you want to use undefault device, set it here
|
||||||
|
//setDevice(oclinfo[0]);
|
||||||
|
} |
||||||
|
|
||||||
|
void Cleanup() |
||||||
|
{ |
||||||
|
mat1.release(); |
||||||
|
mat2.release(); |
||||||
|
mask.release(); |
||||||
|
dst.release(); |
||||||
|
dst1.release(); |
||||||
|
mat1_roi.release(); |
||||||
|
mat2_roi.release(); |
||||||
|
mask_roi.release(); |
||||||
|
dst_roi.release(); |
||||||
|
dst1_roi.release(); |
||||||
|
|
||||||
|
gdst_whole.release(); |
||||||
|
gdst1_whole.release(); |
||||||
|
gmat1.release(); |
||||||
|
gmat2.release(); |
||||||
|
gdst.release(); |
||||||
|
gdst1.release(); |
||||||
|
gmask.release(); |
||||||
|
} |
||||||
|
|
||||||
|
void random_roi() |
||||||
|
{ |
||||||
|
cv::RNG &rng = TS::ptr()->get_rng(); |
||||||
|
|
||||||
|
#ifdef RANDOMROI |
||||||
|
//randomize ROI
|
||||||
|
roicols = rng.uniform(1, mat1.cols); |
||||||
|
roirows = rng.uniform(1, mat1.rows); |
||||||
|
src1x = rng.uniform(0, mat1.cols - roicols); |
||||||
|
src1y = rng.uniform(0, mat1.rows - roirows); |
||||||
|
dstx = rng.uniform(0, dst.cols - roicols); |
||||||
|
dsty = rng.uniform(0, dst.rows - roirows); |
||||||
|
#else |
||||||
|
roicols = mat1.cols; |
||||||
|
roirows = mat1.rows; |
||||||
|
src1x = 0; |
||||||
|
src1y = 0; |
||||||
|
dstx = 0; |
||||||
|
dsty = 0; |
||||||
|
#endif |
||||||
|
maskx = rng.uniform(0, mask.cols - roicols); |
||||||
|
masky = rng.uniform(0, mask.rows - roirows); |
||||||
|
src2x = rng.uniform(0, mat2.cols - roicols); |
||||||
|
src2y = rng.uniform(0, mat2.rows - roirows); |
||||||
|
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); |
||||||
|
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); |
||||||
|
mask_roi = mask(Rect(maskx, masky, roicols, roirows)); |
||||||
|
dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); |
||||||
|
dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); |
||||||
|
|
||||||
|
gdst_whole = dst; |
||||||
|
gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); |
||||||
|
|
||||||
|
gdst1_whole = dst1; |
||||||
|
gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); |
||||||
|
|
||||||
|
gmat1 = mat1_roi; |
||||||
|
gmat2 = mat2_roi; |
||||||
|
gmask = mask_roi; //end
|
||||||
|
} |
||||||
|
|
||||||
|
}; |
||||||
|
|
||||||
|
#define VARNAME(A) string(#A); |
||||||
|
|
||||||
|
|
||||||
|
void PrePrint() |
||||||
|
{ |
||||||
|
//for(int i = 0; i < MHEIGHT; i++)
|
||||||
|
//{
|
||||||
|
// printf("(%d) ", i);
|
||||||
|
// for(int k = 0; k < MWIDTH; k++)
|
||||||
|
// {
|
||||||
|
// printf("%d ", mat1_roi.data[i * MHEIGHT + k]);
|
||||||
|
// }
|
||||||
|
// printf("\n");
|
||||||
|
//}
|
||||||
|
} |
||||||
|
|
||||||
|
void PostPrint() |
||||||
|
{ |
||||||
|
//dst_roi.convertTo(dst_roi,CV_32S);
|
||||||
|
//cpu_dst.convertTo(cpu_dst,CV_32S);
|
||||||
|
//dst_roi -= cpu_dst;
|
||||||
|
//cpu_dst -= dst_roi;
|
||||||
|
//for(int i = 0; i < MHEIGHT / 2; i++)
|
||||||
|
//{
|
||||||
|
// printf("(%d) ", i);
|
||||||
|
// for(int k = 0; k < MWIDTH / 2; k++)
|
||||||
|
// {
|
||||||
|
// if(gmat1.depth() == 0)
|
||||||
|
// {
|
||||||
|
// if(gmat1.channels() == 1)
|
||||||
|
// {
|
||||||
|
// printf("%d ", dst_roi.data[i * MHEIGHT / 2 + k]);
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// printf("%d ", ((unsigned*)dst_roi.data)[i * MHEIGHT / 2 + k]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else if(gmat1.depth() == 5)
|
||||||
|
// {
|
||||||
|
// printf("%.6f ", ((float*)dst_roi.data)[i * MHEIGHT / 2 + k]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// printf("\n");
|
||||||
|
//}
|
||||||
|
//for(int i = 0; i < MHEIGHT / 2; i++)
|
||||||
|
//{
|
||||||
|
// printf("(%d) ", i);
|
||||||
|
// for(int k = 0; k < MWIDTH / 2; k++)
|
||||||
|
// {
|
||||||
|
// if(gmat1.depth() == 0)
|
||||||
|
// {
|
||||||
|
// if(gmat1.channels() == 1)
|
||||||
|
// {
|
||||||
|
// printf("%d ", cpu_dst.data[i * MHEIGHT / 2 + k]);
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// printf("%d ", ((unsigned*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else if(gmat1.depth() == 5)
|
||||||
|
// {
|
||||||
|
// printf("%.6f ", ((float*)cpu_dst.data)[i * MHEIGHT / 2 + k]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// printf("\n");
|
||||||
|
//}
|
||||||
|
} |
||||||
|
|
||||||
|
////////////////////////////////PyrDown/////////////////////////////////////////////////
|
||||||
|
//struct PyrDown : ArithmTestBase {};
|
||||||
|
|
||||||
|
TEST_P(PyrDown, Mat) |
||||||
|
{ |
||||||
|
for(int j = 0; j < LOOP_TIMES; j++) |
||||||
|
{ |
||||||
|
random_roi(); |
||||||
|
|
||||||
|
cv::pyrDown(mat1_roi, dst_roi); |
||||||
|
cv::ocl::pyrDown(gmat1, gdst); |
||||||
|
|
||||||
|
cv::Mat cpu_dst; |
||||||
|
gdst.download(cpu_dst); |
||||||
|
char s[1024]; |
||||||
|
sprintf(s, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,maskx=%d,masky=%d,src2x=%d,src2y=%d", roicols, roirows, src1x, src1y, dstx, dsty, maskx, masky, src2x, src2y); |
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_roi, cpu_dst, dst_roi.depth() == CV_32F ? 1e-5f : 1.0f, s); |
||||||
|
|
||||||
|
Cleanup(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//********test****************
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine( |
||||||
|
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), |
||||||
|
Values(false))); // Values(false) is the reserved parameter
|
||||||
|
|
||||||
|
|
||||||
|
#endif // HAVE_OPENCL
|
@ -0,0 +1,97 @@ |
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp" |
||||||
|
#include "opencv2/core/core.hpp" |
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(PyrUp,cv::Size,int) |
||||||
|
{ |
||||||
|
cv::Size size; |
||||||
|
int type; |
||||||
|
|
||||||
|
virtual void SetUp() |
||||||
|
{ |
||||||
|
size = GET_PARAM(0); |
||||||
|
type = GET_PARAM(1); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
TEST_P(PyrUp,Accuracy) |
||||||
|
{ |
||||||
|
cv::Mat src = randomMat(size,type); |
||||||
|
|
||||||
|
|
||||||
|
cv::Mat dst_gold; |
||||||
|
cv::pyrUp(src,dst_gold); |
||||||
|
|
||||||
|
cv::ocl::oclMat dst; |
||||||
|
cv::ocl::oclMat srcMat(src); |
||||||
|
cv::ocl::pyrUp(srcMat,dst); |
||||||
|
char s[100]={0}; |
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_gold, dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
|
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
#if 1 |
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, testing::Combine( |
||||||
|
testing::Values(cv::Size(32, 32)), |
||||||
|
testing::Values(MatType(CV_8UC1),MatType(CV_16UC1),MatType(CV_32FC1),MatType(CV_8UC4), |
||||||
|
MatType(CV_16UC4),MatType(CV_32FC4)))); |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif // HAVE_OPENCL
|
Loading…
Reference in new issue