mirror of https://github.com/opencv/opencv.git
parent
a1d8091e10
commit
2e685dcf0a
13 changed files with 415 additions and 676 deletions
@ -1,192 +0,0 @@ |
||||
/*M/////////////////////////////////////////////////////////////////////////////////////// |
||||
// |
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
||||
// |
||||
// By downloading, copying, installing or using the software you agree to this license. |
||||
// If you do not agree to this license, do not download, install, |
||||
// copy or use the software. |
||||
// |
||||
// |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Zhang Ying, zhangying913@gmail.com |
||||
// |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other GpuMaterials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
//M*/ |
||||
|
||||
// Allow the OpenCL compiler to contract floating-point expressions in this program.
#pragma OPENCL FP_CONTRACT ON |
||||
// Smallest uchar value; the 8-bit dilate kernels below start their max reduction from it.
#define UCHAR_MIN 0 |
||||
// Dilation for float4 pixels: one work-item produces one output pixel as the
// component-wise maximum over the structuring element.
//
// src / dst are indexed in float4 units; srcStep, dstStep and both offsets are
// used directly as element counts. A tap contributes only when its source
// coordinate lies in [mincols, maxcols] x [minrows, maxrows] (inclusive) and
// its mat_kernel entry is non-zero; every other tap contributes -FLT_MAX.
// anX, anY, ksX and ksY are not declared in this file - presumably they are
// supplied as build-time definitions (TODO confirm against the host code).
__kernel void dilate_C4_D5(__global const float4 * restrict src, __global float4 *dst, int srcOffset, int dstOffset,
                           int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                           int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    const int outCol = get_global_id(0);
    const int outRow = get_global_id(1);
    const int firstCol = outCol - anX;
    const int firstRow = outRow - anY;
    const int addrLimit = mad24(src_whole_rows - 1, srcStep, src_whole_cols);
    const float4 lowest = (float4)(-FLT_MAX);

    float4 best = lowest;
    int tap = 0;
    for (int dy = 0; dy < ksY; ++dy)
    {
        const int srcRow = firstRow + dy;
        const int rowOk = (srcRow >= minrows) && (srcRow <= maxrows);

        for (int dx = 0; dx < ksX; ++dx, ++tap)
        {
            const int srcCol = firstCol + dx;

            // Addresses outside (0, addrLimit) are redirected to element 0 so
            // the load below is always performed on a valid location.
            int addr = mad24(srcRow, srcStep, srcCol) + srcOffset;
            if (addr >= addrLimit || addr <= 0)
                addr = 0;

            const float4 pixel = src[addr];
            const uchar weight = mat_kernel[tap];
            const int colOk = (srcCol >= mincols) && (srcCol <= maxcols);
            const int used = rowOk && colOk && (weight != 0);

            best = max(best, used ? pixel : lowest);
        }
    }

    if (outCol < cols && outRow < rows)
        dst[outRow * dstStep + outCol + dstOffset] = best;
}
||||
|
||||
// Dilation for single-channel float data: one work-item produces four
// horizontally adjacent output values and stores them as one float4.
//
// src is read as float4 words while addresses are computed in float units, so
// each tap loads two neighbouring words and picks the four floats starting at
// (address & 3) inside that 8-float window. Lanes whose source column is
// outside [mincols, maxcols] contribute -FLT_MAX; taps whose row is outside
// [minrows, maxrows] or whose mat_kernel entry is zero leave the running
// maximum unchanged. anX, anY, ksX and ksY are not declared in this file -
// presumably build-time definitions (TODO confirm).
__kernel void dilate_C1_D5(__global float4 * src, __global float *dst, int srcOffset, int dstOffset,
                           int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                           int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    // First output column of this work-item, shifted back by (dstOffset & 3).
    const int col0 = (get_global_id(0) << 2) - (dstOffset & 3);
    const int row = get_global_id(1);
    const int addrLimit = mad24(src_whole_rows - 1, srcStep, src_whole_cols);

    float4 best = (float4)(-FLT_MAX);
    int tap = 0;
    for (int dy = 0; dy < ksY; ++dy)
    {
        const int srcRow = row - anY + dy;

        for (int dx = 0; dx < ksX; ++dx, ++tap)
        {
            const int srcCol = col0 - anX + dx;

            // Out-of-range addresses fall back to element 0.
            int first = mad24(srcRow, srcStep, srcCol) + srcOffset;
            if (first >= addrLimit || first <= 0)
                first = 0;
            const int second = (first > 0 && first + 4 < addrLimit) ? first + 4 : 0;

            const float4 lo = src[first >> 2];
            const float4 hi = src[second >> 2];
            const float window[8] = {lo.x, lo.y, lo.z, lo.w, hi.x, hi.y, hi.z, hi.w};
            const int shift = first & 3;

            const uchar weight = mat_kernel[tap];
            const int rowUsed = (srcRow >= minrows) && (srcRow <= maxrows) && (weight != 0);

            float4 cand = best;
            if (rowUsed)
                cand = (float4)(window[shift], window[shift + 1], window[shift + 2], window[shift + 3]);

            if (srcCol < mincols || srcCol > maxcols)
                cand.x = -FLT_MAX;
            if (srcCol + 1 < mincols || srcCol + 1 > maxcols)
                cand.y = -FLT_MAX;
            if (srcCol + 2 < mincols || srcCol + 2 > maxcols)
                cand.z = -FLT_MAX;
            if (srcCol + 3 < mincols || srcCol + 3 > maxcols)
                cand.w = -FLT_MAX;

            best = max(best, cand);
        }
    }

    if (row < rows && col0 < cols)
    {
        __global float4 *out = (__global float4 *)(dst + row * dstStep + col0 + dstOffset);
        const float4 previous = *out;

        // Lanes that fall outside [0, cols) keep whatever dst already holds.
        if (col0 < 0 || col0 >= cols)
            best.x = previous.x;
        if (col0 + 1 < 0 || col0 + 1 >= cols)
            best.y = previous.y;
        if (col0 + 2 < 0 || col0 + 2 >= cols)
            best.z = previous.z;
        if (col0 + 3 < 0 || col0 + 3 >= cols)
            best.w = previous.w;

        *out = best;
    }
}
||||
|
||||
// Dilation for single-channel 8-bit data: one work-item produces four
// horizontally adjacent output bytes and stores them as one uchar4.
//
// src is read as uchar4 words while addresses are computed in bytes, so each
// tap loads two neighbouring words and picks the four bytes starting at
// (address & 3) inside that 8-byte window. Lanes whose source column is
// outside [mincols, maxcols] contribute UCHAR_MIN; taps whose row is outside
// [minrows, maxrows] or whose mat_kernel entry is zero leave the running
// maximum unchanged. anX, anY, ksX and ksY are not declared in this file -
// presumably build-time definitions (TODO confirm).
__kernel void dilate_C1_D0(__global const uchar4 * restrict src, __global uchar *dst, int srcOffset, int dstOffset,
                           int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                           int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    // First output column of this work-item, shifted back by (dstOffset & 3).
    int mX = (get_global_id(0) << 2) - (dstOffset & 3);
    int mY = get_global_id(1);
    int kX = mX - anX, kY = mY - anY;
    int end_addr = mad24(src_whole_rows - 1, srcStep, src_whole_cols);
    uchar4 maxVal = (uchar4)(UCHAR_MIN);
    int k = 0;

    for (int i = 0; i < ksY; i++, kY++, kX = mX - anX)
    {
        for (int j = 0; j < ksX; j++, kX++)
        {
            // Out-of-range addresses fall back to element 0.
            int start = mad24(kY, srcStep, kX) + srcOffset;
            start = ((start < end_addr) && (start > 0)) ? start : 0;
            int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
            uchar8 sVal = (uchar8)(src[start >> 2], src[start2 >> 2]);

            uchar sAry[8] = {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
            int det = start & 3;
            uchar4 v = (uchar4)(sAry[det], sAry[det + 1], sAry[det + 2], sAry[det + 3]);

            uchar now = mat_kernel[k++];
            uchar4 flag = (kY >= minrows & kY <= maxrows & now != 0) ? v : maxVal;
            flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : UCHAR_MIN;
            flag.y = (kX + 1 >= mincols & kX + 1 <= maxcols) ? flag.y : UCHAR_MIN;
            flag.z = (kX + 2 >= mincols & kX + 2 <= maxcols) ? flag.z : UCHAR_MIN;
            flag.w = (kX + 3 >= mincols & kX + 3 <= maxcols) ? flag.w : UCHAR_MIN;

            maxVal = max(maxVal, flag);
        }
    }

    // A work-item that starts at or beyond `cols` has no lane to update: all
    // four lanes would simply be rewritten with the value already in dst.
    // Skipping it (as dilate_C1_D5 does) avoids a read-modify-write on memory
    // this work-item does not own.
    if (mY < rows && mX < cols)
    {
        __global uchar4 *d = (__global uchar4 *)(dst + mY * dstStep + mX + dstOffset);
        uchar4 dVal = *d;

        // Lanes outside [0, cols) keep whatever dst already holds.
        maxVal.x = (mX >= 0 & mX < cols) ? maxVal.x : dVal.x;
        maxVal.y = (mX + 1 >= 0 & mX + 1 < cols) ? maxVal.y : dVal.y;
        maxVal.z = (mX + 2 >= 0 & mX + 2 < cols) ? maxVal.z : dVal.z;
        maxVal.w = (mX + 3 >= 0 & mX + 3 < cols) ? maxVal.w : dVal.w;

        *d = maxVal;
    }
}
||||
|
||||
// Dilation for uchar4 pixels: one work-item produces one output pixel as the
// component-wise maximum over the structuring element.
//
// src / dst are indexed in uchar4 units. A tap is folded into the maximum only
// when its source coordinate lies in [mincols, maxcols] x [minrows, maxrows]
// (inclusive) and its mat_kernel entry is non-zero. anX, anY, ksX and ksY are
// not declared in this file - presumably build-time definitions (TODO confirm).
__kernel void dilate_C4_D0(__global const uchar4 * restrict src, __global uchar4 *dst, int srcOffset, int dstOffset,
                           int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                           int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    const int outCol = get_global_id(0);
    const int outRow = get_global_id(1);
    const int addrLimit = mad24(src_whole_rows - 1, srcStep, src_whole_cols);

    uchar4 best = (uchar4)(UCHAR_MIN);
    int tap = 0;
    for (int dy = 0; dy < ksY; ++dy)
    {
        const int srcRow = outRow - anY + dy;

        for (int dx = 0; dx < ksX; ++dx, ++tap)
        {
            const int srcCol = outCol - anX + dx;

            // Addresses outside (0, addrLimit) are redirected to element 0.
            int addr = mad24(srcRow, srcStep, srcCol) + srcOffset;
            if (addr >= addrLimit || addr <= 0)
                addr = 0;

            const uchar4 pixel = src[addr];
            const uchar weight = mat_kernel[tap];

            const int inside = (srcCol >= mincols) && (srcCol <= maxcols) &&
                               (srcRow >= minrows) && (srcRow <= maxrows);
            if (inside && weight != 0)
                best = max(best, pixel);
        }
    }

    if (outCol < cols && outRow < rows)
        dst[outRow * dstStep + outCol + dstOffset] = best;
}
||||
|
@ -1,183 +0,0 @@ |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Niko Li, newlife20080214@gmail.com |
||||
// Zero Lin, zero.lin@amd.com |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other oclMaterials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
// |
||||
|
||||
// Erosion for float4 pixels: one work-item produces one output pixel as the
// component-wise minimum over the structuring element.
//
// src / dst are indexed in float4 units. A tap contributes only when its
// source coordinate lies in [mincols, maxcols] x [minrows, maxrows]
// (inclusive) and its mat_kernel entry is non-zero; every other tap
// contributes FLT_MAX, the neutral element of min for finite floats.
// (The previous literal 3.4e+38 is a double constant slightly below FLT_MAX,
// so inputs above it were clamped.) anX, anY, ksX and ksY are not declared in
// this file - presumably build-time definitions (TODO confirm).
__kernel void erode_C4_D5(__global const float4 * restrict src, __global float4 *dst, int srcOffset, int dstOffset,
                          int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                          int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    int mX = get_global_id(0);
    int mY = get_global_id(1);
    int kX = mX - anX, kY = mY - anY;
    int end_addr = mad24(src_whole_rows - 1, srcStep, src_whole_cols);
    const float4 neutral = (float4)(FLT_MAX);
    float4 minVal = neutral;
    int k = 0;

    for (int i = 0; i < ksY; i++, kY++, kX = mX - anX)
    {
        for (int j = 0; j < ksX; j++, kX++)
        {
            // Addresses outside (0, end_addr) are redirected to element 0.
            int current_addr = mad24(kY, srcStep, kX) + srcOffset;
            current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
            float4 v = src[current_addr];
            uchar now = mat_kernel[k++];
            float4 flag = (kX >= mincols & kX <= maxcols & kY >= minrows & kY <= maxrows & now != 0) ? v : neutral;
            minVal = min(minVal, flag);
        }
    }

    if (mX < cols && mY < rows)
        dst[mY * dstStep + mX + dstOffset] = minVal;
}
||||
|
||||
// Erosion for single-channel float data: one work-item produces four
// horizontally adjacent output values and stores them as one float4.
//
// src is read as float4 words while addresses are computed in float units, so
// each tap loads two neighbouring words and picks the four floats starting at
// (address & 3) inside that 8-float window. Taps/lanes that are masked out
// (row outside [minrows, maxrows], column outside [mincols, maxcols], or a
// zero mat_kernel entry) contribute FLT_MAX, the neutral element of min for
// finite floats. (The previous literal 3.4e+38 is a double constant slightly
// below FLT_MAX, so inputs above it were clamped.) anX, anY, ksX and ksY are
// not declared in this file - presumably build-time definitions (TODO confirm).
__kernel void erode_C1_D5(__global float4 * src, __global float *dst, int srcOffset, int dstOffset,
                          int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                          int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    // First output column of this work-item, shifted back by (dstOffset & 3).
    int mX = (get_global_id(0) << 2) - (dstOffset & 3);
    int mY = get_global_id(1);
    int kX = mX - anX, kY = mY - anY;
    int end_addr = mad24(src_whole_rows - 1, srcStep, src_whole_cols);
    float4 minVal = (float4)(FLT_MAX);
    int k = 0;

    for (int i = 0; i < ksY; i++, kY++, kX = mX - anX)
    {
        for (int j = 0; j < ksX; j++, kX++)
        {
            // Out-of-range addresses fall back to element 0.
            int start = mad24(kY, srcStep, kX) + srcOffset;
            start = ((start < end_addr) && (start > 0)) ? start : 0;
            int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
            float8 sVal = (float8)(src[start >> 2], src[start2 >> 2]);

            float sAry[8] = {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
            int det = start & 3;
            float4 v = (float4)(sAry[det], sAry[det + 1], sAry[det + 2], sAry[det + 3]);
            uchar now = mat_kernel[k++];
            float4 flag = (kY >= minrows & kY <= maxrows & now != 0) ? v : (float4)(FLT_MAX);
            flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : FLT_MAX;
            flag.y = (kX + 1 >= mincols & kX + 1 <= maxcols) ? flag.y : FLT_MAX;
            flag.z = (kX + 2 >= mincols & kX + 2 <= maxcols) ? flag.z : FLT_MAX;
            flag.w = (kX + 3 >= mincols & kX + 3 <= maxcols) ? flag.w : FLT_MAX;

            minVal = min(minVal, flag);
        }
    }

    if (mY < rows && mX < cols)
    {
        __global float4 *d = (__global float4 *)(dst + mY * dstStep + mX + dstOffset);
        float4 dVal = *d;

        // Lanes outside [0, cols) keep whatever dst already holds.
        minVal.x = (mX >= 0 & mX < cols) ? minVal.x : dVal.x;
        minVal.y = (mX + 1 >= 0 & mX + 1 < cols) ? minVal.y : dVal.y;
        minVal.z = (mX + 2 >= 0 & mX + 2 < cols) ? minVal.z : dVal.z;
        minVal.w = (mX + 3 >= 0 & mX + 3 < cols) ? minVal.w : dVal.w;

        *d = minVal;
    }
}
||||
|
||||
// Erosion for single-channel 8-bit data: one work-item produces four
// horizontally adjacent output bytes and stores them as one uchar4.
//
// src is read as uchar4 words while addresses are computed in bytes, so each
// tap loads two neighbouring words and picks the four bytes starting at
// (address & 3) inside that 8-byte window. Taps/lanes that are masked out
// (row outside [minrows, maxrows], column outside [mincols, maxcols], or a
// zero mat_kernel entry) contribute 0xff. anX, anY, ksX and ksY are not
// declared in this file - presumably build-time definitions (TODO confirm).
__kernel void erode_C1_D0(__global const uchar4 * restrict src, __global uchar *dst, int srcOffset, int dstOffset,
                          int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                          int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    // First output column of this work-item, shifted back by (dstOffset & 3).
    int mX = (get_global_id(0) << 2) - (dstOffset & 3);
    int mY = get_global_id(1);
    int kX = mX - anX, kY = mY - anY;
    int end_addr = mad24(src_whole_rows - 1, srcStep, src_whole_cols);
    uchar4 minVal = (uchar4)(0xff);
    int k = 0;

    for (int i = 0; i < ksY; i++, kY++, kX = mX - anX)
    {
        for (int j = 0; j < ksX; j++, kX++)
        {
            // Out-of-range addresses fall back to element 0.
            int start = mad24(kY, srcStep, kX) + srcOffset;
            start = ((start < end_addr) && (start > 0)) ? start : 0;
            int start2 = ((start + 4 < end_addr) && (start > 0)) ? start + 4 : 0;
            uchar8 sVal = (uchar8)(src[start >> 2], src[start2 >> 2]);

            uchar sAry[8] = {sVal.s0, sVal.s1, sVal.s2, sVal.s3, sVal.s4, sVal.s5, sVal.s6, sVal.s7};
            int det = start & 3;
            uchar4 v = (uchar4)(sAry[det], sAry[det + 1], sAry[det + 2], sAry[det + 3]);

            uchar now = mat_kernel[k++];
            uchar4 flag = (kY >= minrows & kY <= maxrows & now != 0) ? v : (uchar4)(0xff);
            flag.x = (kX >= mincols & kX <= maxcols) ? flag.x : 0xff;
            flag.y = (kX + 1 >= mincols & kX + 1 <= maxcols) ? flag.y : 0xff;
            flag.z = (kX + 2 >= mincols & kX + 2 <= maxcols) ? flag.z : 0xff;
            flag.w = (kX + 3 >= mincols & kX + 3 <= maxcols) ? flag.w : 0xff;

            minVal = min(minVal, flag);
        }
    }

    // A work-item that starts at or beyond `cols` has no lane to update: all
    // four lanes would simply be rewritten with the value already in dst.
    // Skipping it (as erode_C1_D5 does) avoids a read-modify-write on memory
    // this work-item does not own.
    if (mY < rows && mX < cols)
    {
        __global uchar4 *d = (__global uchar4 *)(dst + mY * dstStep + mX + dstOffset);
        uchar4 dVal = *d;

        // Lanes outside [0, cols) keep whatever dst already holds.
        minVal.x = (mX >= 0 & mX < cols) ? minVal.x : dVal.x;
        minVal.y = (mX + 1 >= 0 & mX + 1 < cols) ? minVal.y : dVal.y;
        minVal.z = (mX + 2 >= 0 & mX + 2 < cols) ? minVal.z : dVal.z;
        minVal.w = (mX + 3 >= 0 & mX + 3 < cols) ? minVal.w : dVal.w;

        *d = minVal;
    }
}
||||
|
||||
// Erosion for uchar4 pixels: one work-item produces one output pixel as the
// component-wise minimum over the structuring element.
//
// src / dst are indexed in uchar4 units. A tap is folded into the minimum only
// when its source coordinate lies in [mincols, maxcols] x [minrows, maxrows]
// (inclusive) and its mat_kernel entry is non-zero; the reduction starts from
// 0xff in every channel. anX, anY, ksX and ksY are not declared in this file -
// presumably build-time definitions (TODO confirm).
__kernel void erode_C4_D0(__global const uchar4 * restrict src, __global uchar4 *dst, int srcOffset, int dstOffset,
                          int mincols, int maxcols, int minrows, int maxrows, int cols, int rows,
                          int srcStep, int dstStep, __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
{
    const int outCol = get_global_id(0);
    const int outRow = get_global_id(1);
    const int addrLimit = mad24(src_whole_rows - 1, srcStep, src_whole_cols);

    uchar4 lowest = (uchar4)(0xff);
    int tap = 0;
    for (int dy = 0; dy < ksY; ++dy)
    {
        const int srcRow = outRow - anY + dy;

        for (int dx = 0; dx < ksX; ++dx, ++tap)
        {
            const int srcCol = outCol - anX + dx;

            // Addresses outside (0, addrLimit) are redirected to element 0.
            int addr = mad24(srcRow, srcStep, srcCol) + srcOffset;
            if (addr >= addrLimit || addr <= 0)
                addr = 0;

            const uchar4 pixel = src[addr];
            const uchar weight = mat_kernel[tap];

            const int inside = (srcCol >= mincols) && (srcCol <= maxcols) &&
                               (srcRow >= minrows) && (srcRow <= maxrows);
            if (inside && weight != 0)
                lowest = min(lowest, pixel);
        }
    }

    if (outCol < cols && outRow < rows)
        dst[outRow * dstStep + outCol + dstOffset] = lowest;
}
||||
|
@ -0,0 +1,204 @@ |
||||
// License Agreement |
||||
// For Open Source Computer Vision Library |
||||
// |
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. |
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. |
||||
// Third party copyrights are property of their respective owners. |
||||
// |
||||
// @Authors |
||||
// Niko Li, newlife20080214@gmail.com |
||||
// Zero Lin, zero.lin@amd.com |
||||
// Redistribution and use in source and binary forms, with or without modification, |
||||
// are permitted provided that the following conditions are met: |
||||
// |
||||
// * Redistribution's of source code must retain the above copyright notice, |
||||
// this list of conditions and the following disclaimer. |
||||
// |
||||
// * Redistribution's in binary form must reproduce the above copyright notice, |
||||
// this list of conditions and the following disclaimer in the documentation |
||||
// and/or other oclMaterials provided with the distribution. |
||||
// |
||||
// * The name of the copyright holders may not be used to endorse or promote products |
||||
// derived from this software without specific prior written permission. |
||||
// |
||||
// This software is provided by the copyright holders and contributors as is and |
||||
// any express or implied warranties, including, but not limited to, the implied |
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed. |
||||
// In no event shall the Intel Corporation or contributors be liable for any direct, |
||||
// indirect, incidental, special, exemplary, or consequential damages |
||||
// (including, but not limited to, procurement of substitute goods or services; |
||||
// loss of use, data, or profits; or business interruption) however caused |
||||
// and on any theory of liability, whether in contract, strict liability, |
||||
// or tort (including negligence or otherwise) arising in any way out of |
||||
// the use of this software, even if advised of the possibility of such damage. |
||||
// |
||||
// |
||||
|
||||
|
||||
// MORPH_OP is the reduction applied across the structuring element:
// min when ERODE is defined, max when DILATE is defined.
#ifdef ERODE
#define MORPH_OP(A,B) min((A),(B))
#endif
#ifdef DILATE
#define MORPH_OP(A,B) max((A),(B))
#endif
// Both kernels below use MORPH_OP, so fail early with a readable message
// instead of an "undeclared identifier" deep inside a kernel body.
#if !defined(ERODE) && !defined(DILATE)
#error "Either ERODE or DILATE must be defined when building this program"
#endif
//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
// ELEM yields elem1 when i lies outside [l_edge, r_edge) and elem2 otherwise.
// The whole expansion is parenthesised so it is safe inside larger expressions.
#define ELEM(i,l_edge,r_edge,elem1,elem2) (((i) < (l_edge)) | ((i) >= (r_edge)) ? (elem1) : (elem2))
||||
#ifndef GENTYPE |
||||
// Erode/dilate (selected by MORPH_OP) for single-channel 8-bit data.
//
// Each work-item produces four horizontally adjacent output bytes. The
// work-group first stages a source tile in local memory - every work-item
// loads two uchar4 words - replacing anything outside
// [0, src_whole_cols) x [0, src_whole_rows) with VAL; after the barrier each
// work-item reduces its (2*RADIUSY+1) x (2*RADIUSX+1) neighbourhood, skipping
// taps whose mat_kernel entry is zero.
//
// LSIZE0, LSIZE1, RADIUSX, RADIUSY and VAL are not declared in this file -
// presumably build-time definitions (TODO confirm against the host code).
__kernel void morph_C1_D0(__global const uchar * restrict src,
                          __global uchar *dst,
                          int src_offset_x, int src_offset_y,
                          int cols, int rows,
                          int src_step_in_pixel, int dst_step_in_pixel,
                          __constant uchar * mat_kernel,
                          int src_whole_cols, int src_whole_rows,
                          int dst_offset_in_pixel)
{
    int l_x = get_local_id(0);
    int l_y = get_local_id(1);
    int x = get_group_id(0)*4*LSIZE0;
    int y = get_group_id(1)*LSIZE1;
    // Tile columns are rounded down to a multiple of 4 so that rows are
    // fetched as whole uchar4 words.
    int start_x = (x + src_offset_x - RADIUSX) & 0xfffffffc;
    int end_x = (x + src_offset_x + LSIZE0*4 + RADIUSX) & 0xfffffffc;
    int width = (end_x - start_x + 4) >> 2;       // tile width in uchar4 words
    int offset = (src_offset_x - RADIUSX) & 3;    // byte shift of the window inside its first word
    int start_y = y + src_offset_y - RADIUSY;
    // Two tile slots per work-item: point1 and point1 + work-group size.
    int point1 = mad24(l_y, LSIZE0, l_x);
    int point2 = point1 + LSIZE0*LSIZE1;
    int tl_x = (point1 % width) << 2;
    int tl_y = point1 / width;
    int tl_x2 = (point2 % width) << 2;
    int tl_y2 = point2 / width;
    int cur_x = start_x + tl_x;
    int cur_y = start_y + tl_y;
    int cur_x2 = start_x + tl_x2;
    int cur_y2 = start_y + tl_y2;
    int start_addr = mad24(cur_y, src_step_in_pixel, cur_x);
    int start_addr2 = mad24(cur_y2, src_step_in_pixel, cur_x2);
    uchar4 temp0, temp1;
    __local uchar4 LDS_DAT[2*LSIZE1*LSIZE0];

    int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
    //read pixels from src; addresses outside (0, end_addr) fall back to 0
    start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
    start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
    temp0 = *(__global uchar4*)&src[start_addr];
    temp1 = *(__global uchar4*)&src[start_addr2];
    //judge if read out of boundary
    temp0.x = ELEM(cur_x, 0, src_whole_cols, VAL, temp0.x);
    temp0.y = ELEM(cur_x+1, 0, src_whole_cols, VAL, temp0.y);
    temp0.z = ELEM(cur_x+2, 0, src_whole_cols, VAL, temp0.z);
    temp0.w = ELEM(cur_x+3, 0, src_whole_cols, VAL, temp0.w);
    temp0 = ELEM(cur_y, 0, src_whole_rows, (uchar4)VAL, temp0);

    temp1.x = ELEM(cur_x2, 0, src_whole_cols, VAL, temp1.x);
    temp1.y = ELEM(cur_x2+1, 0, src_whole_cols, VAL, temp1.y);
    temp1.z = ELEM(cur_x2+2, 0, src_whole_cols, VAL, temp1.z);
    temp1.w = ELEM(cur_x2+3, 0, src_whole_cols, VAL, temp1.w);
    temp1 = ELEM(cur_y2, 0, src_whole_rows, (uchar4)VAL, temp1);

    LDS_DAT[point1] = temp0;
    LDS_DAT[point2] = temp1;
    barrier(CLK_LOCAL_MEM_FENCE);

    uchar4 res = (uchar4)VAL;
    for (int i = 0; i < 2*RADIUSY+1; i++)
        for (int j = 0; j < 2*RADIUSX+1; j++)
        {
            // Byte-granular load from the tile: row (l_y+i), word l_x, shifted by offset+j bytes.
            res = mat_kernel[i*(2*RADIUSX+1)+j] ? MORPH_OP(res, vload4(0, (__local uchar*)&LDS_DAT[mad24((l_y+i), width, l_x)] + offset + j)) : res;
        }

    int gidx = get_global_id(0) << 2;
    int gidy = get_global_id(1);
    int out_addr = mad24(gidy, dst_step_in_pixel, gidx + dst_offset_in_pixel);

    if (gidy < rows)
    {
        if (gidx + 3 < cols)
        {
            // The original test `dst_offset_in_pixel&3==0` parsed as
            // `dst_offset_in_pixel & (3==0)` and was therefore always false,
            // so the vector store was never taken. Test the alignment of the
            // actual output address instead (this assumes the dst base pointer
            // itself is at least 4-byte aligned).
            if ((out_addr & 3) == 0)
            {
                *(__global uchar4*)&dst[out_addr] = res;
            }
            else
            {
                dst[out_addr] = res.x;
                dst[out_addr+1] = res.y;
                dst[out_addr+2] = res.z;
                dst[out_addr+3] = res.w;
            }
        }
        else if (gidx + 2 < cols)
        {
            dst[out_addr] = res.x;
            dst[out_addr+1] = res.y;
            dst[out_addr+2] = res.z;
        }
        else if (gidx + 1 < cols)
        {
            dst[out_addr] = res.x;
            dst[out_addr+1] = res.y;
        }
        else if (gidx < cols)
        {
            dst[out_addr] = res.x;
        }
    }
}
||||
#else |
||||
// Generic erode/dilate (selected by MORPH_OP) for pixels of type GENTYPE.
//
// Each work-item produces one output pixel. The work-group first stages a
// source tile in local memory - every work-item loads two pixels - replacing
// anything outside [0, src_whole_cols) x [0, src_whole_rows) with VAL; after
// the barrier each work-item reduces its (2*RADIUSY+1) x (2*RADIUSX+1)
// neighbourhood, skipping taps whose mat_kernel entry is zero.
//
// GENTYPE, LSIZE0, LSIZE1, RADIUSX, RADIUSY and VAL are not declared in this
// file - presumably build-time definitions (TODO confirm against the host code).
__kernel void morph(__global const GENTYPE * restrict src,
                    __global GENTYPE *dst,
                    int src_offset_x, int src_offset_y,
                    int cols, int rows,
                    int src_step_in_pixel, int dst_step_in_pixel,
                    __constant uchar * mat_kernel,
                    int src_whole_cols, int src_whole_rows,
                    int dst_offset_in_pixel)
{
    const int lid_x = get_local_id(0);
    const int lid_y = get_local_id(1);
    const int group_x = get_group_id(0)*LSIZE0;
    const int group_y = get_group_id(1)*LSIZE1;

    // Top-left source coordinate of the tile and its width in pixels.
    const int tile_x0 = group_x + src_offset_x - RADIUSX;
    const int tile_y0 = group_y + src_offset_y - RADIUSY;
    const int tile_w = LSIZE0 + 2*RADIUSX + 1;

    // Two tile slots per work-item: its linear local id, and that id plus the
    // work-group size.
    const int slot_a = mad24(lid_y, LSIZE0, lid_x);
    const int slot_b = slot_a + LSIZE0*LSIZE1;

    const int ax = tile_x0 + slot_a % tile_w;
    const int ay = tile_y0 + slot_a / tile_w;
    const int bx = tile_x0 + slot_b % tile_w;
    const int by = tile_y0 + slot_b / tile_w;

    __local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];

    // Addresses outside (0, addr_limit) are redirected to element 0 so the
    // global loads are always performed on a valid location.
    const int addr_limit = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
    int addr_a = mad24(ay, src_step_in_pixel, ax);
    int addr_b = mad24(by, src_step_in_pixel, bx);
    if (addr_a >= addr_limit || addr_a <= 0)
        addr_a = 0;
    if (addr_b >= addr_limit || addr_b <= 0)
        addr_b = 0;

    GENTYPE pix_a = src[addr_a];
    GENTYPE pix_b = src[addr_b];

    // Replace anything read from outside the whole source image with VAL.
    pix_a = ELEM(ax, 0, src_whole_cols, (GENTYPE)VAL, pix_a);
    pix_a = ELEM(ay, 0, src_whole_rows, (GENTYPE)VAL, pix_a);
    pix_b = ELEM(bx, 0, src_whole_cols, (GENTYPE)VAL, pix_b);
    pix_b = ELEM(by, 0, src_whole_rows, (GENTYPE)VAL, pix_b);

    LDS_DAT[slot_a] = pix_a;
    LDS_DAT[slot_b] = pix_b;
    barrier(CLK_LOCAL_MEM_FENCE);

    GENTYPE acc = (GENTYPE)VAL;
    for (int ky = 0; ky < 2*RADIUSY+1; ky++)
    {
        for (int kx = 0; kx < 2*RADIUSX+1; kx++)
        {
            if (mat_kernel[ky*(2*RADIUSX+1)+kx])
                acc = MORPH_OP(acc, LDS_DAT[mad24(lid_y+ky, tile_w, lid_x+kx)]);
        }
    }

    const int out_x = get_global_id(0);
    const int out_y = get_global_id(1);
    const int out_addr = mad24(out_y, dst_step_in_pixel, out_x + dst_offset_in_pixel);
    if (out_x < cols && out_y < rows)
        dst[out_addr] = acc;
}
||||
#endif |
Loading…
Reference in new issue