@ -103,12 +103,8 @@ namespace cv
static void threshold_8u ( const oclMat & src , oclMat & dst , double thresh , double maxVal , int type )
{
CV_Assert ( ( src . cols = = dst . cols ) & & ( src . rows = = dst . rows ) ) ;
Context * clCxt = src . clCxt ;
uchar thresh_uchar = cvFloor ( thresh ) ;
uchar max_val = cvRound ( maxVal ) ;
String kernelName = " threshold " ;
size_t cols = ( dst . cols + ( dst . offset % 16 ) + 15 ) / 16 ;
size_t bSizeX = 16 , bSizeY = 16 ;
@ -129,14 +125,11 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_uchar ) , ( void * ) & thresh_uchar ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_uchar ) , ( void * ) & max_val ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & type ) ) ;
openCLExecuteKernel ( clCxt , & imgproc_threshold , kernelName , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
openCLExecuteKernel ( src . clCxt , & imgproc_threshold , " threshold " , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
}
static void threshold_32f ( const oclMat & src , oclMat & dst , double thresh , double maxVal , int type )
{
CV_Assert ( ( src . cols = = dst . cols ) & & ( src . rows = = dst . rows ) ) ;
Context * clCxt = src . clCxt ;
float thresh_f = thresh ;
float max_val = maxVal ;
int dst_offset = ( dst . offset > > 2 ) ;
@ -144,10 +137,7 @@ namespace cv
int src_offset = ( src . offset > > 2 ) ;
int src_step = ( src . step > > 2 ) ;
String kernelName = " threshold " ;
size_t cols = ( dst . cols + ( dst_offset & 3 ) + 3 ) / 4 ;
//size_t cols = dst.cols;
size_t bSizeX = 16 , bSizeY = 16 ;
size_t gSizeX = cols % bSizeX = = 0 ? cols : ( cols + bSizeX - 1 ) / bSizeX * bSizeX ;
size_t gSizeY = dst . rows ;
@ -166,8 +156,8 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_float ) , ( void * ) & thresh_f ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_float ) , ( void * ) & max_val ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & type ) ) ;
openCLExecuteKernel ( clCxt , & imgproc_threshold , kernelName , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
openCLExecuteKernel ( src . clCxt , & imgproc_threshold , " threshold " , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
}
// threshold: supports the 8UC1 and 32FC1 data types and five threshold types
@ -185,6 +175,7 @@ namespace cv
return thresh ;
}
////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////// remap //////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////
@ -200,7 +191,6 @@ namespace cv
dst . create ( map1 . size ( ) , src . type ( ) ) ;
String kernelName ;
if ( map1 . type ( ) = = CV_32FC2 & & ! map2 . data )
@ -241,11 +231,8 @@ namespace cv
glbSizeX = cols % blkSizeX = = 0 ? cols : ( cols / blkSizeX + 1 ) * blkSizeX ;
}
else
{
glbSizeX = dst . cols % blkSizeX = = 0 ? dst . cols : ( dst . cols / blkSizeX + 1 ) * blkSizeX ;
}
size_t glbSizeY = dst . rows % blkSizeY = = 0 ? dst . rows : ( dst . rows / blkSizeY + 1 ) * blkSizeY ;
size_t globalThreads [ 3 ] = { glbSizeX , glbSizeY , 1 } ;
size_t localThreads [ 3 ] = { blkSizeX , blkSizeY , 1 } ;
@ -273,14 +260,10 @@ namespace cv
float borderFloat [ 4 ] = { ( float ) borderValue [ 0 ] , ( float ) borderValue [ 1 ] , ( float ) borderValue [ 2 ] , ( float ) borderValue [ 3 ] } ;
if ( src . clCxt - > supportsFeature ( FEATURE_CL_DOUBLE ) )
{
args . push_back ( std : : make_pair ( sizeof ( cl_double4 ) , ( void * ) & borderValue ) ) ;
}
else
{
args . push_back ( std : : make_pair ( sizeof ( cl_float4 ) , ( void * ) & borderFloat ) ) ;
}
}
if ( map1 . channels ( ) = = 1 )
{
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & dst . data ) ) ;
@ -301,14 +284,10 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & map1 . rows ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & cols ) ) ;
if ( src . clCxt - > supportsFeature ( FEATURE_CL_DOUBLE ) )
{
args . push_back ( std : : make_pair ( sizeof ( cl_double4 ) , ( void * ) & borderValue ) ) ;
}
else
{
args . push_back ( std : : make_pair ( sizeof ( cl_float4 ) , ( void * ) & borderFloat ) ) ;
}
}
openCLExecuteKernel ( clCxt , & imgproc_remap , kernelName , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
}
@ -327,7 +306,7 @@ namespace cv
int srcoffset_in_pixel = src . offset / src . elemSize ( ) ;
int dstStep_in_pixel = dst . step1 ( ) / dst . oclchannels ( ) ;
int dstoffset_in_pixel = dst . offset / dst . elemSize ( ) ;
//printf("%d %d\n",src.step1() , dst.elemSize());
String kernelName ;
if ( interpolation = = INTER_LINEAR )
kernelName = " resizeLN " ;
@ -343,9 +322,8 @@ namespace cv
glbSizeX = cols % blkSizeX = = 0 & & cols ! = 0 ? cols : ( cols / blkSizeX + 1 ) * blkSizeX ;
}
else
{
glbSizeX = dst . cols % blkSizeX = = 0 & & dst . cols ! = 0 ? dst . cols : ( dst . cols / blkSizeX + 1 ) * blkSizeX ;
}
size_t glbSizeY = dst . rows % blkSizeY = = 0 & & dst . rows ! = 0 ? dst . rows : ( dst . rows / blkSizeY + 1 ) * blkSizeY ;
size_t globalThreads [ 3 ] = { glbSizeX , glbSizeY , 1 } ;
size_t localThreads [ 3 ] = { blkSizeX , blkSizeY , 1 } ;
@ -393,7 +371,6 @@ namespace cv
openCLExecuteKernel ( clCxt , & imgproc_resize , kernelName , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
}
void resize ( const oclMat & src , oclMat & dst , Size dsize ,
double fx , double fy , int interpolation )
{
@ -404,16 +381,11 @@ namespace cv
CV_Assert ( ! ( dsize = = Size ( ) ) | | ( fx > 0 & & fy > 0 ) ) ;
if ( ! ( dsize = = Size ( ) ) & & ( fx > 0 & & fy > 0 ) )
{
if ( dsize . width ! = ( int ) ( src . cols * fx ) | | dsize . height ! = ( int ) ( src . rows * fy ) )
{
CV_Error ( Error : : StsUnmatchedSizes , " invalid dsize and fx, fy! " ) ;
}
}
if ( dsize = = Size ( ) )
{
dsize = Size ( saturate_cast < int > ( src . cols * fx ) , saturate_cast < int > ( src . rows * fy ) ) ;
}
else
{
fx = ( double ) dsize . width / src . cols ;
@ -427,12 +399,13 @@ namespace cv
resize_gpu ( src , dst , fx , fy , interpolation ) ;
return ;
}
CV_Error ( Error : : StsUnsupportedFormat , " Non-supported interpolation method " ) ;
}
////////////////////////////////////////////////////////////////////////
// medianFilter
void medianFilter ( const oclMat & src , oclMat & dst , int m )
{
CV_Assert ( m % 2 = = 1 & & m > 1 ) ;
@ -452,8 +425,6 @@ namespace cv
int dstOffset = dst . offset / dst . oclchannels ( ) / dst . elemSize1 ( ) ;
Context * clCxt = src . clCxt ;
String kernelName = " medianFilter " ;
std : : vector < std : : pair < size_t , const void * > > args ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & src . data ) ) ;
@ -479,11 +450,12 @@ namespace cv
openCLExecuteKernel ( clCxt , & imgproc_median , kernelName , globalThreads , localThreads , args , src . oclchannels ( ) , src . depth ( ) ) ;
}
else
CV_Error ( Error : : StsUnsupportedFormat , " Non-supported filter length " ) ;
CV_Error ( Error : : StsBadArg , " Non-supported filter length " ) ;
}
////////////////////////////////////////////////////////////////////////
// copyMakeBorder
void copyMakeBorder ( const oclMat & src , oclMat & dst , int top , int bottom , int left , int right , int bordertype , const Scalar & scalar )
{
CV_Assert ( top > = 0 & & bottom > = 0 & & left > = 0 & & right > = 0 ) ;
@ -493,42 +465,39 @@ namespace cv
( bordertype ! = cv : : BORDER_CONSTANT ) & &
( bordertype ! = cv : : BORDER_REPLICATE ) )
{
CV_Error ( Error : : StsBadArg , " u nsupported border type" ) ;
CV_Error ( Error : : StsBadArg , " U nsupported border type" ) ;
}
}
bordertype & = ~ cv : : BORDER_ISOLATED ;
if ( ( bordertype = = cv : : BORDER_REFLECT ) | | ( bordertype = = cv : : BORDER_WRAP ) )
if ( bordertype = = cv : : BORDER_REFLECT | | bordertype = = cv : : BORDER_WRAP )
{
CV_Assert ( ( src . cols > = left ) & & ( src . cols > = right ) & & ( src . rows > = top ) & & ( src . rows > = bottom ) ) ;
}
if ( bordertype = = cv : : BORDER_REFLECT_101 )
else if ( bordertype = = cv : : BORDER_REFLECT_101 )
{
CV_Assert ( ( src . cols > left ) & & ( src . cols > right ) & & ( src . rows > top ) & & ( src . rows > bottom ) ) ;
}
dst . create ( src . rows + top + bottom , src . cols + left + right , src . type ( ) ) ;
int srcStep = src . step1 ( ) / src . oclchannels ( ) ;
int dstStep = dst . step1 ( ) / dst . oclchannels ( ) ;
int srcOffset = src . offset / src . elemSize ( ) ;
int dstOffset = dst . offset / dst . elemSize ( ) ;
int srcStep = src . step1 ( ) / src . oclchannels ( ) , dstStep = dst . step1 ( ) / dst . oclchannels ( ) ;
int srcOffset = src . offset / src . elemSize ( ) , dstOffset = dst . offset / dst . elemSize ( ) ;
int depth = src . depth ( ) , ochannels = src . oclchannels ( ) ;
int __bordertype [ ] = { cv : : BORDER_CONSTANT , cv : : BORDER_REPLICATE , BORDER_REFLECT , BORDER_WRAP , BORDER_REFLECT_101 } ;
const char * borderstr [ ] = { " BORDER_CONSTANT " , " BORDER_REPLICATE " , " BORDER_REFLECT " , " BORDER_WRAP " , " BORDER_REFLECT_101 " } ;
size_t bordertype_index ;
for ( bordertype_index = 0 ; bordertype_index < sizeof ( __bordertype ) / sizeof ( int ) ; bordertype_index + + )
{
if ( __bordertype [ bordertype_index ] = = bordertype )
break ;
}
if ( bordertype_index = = sizeof ( __bordertype ) / sizeof ( int ) )
{
CV_Error ( Error : : StsBadArg , " unsupported border type " ) ;
}
String kernelName = " copymakeborder " ;
size_t localThreads [ 3 ] = { 16 , 16 , 1 } ;
size_t globalThreads [ 3 ] = { ( dst . cols + localThreads [ 0 ] - 1 ) / localThreads [ 0 ] * localThreads [ 0 ] ,
( dst . rows + localThreads [ 1 ] - 1 ) / localThreads [ 1 ] * localThreads [ 1 ] , 1
} ;
size_t globalThreads [ 3 ] = { dst . cols , dst . rows , 1 } ;
std : : vector < std : : pair < size_t , const void * > > args ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & src . data ) ) ;
@ -543,169 +512,30 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & dstOffset ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & top ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & left ) ) ;
char compile_option [ 64 ] ;
union sc
{
cl_uchar4 uval ;
cl_char4 cval ;
cl_ushort4 usval ;
cl_short4 shval ;
cl_int4 ival ;
cl_float4 fval ;
cl_double4 dval ;
} val ;
switch ( dst . depth ( ) )
{
case CV_8U :
val . uval . s [ 0 ] = saturate_cast < uchar > ( scalar . val [ 0 ] ) ;
val . uval . s [ 1 ] = saturate_cast < uchar > ( scalar . val [ 1 ] ) ;
val . uval . s [ 2 ] = saturate_cast < uchar > ( scalar . val [ 2 ] ) ;
val . uval . s [ 3 ] = saturate_cast < uchar > ( scalar . val [ 3 ] ) ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=uchar -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_uchar ) , ( void * ) & val . uval . s [ 0 ] ) ) ;
if ( ( ( dst . offset & 3 ) = = 0 ) & & ( ( dst . cols & 3 ) = = 0 ) )
const char * const typeMap [ ] = { " uchar " , " char " , " ushort " , " short " , " int " , " float " , " double " } ;
const char * const channelMap [ ] = { " " , " " , " 2 " , " 4 " , " 4 " } ;
std : : string buildOptions = format ( " -D GENTYPE=%s%s -D %s " ,
typeMap [ depth ] , channelMap [ ochannels ] ,
borderstr [ bordertype_index ] ) ;
if ( src . type ( ) = = CV_8UC1 & & ( dst . offset & 3 ) = = 0 & & ( dst . cols & 3 ) = = 0 )
{
kernelName = " copymakeborder_C1_D0 " ;
globalThreads [ 0 ] = ( dst . cols / 4 + localThreads [ 0 ] - 1 ) / localThreads [ 0 ] * localThreads [ 0 ] ;
}
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=uchar4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_uchar4 ) , ( void * ) & val . uval ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
case CV_8S :
val . cval . s [ 0 ] = saturate_cast < char > ( scalar . val [ 0 ] ) ;
val . cval . s [ 1 ] = saturate_cast < char > ( scalar . val [ 1 ] ) ;
val . cval . s [ 2 ] = saturate_cast < char > ( scalar . val [ 2 ] ) ;
val . cval . s [ 3 ] = saturate_cast < char > ( scalar . val [ 3 ] ) ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=char -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_char ) , ( void * ) & val . cval . s [ 0 ] ) ) ;
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=char4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_char4 ) , ( void * ) & val . cval ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
case CV_16U :
val . usval . s [ 0 ] = saturate_cast < ushort > ( scalar . val [ 0 ] ) ;
val . usval . s [ 1 ] = saturate_cast < ushort > ( scalar . val [ 1 ] ) ;
val . usval . s [ 2 ] = saturate_cast < ushort > ( scalar . val [ 2 ] ) ;
val . usval . s [ 3 ] = saturate_cast < ushort > ( scalar . val [ 3 ] ) ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=ushort -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_ushort ) , ( void * ) & val . usval . s [ 0 ] ) ) ;
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=ushort4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_ushort4 ) , ( void * ) & val . usval ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
case CV_16S :
val . shval . s [ 0 ] = saturate_cast < short > ( scalar . val [ 0 ] ) ;
val . shval . s [ 1 ] = saturate_cast < short > ( scalar . val [ 1 ] ) ;
val . shval . s [ 2 ] = saturate_cast < short > ( scalar . val [ 2 ] ) ;
val . shval . s [ 3 ] = saturate_cast < short > ( scalar . val [ 3 ] ) ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=short -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_short ) , ( void * ) & val . shval . s [ 0 ] ) ) ;
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=short4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_short4 ) , ( void * ) & val . shval ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
case CV_32S :
val . ival . s [ 0 ] = saturate_cast < int > ( scalar . val [ 0 ] ) ;
val . ival . s [ 1 ] = saturate_cast < int > ( scalar . val [ 1 ] ) ;
val . ival . s [ 2 ] = saturate_cast < int > ( scalar . val [ 2 ] ) ;
val . ival . s [ 3 ] = saturate_cast < int > ( scalar . val [ 3 ] ) ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=int -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & val . ival . s [ 0 ] ) ) ;
break ;
case 2 :
sprintf ( compile_option , " -D GENTYPE=int2 -D %s " , borderstr [ bordertype_index ] ) ;
cl_int2 i2val ;
i2val . s [ 0 ] = val . ival . s [ 0 ] ;
i2val . s [ 1 ] = val . ival . s [ 1 ] ;
args . push_back ( std : : make_pair ( sizeof ( cl_int2 ) , ( void * ) & i2val ) ) ;
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=int4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int4 ) , ( void * ) & val . ival ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
case CV_32F :
val . fval . s [ 0 ] = scalar . val [ 0 ] ;
val . fval . s [ 1 ] = scalar . val [ 1 ] ;
val . fval . s [ 2 ] = scalar . val [ 2 ] ;
val . fval . s [ 3 ] = scalar . val [ 3 ] ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=float -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_float ) , ( void * ) & val . fval . s [ 0 ] ) ) ;
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=float4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_float4 ) , ( void * ) & val . fval ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
case CV_64F :
val . dval . s [ 0 ] = scalar . val [ 0 ] ;
val . dval . s [ 1 ] = scalar . val [ 1 ] ;
val . dval . s [ 2 ] = scalar . val [ 2 ] ;
val . dval . s [ 3 ] = scalar . val [ 3 ] ;
switch ( dst . oclchannels ( ) )
{
case 1 :
sprintf ( compile_option , " -D GENTYPE=double -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_double ) , ( void * ) & val . dval . s [ 0 ] ) ) ;
break ;
case 4 :
sprintf ( compile_option , " -D GENTYPE=double4 -D %s " , borderstr [ bordertype_index ] ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_double4 ) , ( void * ) & val . dval ) ) ;
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unsupported channels " ) ;
}
break ;
default :
CV_Error ( Error : : StsUnsupportedFormat , " unknown depth " ) ;
globalThreads [ 0 ] = dst . cols > > 2 ;
}
openCLExecuteKernel ( src . clCxt , & imgproc_copymakeboder , kernelName , globalThreads , localThreads , args , - 1 , - 1 , compile_option ) ;
int cn = src . channels ( ) , ocn = src . oclchannels ( ) ;
int bufSize = src . elemSize1 ( ) * ocn ;
AutoBuffer < uchar > _buf ( bufSize ) ;
uchar * buf = ( uchar * ) _buf ;
scalarToRawData ( scalar , buf , dst . type ( ) ) ;
memset ( buf + src . elemSize1 ( ) * cn , 0 , ( ocn - cn ) * src . elemSize1 ( ) ) ;
args . push_back ( std : : make_pair ( bufSize , ( void * ) buf ) ) ;
openCLExecuteKernel ( src . clCxt , & imgproc_copymakeboder , kernelName , globalThreads ,
localThreads , args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
}
////////////////////////////////////////////////////////////////////////
@ -784,31 +614,31 @@ namespace cv
String s [ 3 ] = { " NN " , " Linear " , " Cubic " } ;
String kernelName = " warpAffine " + s [ interpolation ] ;
if ( src . clCxt - > supportsFeature ( FEATURE_CL_DOUBLE ) )
{
cl_int st ;
coeffs_cm = clCreateBuffer ( * ( cl_context * ) clCxt - > getOpenCLContextPtr ( ) , CL_MEM_READ_WRITE , sizeof ( F ) * 2 * 3 , NULL , & st ) ;
openCLVerifyCall ( st ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 , sizeof ( F ) * 2 * 3 , coeffs , 0 , 0 , 0 ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 ,
sizeof ( F ) * 2 * 3 , coeffs , 0 , 0 , 0 ) ) ;
}
else
{
cl_int st ;
for ( int m = 0 ; m < 2 ; m + + )
for ( int n = 0 ; n < 3 ; n + + )
{
float_coeffs [ m ] [ n ] = coeffs [ m ] [ n ] ;
}
coeffs_cm = clCreateBuffer ( * ( cl_context * ) clCxt - > getOpenCLContextPtr ( ) , CL_MEM_READ_WRITE , sizeof ( float ) * 2 * 3 , NULL , & st ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 , sizeof ( float ) * 2 * 3 , float_coeffs , 0 , 0 , 0 ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm ,
1 , 0 , sizeof ( float ) * 2 * 3 , float_coeffs , 0 , 0 , 0 ) ) ;
}
//TODO: improve this kernel
size_t blkSizeX = 16 , blkSizeY = 16 ;
size_t glbSizeX ;
size_t cols ;
//if(src.type() == CV_8UC1 && interpolation != 2)
if ( src . type ( ) = = CV_8UC1 & & interpolation ! = 2 )
{
cols = ( dst . cols + dst . offset % 4 + 3 ) / 4 ;
@ -819,6 +649,7 @@ namespace cv
cols = dst . cols ;
glbSizeX = dst . cols % blkSizeX = = 0 ? dst . cols : ( dst . cols / blkSizeX + 1 ) * blkSizeX ;
}
size_t glbSizeY = dst . rows % blkSizeY = = 0 ? dst . rows : ( dst . rows / blkSizeY + 1 ) * blkSizeY ;
size_t globalThreads [ 3 ] = { glbSizeX , glbSizeY , 1 } ;
size_t localThreads [ 3 ] = { blkSizeX , blkSizeY , 1 } ;
@ -842,7 +673,6 @@ namespace cv
openCLSafeCall ( clReleaseMemObject ( coeffs_cm ) ) ;
}
void warpPerspective_gpu ( const oclMat & src , oclMat & dst , double coeffs [ 3 ] [ 3 ] , int interpolation )
{
CV_Assert ( ( src . oclchannels ( ) = = dst . oclchannels ( ) ) ) ;
@ -860,7 +690,8 @@ namespace cv
cl_int st ;
coeffs_cm = clCreateBuffer ( * ( cl_context * ) clCxt - > getOpenCLContextPtr ( ) , CL_MEM_READ_WRITE , sizeof ( double ) * 3 * 3 , NULL , & st ) ;
openCLVerifyCall ( st ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 , sizeof ( double ) * 3 * 3 , coeffs , 0 , 0 , 0 ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 ,
sizeof ( double ) * 3 * 3 , coeffs , 0 , 0 , 0 ) ) ;
}
else
{
@ -871,8 +702,10 @@ namespace cv
coeffs_cm = clCreateBuffer ( * ( cl_context * ) clCxt - > getOpenCLContextPtr ( ) , CL_MEM_READ_WRITE , sizeof ( float ) * 3 * 3 , NULL , & st ) ;
openCLVerifyCall ( st ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 , sizeof ( float ) * 3 * 3 , float_coeffs , 0 , 0 , 0 ) ) ;
openCLSafeCall ( clEnqueueWriteBuffer ( * ( cl_command_queue * ) clCxt - > getOpenCLCommandQueuePtr ( ) , ( cl_mem ) coeffs_cm , 1 , 0 ,
sizeof ( float ) * 3 * 3 , float_coeffs , 0 , 0 , 0 ) ) ;
}
//TODO: improve this kernel
size_t blkSizeX = 16 , blkSizeY = 16 ;
size_t glbSizeX ;
@ -883,12 +716,11 @@ namespace cv
glbSizeX = cols % blkSizeX = = 0 ? cols : ( cols / blkSizeX + 1 ) * blkSizeX ;
}
else
/*
*/
{
cols = dst . cols ;
glbSizeX = dst . cols % blkSizeX = = 0 ? dst . cols : ( dst . cols / blkSizeX + 1 ) * blkSizeX ;
}
size_t glbSizeY = dst . rows % blkSizeY = = 0 ? dst . rows : ( dst . rows / blkSizeY + 1 ) * blkSizeY ;
size_t globalThreads [ 3 ] = { glbSizeX , glbSizeY , 1 } ;
size_t localThreads [ 3 ] = { blkSizeX , blkSizeY , 1 } ;
@ -931,9 +763,7 @@ namespace cv
Mat coeffsMat ( 2 , 3 , CV_64F , ( void * ) coeffsM ) ;
M . convertTo ( coeffsMat , coeffsMat . type ( ) ) ;
if ( ! warpInd )
{
convert_coeffs ( coeffsM ) ;
}
for ( int i = 0 ; i < 2 ; + + i )
for ( int j = 0 ; j < 3 ; + + j )
@ -961,9 +791,7 @@ namespace cv
Mat coeffsMat ( 3 , 3 , CV_64F , ( void * ) coeffsM ) ;
M . convertTo ( coeffsMat , coeffsMat . type ( ) ) ;
if ( ! warpInd )
{
invert ( coeffsM ) ;
}
for ( int i = 0 ; i < 3 ; + + i )
for ( int j = 0 ; j < 3 ; + + j )
@ -974,12 +802,13 @@ namespace cv
////////////////////////////////////////////////////////////////////////
// integral
void integral ( const oclMat & src , oclMat & sum , oclMat & sqsum )
{
CV_Assert ( src . type ( ) = = CV_8UC1 ) ;
if ( ! src . clCxt - > supportsFeature ( ocl : : FEATURE_CL_DOUBLE ) & & src . depth ( ) = = CV_64F )
{
CV_Error ( Error : : Gpu NotSupported, " select device do n't support double" ) ;
CV_Error ( Error : : OpenCLDouble NotSupported, " Select device does n't support double" ) ;
return ;
}
@ -1073,6 +902,7 @@ namespace cv
}
/////////////////////// corner //////////////////////////////
static void extractCovData ( const oclMat & src , oclMat & Dx , oclMat & Dy ,
int blockSize , int ksize , int borderType )
{
@ -1087,9 +917,8 @@ namespace cv
scale = 1. / scale ;
}
else
{
scale = 1. / scale ;
}
if ( ksize > 0 )
{
Sobel ( src , Dx , CV_32F , 1 , 0 , ksize , scale , 0 , borderType ) ;
@ -1122,10 +951,10 @@ namespace cv
sprintf ( borderType , " BORDER_REPLICATE " ) ;
break ;
default :
std : : cout < < " BORDER type is not supported! " < < std : : endl ;
CV_Error ( Error : : StsBadFlag , " BORDER type is not supported! " ) ;
}
char build_options [ 150 ] ;
sprintf ( build_options , " -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s " ,
std : : string buildOptions = format ( " -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s " ,
block_size / 2 , block_size / 2 , block_size , block_size , borderType ) ;
size_t blockSizeX = 256 , blockSizeY = 1 ;
@ -1155,7 +984,7 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & dst . cols ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & dst . step ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_float ) , ( void * ) & k ) ) ;
openCLExecuteKernel ( dst . clCxt , source , kernelName , gt , lt , args , - 1 , - 1 , build_options ) ;
openCLExecuteKernel ( dst . clCxt , source , kernelName , gt , lt , args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
}
void cornerHarris ( const oclMat & src , oclMat & dst , int blockSize , int ksize ,
@ -1170,10 +999,13 @@ namespace cv
{
if ( ! src . clCxt - > supportsFeature ( FEATURE_CL_DOUBLE ) & & src . depth ( ) = = CV_64F )
{
CV_Error ( Error : : GpuNotSupported , " select device don't support double " ) ;
CV_Error ( Error : : OpenCLDoubleNotSupported , " Select device doesn't support double " ) ;
return ;
}
CV_Assert ( src . cols > = blockSize / 2 & & src . rows > = blockSize / 2 ) ;
CV_Assert ( borderType = = cv : : BORDER_CONSTANT | | borderType = = cv : : BORDER_REFLECT101 | | borderType = = cv : : BORDER_REPLICATE | | borderType = = cv : : BORDER_REFLECT ) ;
CV_Assert ( borderType = = cv : : BORDER_CONSTANT | | borderType = = cv : : BORDER_REFLECT101 | | borderType = = cv : : BORDER_REPLICATE
| | borderType = = cv : : BORDER_REFLECT ) ;
extractCovData ( src , dx , dy , blockSize , ksize , borderType ) ;
dst . create ( src . size ( ) , CV_32F ) ;
corner_ocl ( & imgproc_calcHarris , " calcHarris " , blockSize , static_cast < float > ( k ) , dx , dy , dst , borderType ) ;
@ -1189,20 +1021,24 @@ namespace cv
{
if ( ! src . clCxt - > supportsFeature ( FEATURE_CL_DOUBLE ) & & src . depth ( ) = = CV_64F )
{
CV_Error ( Error : : GpuNotSupported , " select device don't support double " ) ;
CV_Error ( Error : : OpenCLDoubleNotSupported , " select device don't support double " ) ;
return ;
}
CV_Assert ( src . cols > = blockSize / 2 & & src . rows > = blockSize / 2 ) ;
CV_Assert ( borderType = = cv : : BORDER_CONSTANT | | borderType = = cv : : BORDER_REFLECT101 | | borderType = = cv : : BORDER_REPLICATE | | borderType = = cv : : BORDER_REFLECT ) ;
extractCovData ( src , dx , dy , blockSize , ksize , borderType ) ;
dst . create ( src . size ( ) , CV_32F ) ;
corner_ocl ( & imgproc_calcMinEigenVal , " calcMinEigenVal " , blockSize , 0 , dx , dy , dst , borderType ) ;
}
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
static void meanShiftFiltering_gpu ( const oclMat & src , oclMat dst , int sp , int sr , int maxIter , float eps )
{
CV_Assert ( ( src . cols = = dst . cols ) & & ( src . rows = = dst . rows ) ) ;
CV_Assert ( ! ( dst . step & 0x3 ) ) ;
Context * clCxt = src . clCxt ;
//Arrange the NDRange
int col = src . cols , row = src . rows ;
@ -1229,7 +1065,8 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & sr ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & maxIter ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_float ) , ( void * ) & eps ) ) ;
openCLExecuteKernel ( clCxt , & meanShift , " meanshift_kernel " , globalThreads , localThreads , args , - 1 , - 1 ) ;
openCLExecuteKernel ( src . clCxt , & meanShift , " meanshift_kernel " , globalThreads , localThreads , args , - 1 , - 1 ) ;
}
void meanShiftFiltering ( const oclMat & src , oclMat & dst , int sp , int sr , TermCriteria criteria )
@ -1253,7 +1090,6 @@ namespace cv
eps = ( float ) std : : max ( criteria . epsilon , 0.0 ) ;
meanShiftFiltering_gpu ( src , dst , sp , sr , maxIter , eps ) ;
}
static void meanShiftProc_gpu ( const oclMat & src , oclMat dstr , oclMat dstsp , int sp , int sr , int maxIter , float eps )
@ -1262,7 +1098,6 @@ namespace cv
CV_Assert ( ( src . cols = = dstr . cols ) & & ( src . rows = = dstr . rows ) & &
( src . rows = = dstsp . rows ) & & ( src . cols = = dstsp . cols ) ) ;
CV_Assert ( ! ( dstsp . step & 0x3 ) ) ;
Context * clCxt = src . clCxt ;
//Arrange the NDRange
int col = src . cols , row = src . rows ;
@ -1292,7 +1127,8 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & sr ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & maxIter ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_float ) , ( void * ) & eps ) ) ;
openCLExecuteKernel ( clCxt , & meanShift , " meanshiftproc_kernel " , globalThreads , localThreads , args , - 1 , - 1 ) ;
openCLExecuteKernel ( src . clCxt , & meanShift , " meanshiftproc_kernel " , globalThreads , localThreads , args , - 1 , - 1 ) ;
}
void meanShiftProc ( const oclMat & src , oclMat & dstr , oclMat & dstsp , int sp , int sr , TermCriteria criteria )
@ -1303,6 +1139,12 @@ namespace cv
if ( src . depth ( ) ! = CV_8U | | src . oclchannels ( ) ! = 4 )
CV_Error ( Error : : StsUnsupportedFormat , " Only 8-bit, 4-channel images are supported " ) ;
// if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
// {
// CV_Error(Error::OpenCLDoubleNotSupportedNotSupported, "Selected device doesn't support double, so a deviation exists.\nIf the accuracy is acceptable, the error can be ignored.\n");
// return;
// }
dstr . create ( src . size ( ) , CV_8UC4 ) ;
dstsp . create ( src . size ( ) , CV_16SC2 ) ;
@ -1322,6 +1164,7 @@ namespace cv
///////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////hist///////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
namespace histograms
{
const int PARTIAL_HISTOGRAM256_COUNT = 256 ;
@ -1332,11 +1175,8 @@ namespace cv
{
using namespace histograms ;
Context * clCxt = mat_src . clCxt ;
int depth = mat_src . depth ( ) ;
String kernelName = " calc_sub_hist " ;
size_t localThreads [ 3 ] = { HISTOGRAM256_BIN_COUNT , 1 , 1 } ;
size_t globalThreads [ 3 ] = { PARTIAL_HISTOGRAM256_COUNT * localThreads [ 0 ] , 1 , 1 } ;
@ -1375,6 +1215,7 @@ namespace cv
src_offset > > = dataWidth_bits ;
int src_step = mat_src . step > > dataWidth_bits ;
int datacount = tempcols * mat_src . rows ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & mat_src . data ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & src_step ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & src_offset ) ) ;
@ -1384,16 +1225,17 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & inc_x ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & inc_y ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & hist_step ) ) ;
openCLExecuteKernel ( clCxt , & imgproc_histogram , kernelName , globalThreads , localThreads , args , - 1 , depth ) ;
openCLExecuteKernel ( mat_src . clCxt , & imgproc_histogram , " calc_sub_hist " , globalThreads , localThreads , args , - 1 , depth ) ;
}
if ( left_col ! = 0 | | right_col ! = 0 )
{
kernelName = " calc_sub_hist_border " ;
src_offset = mat_src . offset ;
localThreads [ 0 ] = 1 ;
localThreads [ 1 ] = 256 ;
globalThreads [ 0 ] = left_col + right_col ;
globalThreads [ 1 ] = ( mat_src . rows + localThreads [ 1 ] - 1 ) / localThreads [ 1 ] * localThreads [ 1 ] ;
globalThreads [ 1 ] = mat_src . rows ;
args . clear ( ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & mat_src . data ) ) ;
@ -1404,25 +1246,27 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & cols ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & mat_src . rows ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & hist_step ) ) ;
openCLExecuteKernel ( clCxt , & imgproc_histogram , kernelName , globalThreads , localThreads , args , - 1 , depth ) ;
openCLExecuteKernel ( mat_src . clCxt , & imgproc_histogram , " calc_sub_hist_border " , globalThreads , localThreads , args , - 1 , depth ) ;
}
}
static void merge_sub_hist ( const oclMat & sub_hist , oclMat & mat_hist )
{
using namespace histograms ;
Context * clCxt = sub_hist . clCxt ;
String kernelName = " merge_hist " ;
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
size_t globalThreads [ 3 ] = { HISTOGRAM256_BIN_COUNT * localThreads [ 0 ] , 1 , 1 } ;
int src_step = sub_hist . step > > 2 ;
std : : vector < std : : pair < size_t , const void * > > args ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & sub_hist . data ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & mat_hist . data ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & src_step ) ) ;
openCLExecuteKernel ( clCxt , & imgproc_histogram , kernelName , globalThreads , localThreads , args , - 1 , - 1 ) ;
openCLExecuteKernel ( sub_hist . clCxt , & imgproc_histogram , " merge_hist " , globalThreads , localThreads , args , - 1 , - 1 ) ;
}
void calcHist ( const oclMat & mat_src , oclMat & mat_hist )
{
using namespace histograms ;
@ -1435,6 +1279,7 @@ namespace cv
calc_sub_hist ( mat_src , buf ) ;
merge_sub_hist ( buf , mat_hist ) ;
}
///////////////////////////////////equalizeHist/////////////////////////////////////////////////////
void equalizeHist ( const oclMat & mat_src , oclMat & mat_dst )
{
@ -1444,17 +1289,17 @@ namespace cv
calcHist ( mat_src , mat_hist ) ;
Context * clCxt = mat_src . clCxt ;
String kernelName = " calLUT " ;
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
size_t globalThreads [ 3 ] = { 256 , 1 , 1 } ;
oclMat lut ( 1 , 256 , CV_8UC1 ) ;
std : : vector < std : : pair < size_t , const void * > > args ;
int total = mat_src . rows * mat_src . cols ;
std : : vector < std : : pair < size_t , const void * > > args ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & lut . data ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & mat_hist . data ) ) ;
args . push_back ( std : : make_pair ( sizeof ( int ) , ( void * ) & total ) ) ;
openCLExecuteKernel ( clCxt , & imgproc_histogram , kernelName , globalThreads , localThreads , args , - 1 , - 1 ) ;
openCLExecuteKernel ( mat_src . clCxt , & imgproc_histogram , " calLUT " , globalThreads , localThreads , args , - 1 , - 1 ) ;
LUT ( mat_src , lut , mat_dst ) ;
}
@ -1489,12 +1334,11 @@ namespace cv
else
{
cl_kernel kernel = openCLGetKernelFromSource ( Context : : getContext ( ) , & imgproc_clahe , kernelName ) ;
size_ t wave_size = queryWaveFrontSize ( kernel ) ;
in t wave_size = ( int ) queryWaveFrontSize ( kernel ) ;
openCLSafeCall ( clReleaseKernel ( kernel ) ) ;
static char opt [ 20 ] = { 0 } ;
sprintf ( opt , " -D WAVE_SIZE=%d " , ( int ) wave_size ) ;
openCLExecuteKernel ( Context : : getContext ( ) , & imgproc_clahe , kernelName , globalThreads , localThreads , args , - 1 , - 1 , opt ) ;
std : : string opt = format ( " -D WAVE_SIZE=%d " , wave_size ) ;
openCLExecuteKernel ( Context : : getContext ( ) , & imgproc_clahe , kernelName , globalThreads , localThreads , args , - 1 , - 1 , opt . c_str ( ) ) ;
}
}
@ -1518,11 +1362,10 @@ namespace cv
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & tilesX ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_int ) , ( void * ) & tilesY ) ) ;
String kernelName = " transform " ;
size_t localThreads [ 3 ] = { 32 , 8 , 1 } ;
size_t globalThreads [ 3 ] = { src . cols , src . rows , 1 } ;
openCLExecuteKernel ( Context : : getContext ( ) , & imgproc_clahe , kernelName , globalThreads , localThreads , args , - 1 , - 1 ) ;
openCLExecuteKernel ( Context : : getContext ( ) , & imgproc_clahe , " transform " , globalThreads , localThreads , args , - 1 , - 1 ) ;
}
}
@ -1553,6 +1396,7 @@ namespace cv
oclMat srcExt_ ;
oclMat lut_ ;
} ;
CLAHE_Impl : : CLAHE_Impl ( double clipLimit , int tilesX , int tilesY ) :
clipLimit_ ( clipLimit ) , tilesX_ ( tilesX ) , tilesY_ ( tilesY )
{
@ -1562,6 +1406,7 @@ namespace cv
obj . info ( ) - > addParam ( obj , " clipLimit " , obj . clipLimit_ ) ;
obj . info ( ) - > addParam ( obj , " tilesX " , obj . tilesX_ ) ;
obj . info ( ) - > addParam ( obj , " tilesY " , obj . tilesY_ ) )
void CLAHE_Impl : : apply ( cv : : InputArray src_raw , cv : : OutputArray dst_raw )
{
oclMat & src = getOclMatRef ( src_raw ) ;
@ -1601,7 +1446,6 @@ namespace cv
}
clahe : : calcLut ( srcForLut , lut_ , tilesX_ , tilesY_ , tileSize , clipLimit , lutScale ) ;
//finish();
clahe : : transform ( src , dst , lut_ , tilesX_ , tilesY_ , tileSize ) ;
}
@ -1639,8 +1483,8 @@ namespace cv
}
//////////////////////////////////bilateralFilter////////////////////////////////////////////////////
static void
oclbilateralFilter_8u ( const oclMat & src , oclMat & dst , int d ,
static void oclbilateralFilter_8u ( const oclMat & src , oclMat & dst , int d ,
double sigma_color , double sigma_space ,
int borderType )
{
@ -1678,6 +1522,7 @@ namespace cv
int dst_step_in_pixel = dst . step / dst . elemSize ( ) ;
int dst_offset_in_pixel = dst . offset / dst . elemSize ( ) ;
int temp_step_in_pixel = temp . step / temp . elemSize ( ) ;
// initialize color-related bilateral filter coefficients
for ( i = 0 ; i < 256 * cn ; i + + )
color_weight [ i ] = ( float ) std : : exp ( i * i * gauss_color_coeff ) ;
@ -1692,21 +1537,21 @@ namespace cv
space_weight [ maxk ] = ( float ) std : : exp ( r * r * gauss_space_coeff ) ;
space_ofs [ maxk + + ] = ( int ) ( i * temp_step_in_pixel + j ) ;
}
oclMat oclcolor_weight ( 1 , cn * 256 , CV_32FC1 , color_weight ) ;
oclMat oclspace_weight ( 1 , d * d , CV_32FC1 , space_weight ) ;
oclMat oclspace_ofs ( 1 , d * d , CV_32SC1 , space_ofs ) ;
String kernelName = " bilateral " ;
size_t localThreads [ 3 ] = { 16 , 16 , 1 } ;
size_t globalThreads [ 3 ] = { ( dst . cols + localThreads [ 0 ] - 1 ) / localThreads [ 0 ] * localThreads [ 0 ] ,
( dst . rows + localThreads [ 1 ] - 1 ) / localThreads [ 1 ] * localThreads [ 1 ] ,
1
} ;
size_t globalThreads [ 3 ] = { dst . cols , dst . rows , 1 } ;
if ( ( dst . type ( ) = = CV_8UC1 ) & & ( ( dst . offset & 3 ) = = 0 ) & & ( ( dst . cols & 3 ) = = 0 ) )
{
kernelName = " bilateral2 " ;
globalThreads [ 0 ] = ( dst . cols / 4 + localThreads [ 0 ] - 1 ) / localThreads [ 0 ] * localThreads [ 0 ] ;
globalThreads [ 0 ] = dst . cols / 4 ;
}
std : : vector < std : : pair < size_t , const void * > > args ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & dst . data ) ) ;
args . push_back ( std : : make_pair ( sizeof ( cl_mem ) , ( void * ) & temp . data ) ) ;
@ -1726,7 +1571,6 @@ namespace cv
}
void bilateralFilter ( const oclMat & src , oclMat & dst , int radius , double sigmaclr , double sigmaspc , int borderType )
{
dst . create ( src . size ( ) , src . type ( ) ) ;
if ( src . depth ( ) = = CV_8U )
oclbilateralFilter_8u ( src , dst , radius , sigmaclr , sigmaspc , borderType ) ;
@ -1870,7 +1714,7 @@ static void convolve_run_fft(const oclMat &image, const oclMat &templ, oclMat &r
}
# else
CV_Error ( Error : : StsNotImplemented , " OpenCL DFT is not implemented " ) ;
CV_Error ( Error : : OpenCLNoAMDBlasFft , " OpenCL DFT is not implemented " ) ;
# define UNUSED(x) (void)(x);
UNUSED ( image ) UNUSED ( templ ) UNUSED ( result ) UNUSED ( ccorr ) UNUSED ( buf )
# undef UNUSED