@ -19,7 +19,8 @@ enum
{
{
BLOCK_ROWS = 32 ,
BLOCK_ROWS = 32 ,
BLOCK_COLS = 32 ,
BLOCK_COLS = 32 ,
CTA_SIZE = 256
CTA_SIZE_INTEL = 64 ,
CTA_SIZE_DEFAULT = 256
} ;
} ;
static int divUp ( int a , int b )
static int divUp ( int a , int b )
@ -70,6 +71,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int templateWindowSize , int searchWindowSize )
int templateWindowSize , int searchWindowSize )
{
{
int type = _src . type ( ) , cn = CV_MAT_CN ( type ) ;
int type = _src . type ( ) , cn = CV_MAT_CN ( type ) ;
int ctaSize = ocl : : Device : : getDefault ( ) . isIntel ( ) ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT ;
Size size = _src . size ( ) ;
Size size = _src . size ( ) ;
if ( type ! = CV_8UC1 & & type ! = CV_8UC2 & & type ! = CV_8UC4 )
if ( type ! = CV_8UC1 & & type ! = CV_8UC2 & & type ! = CV_8UC4 )
@ -86,12 +88,12 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
String opts = format ( " -D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d "
String opts = format ( " -D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d "
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d "
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d "
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d "
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d "
" -D convert_int_t=%s -D cn=%d -D CTA_SIZE2=%d -D convert_uchar_t=%s " ,
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s " ,
templateWindowSize , searchWindowSize , ocl : : typeToStr ( type ) ,
templateWindowSize , searchWindowSize , ocl : : typeToStr ( type ) ,
ocl : : typeToStr ( CV_32SC ( cn ) ) , BLOCK_COLS , BLOCK_ROWS , CTA_SIZE ,
ocl : : typeToStr ( CV_32SC ( cn ) ) , BLOCK_COLS , BLOCK_ROWS , ctaSize ,
templateWindowHalfWize , searchWindowHalfSize ,
templateWindowHalfWize , searchWindowHalfSize ,
ocl : : convertTypeStr ( CV_8U , CV_32S , cn , cvt [ 0 ] ) , cn ,
ocl : : convertTypeStr ( CV_8U , CV_32S , cn , cvt [ 0 ] ) , cn ,
CTA_SIZE > > 1 , ocl : : convertTypeStr ( CV_32S , CV_8U , cn , cvt [ 1 ] ) ) ;
ocl : : convertTypeStr ( CV_32S , CV_8U , cn , cvt [ 1 ] ) ) ;
ocl : : Kernel k ( " fastNlMeansDenoising " , ocl : : photo : : nlmeans_oclsrc , opts ) ;
ocl : : Kernel k ( " fastNlMeansDenoising " , ocl : : photo : : nlmeans_oclsrc , opts ) ;
if ( k . empty ( ) )
if ( k . empty ( ) )
@ -120,7 +122,7 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl : : KernelArg : : PtrReadOnly ( almostDist2Weight ) ,
ocl : : KernelArg : : PtrReadOnly ( almostDist2Weight ) ,
ocl : : KernelArg : : PtrReadOnly ( buffer ) , almostTemplateWindowSizeSqBinShift ) ;
ocl : : KernelArg : : PtrReadOnly ( buffer ) , almostTemplateWindowSizeSqBinShift ) ;
size_t globalsize [ 2 ] = { nblocksx * CTA_SIZE , nblocksy } , localsize [ 2 ] = { CTA_SIZE , 1 } ;
size_t globalsize [ 2 ] = { nblocksx * ctaSize , nblocksy } , localsize [ 2 ] = { ctaSize , 1 } ;
return k . run ( 2 , globalsize , localsize , false ) ;
return k . run ( 2 , globalsize , localsize , false ) ;
}
}