@ -103,7 +103,11 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
int dststep1 = dst . step / dst . elemSize ( ) , dstoffset1 = dst . offset / dst . elemSize ( ) ;
std : : vector < uchar > m ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 16 , 10 , 1 } ;
# else
size_t localThreads [ 3 ] = { 16 , 16 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { dst . cols , dst . rows , 1 } ;
std : : string kernelName = " arithm_binary_op " ;
@ -337,10 +341,15 @@ static void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int groupn
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & src . data ) ) ;
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & dst ) ) ;
size_t globalThreads [ 3 ] = { groupnum * 256 , 1 , 1 } ;
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
# ifdef ANDROID
openCLExecuteKernel ( src . clCxt , & arithm_sum , " arithm_op_sum " , globalThreads , NULL ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
# else
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
openCLExecuteKernel ( src . clCxt , & arithm_sum , " arithm_op_sum " , globalThreads , localThreads ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
# endif
}
template < typename T >
@ -515,6 +524,7 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem
size_t globalThreads [ 3 ] = { groupnum * 256 , 1 , 1 } ;
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
// The kernel uses a fixed grid size; replacing localThreads with NULL is impossible without kernel changes.
openCLExecuteKernel ( src . clCxt , & arithm_minMax , kernelName , globalThreads , localThreads ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
}
@ -616,7 +626,11 @@ static void arithm_absdiff_nonsaturate_run(const oclMat & src1, const oclMat & s
int diffstep1 = diff . step / diff . elemSize ( ) , diffoffset1 = diff . offset / diff . elemSize ( ) ;
string kernelName = " arithm_absdiff_nonsaturate " ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 16 , 10 , 1 } ;
# else
size_t localThreads [ 3 ] = { 16 , 16 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { diff . cols , diff . rows , 1 } ;
const char * const typeMap [ ] = { " uchar " , " char " , " ushort " , " short " , " int " , " float " , " double " } ;
@ -835,7 +849,11 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel
int srcoffset1 = src . offset / src . elemSize1 ( ) , dstoffset1 = dst . offset / dst . elemSize1 ( ) ;
int srcstep1 = src . step1 ( ) , dststep1 = dst . step1 ( ) ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 64 , 2 , 1 } ;
# else
size_t localThreads [ 3 ] = { 64 , 4 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { dst . cols , dst . rows , 1 } ;
std : : string buildOptions = format ( " -D srcT=%s " ,
@ -873,7 +891,11 @@ static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src
{
int depth = dst . depth ( ) ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 64 , 2 , 1 } ;
# else
size_t localThreads [ 3 ] = { 64 , 4 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { dst . cols , dst . rows , 1 } ;
int src1_step = src1 . step / src1 . elemSize ( ) , src1_offset = src1 . offset / src1 . elemSize ( ) ;
@ -921,7 +943,11 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
int src2step1 = src2 . step / src2 . elemSize1 ( ) , src2offset1 = src2 . offset / src2 . elemSize1 ( ) ;
int dststep1 = dst . step / dst . elemSize1 ( ) , dstoffset1 = dst . offset / dst . elemSize1 ( ) ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 64 , 2 , 1 } ;
# else
size_t localThreads [ 3 ] = { 64 , 4 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { cols1 , dst . rows , 1 } ;
vector < pair < size_t , const void * > > args ;
@ -967,7 +993,11 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
int cols = src1 . cols * channels ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 64 , 2 , 1 } ;
# else
size_t localThreads [ 3 ] = { 64 , 4 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { cols , src1 . rows , 1 } ;
int src1_step = src1 . step / src1 . elemSize1 ( ) , src1_offset = src1 . offset / src1 . elemSize1 ( ) ;
@ -1021,7 +1051,11 @@ static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &d
int channels = src2 . oclchannels ( ) , depth = src2 . depth ( ) ;
int cols = src2 . cols * channels , rows = src2 . rows ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 64 , 2 , 1 } ;
# else
size_t localThreads [ 3 ] = { 64 , 4 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { cols , rows , 1 } ;
int src1_step = src1 . step / src1 . elemSize1 ( ) , src1_offset = src1 . offset / src1 . elemSize1 ( ) ;
@ -1097,6 +1131,8 @@ static void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen ,
char build_options [ 50 ] ;
sprintf ( build_options , " -D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d " , src . depth ( ) , repeat_s , repeat_e ) ;
size_t gt [ 3 ] = { groupnum * 256 , 1 , 1 } , lt [ 3 ] = { 256 , 1 , 1 } ;
// The kernel uses a fixed grid size; replacing lt with NULL is impossible without kernel changes.
openCLExecuteKernel ( src . clCxt , & arithm_minMaxLoc , " arithm_op_minMaxLoc " , gt , lt , args , - 1 , - 1 , build_options ) ;
}
@ -1126,6 +1162,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & mask . data ) ) ;
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & dst ) ) ;
// The kernel uses a fixed grid size; replacing lt with NULL is impossible without kernel changes.
openCLExecuteKernel ( src . clCxt , & arithm_minMaxLoc_mask , " arithm_op_minMaxLoc_mask " , gt , lt , args , - 1 , - 1 , build_options ) ;
}
}
@ -1243,10 +1280,15 @@ static void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int grou
args . push_back ( make_pair ( sizeof ( cl_mem ) , ( void * ) & dst ) ) ;
size_t globalThreads [ 3 ] = { groupnum * 256 , 1 , 1 } ;
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
# ifdef ANDROID
openCLExecuteKernel ( src . clCxt , & arithm_nonzero , kernelName , globalThreads , NULL ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
# else
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
openCLExecuteKernel ( src . clCxt , & arithm_nonzero , kernelName , globalThreads , localThreads ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
# endif
}
int cv : : ocl : : countNonZero ( const oclMat & src )
@ -1304,7 +1346,11 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
int offset_cols = ( dst . offset / dst . elemSize1 ( ) ) & ( vector_length - 1 ) ;
int cols = divUp ( dst . cols * channels + offset_cols , vector_length ) ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 64 , 2 , 1 } ;
# else
size_t localThreads [ 3 ] = { 64 , 4 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { cols , dst . rows , 1 } ;
int dst_step1 = dst . cols * dst . elemSize ( ) ;
@ -1344,7 +1390,11 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
operationMap [ operationType ] , vlenstr . c_str ( ) , vlenstr . c_str ( ) ,
( int ) src1 . elemSize ( ) , vlen , vlenstr . c_str ( ) ) ;
# ifdef ANDROID
size_t localThreads [ 3 ] = { 16 , 10 , 1 } ;
# else
size_t localThreads [ 3 ] = { 16 , 16 , 1 } ;
# endif
size_t globalThreads [ 3 ] = { dst . cols , dst . rows , 1 } ;
vector < pair < size_t , const void * > > args ;
@ -1592,7 +1642,6 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
typeMap [ depth ] , hasDouble ? " double " : " float " , typeMap [ depth ] ,
depth > = CV_32F ? " " : " _sat_rte " ) ;
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
size_t globalThreads [ 3 ] = { cols1 , dst . rows , 1 } ;
float alpha_f = static_cast < float > ( alpha ) ,
@ -1626,8 +1675,14 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
args . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & cols1 ) ) ;
args . push_back ( make_pair ( sizeof ( cl_int ) , ( void * ) & src1 . rows ) ) ;
# ifdef ANDROID
openCLExecuteKernel ( clCxt , & arithm_addWeighted , " addWeighted " , globalThreads , NULL ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
# else
size_t localThreads [ 3 ] = { 256 , 1 , 1 } ;
openCLExecuteKernel ( clCxt , & arithm_addWeighted , " addWeighted " , globalThreads , localThreads ,
args , - 1 , - 1 , buildOptions . c_str ( ) ) ;
# endif
}
//////////////////////////////////////////////////////////////////////////////