From 433c3da7cfad09c191dc234f0693a86bc4fd628f Mon Sep 17 00:00:00 2001 From: Yan Wang <yan.wang@linux.intel.com> Date: Mon, 23 Mar 2015 17:47:40 +0800 Subject: [PATCH] Optimize the performance of cascade OpenCL kernel. 1. Use built-in mad() instead of += and *. 2. For stump stages, if weight.z == 0, avoid unnecessary calculation because some features only have 2 rectangles. This improves the mean values of OCL_Cascade_Image_MinSize_CascadeClassifier.CascadeClassifier/* by about 10%. Signed-off-by: Yan Wang <yan.wang@linux.intel.com> --- modules/objdetect/src/opencl/cascadedetect.cl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/objdetect/src/opencl/cascadedetect.cl b/modules/objdetect/src/opencl/cascadedetect.cl index 854a7f617d..7ab581a282 100644 --- a/modules/objdetect/src/opencl/cascadedetect.cl +++ b/modules/objdetect/src/opencl/cascadedetect.cl @@ -180,11 +180,11 @@ void runHaarClassifier( int4 ofs = f->ofs[0]; sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x; ofs = f->ofs[1]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval); if( weight.z > 0 ) { ofs = f->ofs[2]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval); } s += (sval < st.y*nf) ? st.z : st.w; @@ -204,11 +204,11 @@ void runHaarClassifier( sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x; ofs = f->ofs[1]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval); if( weight.z > 0 ) { ofs = f->ofs[2]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval); } idx = (sval < as_float(n.y)*nf) ? 
n.z : n.w; @@ -281,11 +281,12 @@ void runHaarClassifier( int4 ofs = f->ofs[0]; float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x; ofs = f->ofs[1]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval); //if( weight.z > 0 ) + if( fabs(weight.z) > 0 ) { ofs = f->ofs[2]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval); } partsum += (sval < st.y*nf) ? st.z : st.w; @@ -303,11 +304,11 @@ void runHaarClassifier( float sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x; ofs = f->ofs[1]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.y, sval); if( weight.z > 0 ) { ofs = f->ofs[2]; - sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z; + sval = mad((psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w]), weight.z, sval); } idx = (sval < as_float(n.y)*nf) ? n.z : n.w;