Optimize runLBPClassifierStumpSimple by using the built-in mad24.

Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
10 years ago · df697f6520
parent 4949f70860
commit df697f6520
1 changed file with 2 additions and 2 deletions
--- a/modules/objdetect/src/opencl/cascadedetect.cl
+++ b/modules/objdetect/src/opencl/cascadedetect.cl
@@ -397,8 +397,8 @@ __kernel void runLBPClassifierStumpSimple(

        for( tileIdx = groupIdx; tileIdx < totalTiles; tileIdx += ngroups )
        {
-            int iy = ((tileIdx / ntiles.x)*local_size_y + ly)*ystep;
-            int ix = ((tileIdx % ntiles.x)*local_size_x + lx)*ystep;
+            int iy = mad24((tileIdx / ntiles.x), local_size_y, ly) * ystep;
+            int ix = mad24((tileIdx % ntiles.x), local_size_x, lx) * ystep;

            if( ix < worksize.x && iy < worksize.y )
            {