Merge pull request #12829 from tomoaki0705:fixTestVideoOpenCLPyrLK

pull/12905/head
Alexander Alekhin 6 years ago
commit ab2c16b2e5
  1. 25
      modules/video/src/lkpyramid.cpp
  2. 97
      modules/video/src/opencl/pyrlk.cl

@ -814,7 +814,7 @@ namespace
double minEigThreshold_ = 1e-4) : double minEigThreshold_ = 1e-4) :
winSize(winSize_), maxLevel(maxLevel_), criteria(criteria_), flags(flags_), minEigThreshold(minEigThreshold_) winSize(winSize_), maxLevel(maxLevel_), criteria(criteria_), flags(flags_), minEigThreshold(minEigThreshold_)
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
, iters(criteria_.maxCount), derivLambda(criteria_.epsilon), useInitialFlow(0 != (flags_ & OPTFLOW_LK_GET_MIN_EIGENVALS)), waveSize(0) , iters(criteria_.maxCount), derivLambda(criteria_.epsilon), useInitialFlow(0 != (flags_ & OPTFLOW_LK_GET_MIN_EIGENVALS))
#endif #endif
{ {
} }
@ -856,8 +856,6 @@ namespace
calcPatchSize(); calcPatchSize();
if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6) if (patch.x <= 0 || patch.x >= 6 || patch.y <= 0 || patch.y >= 6)
return false; return false;
if (!initWaveSize())
return false;
return true; return true;
} }
@ -926,19 +924,6 @@ namespace
int iters; int iters;
double derivLambda; double derivLambda;
bool useInitialFlow; bool useInitialFlow;
int waveSize;
bool initWaveSize()
{
waveSize = 1;
if (isDeviceCPU())
return true;
ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, ""))
return false;
waveSize = (int)kernel.preferedWorkGroupSizeMultiple();
return true;
}
dim3 patch; dim3 patch;
void calcPatchSize() void calcPatchSize()
{ {
@ -977,8 +962,8 @@ namespace
if (isDeviceCPU()) if (isDeviceCPU())
build_options = " -D CPU"; build_options = " -D CPU";
else else
build_options = cv::format("-D WAVE_SIZE=%d -D WSX=%d -D WSY=%d", build_options = cv::format("-D WSX=%d -D WSY=%d",
waveSize, wsx, wsy); wsx, wsy);
ocl::Kernel kernel; ocl::Kernel kernel;
if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options)) if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options))
@ -1064,7 +1049,9 @@ namespace
_status.create((int)npoints, 1, CV_8UC1); _status.create((int)npoints, 1, CV_8UC1);
UMat umatNextPts = _nextPts.getUMat(); UMat umatNextPts = _nextPts.getUMat();
UMat umatStatus = _status.getUMat(); UMat umatStatus = _status.getUMat();
return sparse(_prevImg.getUMat(), _nextImg.getUMat(), _prevPts.getUMat(), umatNextPts, umatStatus, umatErr); UMat umatPrevPts;
_prevPts.getMat().copyTo(umatPrevPts);
return sparse(_prevImg.getUMat(), _nextImg.getUMat(), umatPrevPts, umatNextPts, umatStatus, umatErr);
} }
#endif #endif

@ -53,9 +53,6 @@
#define LM_H (LSy*GRIDSIZE+2) #define LM_H (LSy*GRIDSIZE+2)
#define BUFFER (LSx*LSy) #define BUFFER (LSx*LSy)
#define BUFFER2 BUFFER>>1 #define BUFFER2 BUFFER>>1
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif
#ifdef CPU #ifdef CPU
@ -78,7 +75,7 @@ inline void reduce3(float val1, float val2, float val3, __local float* smem1,
} }
} }
inline void reduce2(float val1, float val2, volatile __local float* smem1, volatile __local float* smem2, int tid) inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
{ {
smem1[tid] = val1; smem1[tid] = val1;
smem2[tid] = val2; smem2[tid] = val2;
@ -95,7 +92,7 @@ inline void reduce2(float val1, float val2, volatile __local float* smem1, volat
} }
} }
inline void reduce1(float val1, volatile __local float* smem1, int tid) inline void reduce1(float val1, __local float* smem1, int tid)
{ {
smem1[tid] = val1; smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
@ -111,7 +108,7 @@ inline void reduce1(float val1, volatile __local float* smem1, int tid)
} }
#else #else
inline void reduce3(float val1, float val2, float val3, inline void reduce3(float val1, float val2, float val3,
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid) __local float* smem1, __local float* smem2, __local float* smem3, int tid)
{ {
smem1[tid] = val1; smem1[tid] = val1;
smem2[tid] = val2; smem2[tid] = val2;
@ -123,38 +120,39 @@ inline void reduce3(float val1, float val2, float val3,
smem1[tid] += smem1[tid + 32]; smem1[tid] += smem1[tid + 32];
smem2[tid] += smem2[tid + 32]; smem2[tid] += smem2[tid + 32];
smem3[tid] += smem3[tid + 32]; smem3[tid] += smem3[tid + 32];
#if WAVE_SIZE < 32
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) if (tid < 16)
{ {
#endif
smem1[tid] += smem1[tid + 16]; smem1[tid] += smem1[tid + 16];
smem2[tid] += smem2[tid + 16]; smem2[tid] += smem2[tid + 16];
smem3[tid] += smem3[tid + 16]; smem3[tid] += smem3[tid + 16];
#if WAVE_SIZE <16
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid<1) if (tid < 8)
{ {
#endif smem1[tid] += smem1[tid + 8];
local float8* m1 = (local float8*)smem1; smem2[tid] += smem2[tid + 8];
local float8* m2 = (local float8*)smem2; smem3[tid] += smem3[tid + 8];
local float8* m3 = (local float8*)smem3; }
float8 t1 = m1[0]+m1[1]; barrier(CLK_LOCAL_MEM_FENCE);
float8 t2 = m2[0]+m2[1]; if (tid < 4)
float8 t3 = m3[0]+m3[1]; {
float4 t14 = t1.lo + t1.hi; smem1[tid] += smem1[tid + 4];
float4 t24 = t2.lo + t2.hi; smem2[tid] += smem2[tid + 4];
float4 t34 = t3.lo + t3.hi; smem3[tid] += smem3[tid + 4];
smem1[0] = t14.x+t14.y+t14.z+t14.w; }
smem2[0] = t24.x+t24.y+t24.z+t24.w; barrier(CLK_LOCAL_MEM_FENCE);
smem3[0] = t34.x+t34.y+t34.z+t34.w; if (tid == 0)
{
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
smem3[0] = (smem3[0] + smem3[1]) + (smem3[2] + smem3[3]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
inline void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid) inline void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
{ {
smem1[tid] = val1; smem1[tid] = val1;
smem2[tid] = val2; smem2[tid] = val2;
@ -164,33 +162,35 @@ inline void reduce2(float val1, float val2, __local volatile float* smem1, __loc
{ {
smem1[tid] += smem1[tid + 32]; smem1[tid] += smem1[tid + 32];
smem2[tid] += smem2[tid + 32]; smem2[tid] += smem2[tid + 32];
#if WAVE_SIZE < 32
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) if (tid < 16)
{ {
#endif
smem1[tid] += smem1[tid + 16]; smem1[tid] += smem1[tid + 16];
smem2[tid] += smem2[tid + 16]; smem2[tid] += smem2[tid + 16];
#if WAVE_SIZE <16
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid<1) if (tid < 8)
{ {
#endif smem1[tid] += smem1[tid + 8];
local float8* m1 = (local float8*)smem1; smem2[tid] += smem2[tid + 8];
local float8* m2 = (local float8*)smem2; }
float8 t1 = m1[0]+m1[1]; barrier(CLK_LOCAL_MEM_FENCE);
float8 t2 = m2[0]+m2[1]; if (tid < 4)
float4 t14 = t1.lo + t1.hi; {
float4 t24 = t2.lo + t2.hi; smem1[tid] += smem1[tid + 4];
smem1[0] = t14.x+t14.y+t14.z+t14.w; smem2[tid] += smem2[tid + 4];
smem2[0] = t24.x+t24.y+t24.z+t24.w; }
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
{
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
smem2[0] = (smem2[0] + smem2[1]) + (smem2[2] + smem2[3]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
inline void reduce1(float val1, __local volatile float* smem1, int tid) inline void reduce1(float val1, __local float* smem1, int tid)
{ {
smem1[tid] = val1; smem1[tid] = val1;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
@ -198,23 +198,26 @@ inline void reduce1(float val1, __local volatile float* smem1, int tid)
if (tid < 32) if (tid < 32)
{ {
smem1[tid] += smem1[tid + 32]; smem1[tid] += smem1[tid + 32];
#if WAVE_SIZE < 32
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) if (tid < 16)
{ {
#endif
smem1[tid] += smem1[tid + 16]; smem1[tid] += smem1[tid + 16];
#if WAVE_SIZE <16
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid<1) if (tid < 8)
{ {
#endif smem1[tid] += smem1[tid + 8];
local float8* m1 = (local float8*)smem1; }
float8 t1 = m1[0]+m1[1]; barrier(CLK_LOCAL_MEM_FENCE);
float4 t14 = t1.lo + t1.hi; if (tid < 4)
smem1[0] = t14.x+t14.y+t14.z+t14.w; {
smem1[tid] += smem1[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
{
smem1[0] = (smem1[0] + smem1[1]) + (smem1[2] + smem1[3]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }

Loading…
Cancel
Save