Optimized mog and mog2, which now have much better performance.

pull/1283/head
Jin Ma 12 years ago
parent 1bcd1fd3a2
commit 1e8194fd3c
  1. 15
      modules/ocl/src/bgfg_mog.cpp
  2. 193
      modules/ocl/src/opencl/bgfg_mog.cl
  3. 2
      modules/ocl/test/test_bgfg.cpp

@ -254,7 +254,7 @@ static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, ocl
} }
static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar) int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
{ {
Context* clCxt = Context::getContext(); Context* clCxt = Context::getContext();
@ -262,6 +262,8 @@ static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat
size_t local_thread[] = {32, 8, 1}; size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {frame.cols, frame.rows, 1}; size_t global_thread[] = {frame.cols, frame.rows, 1};
oclMat fgmask(fgmask_raw.size(), CV_32SC1);
int frame_step = (int)(frame.step/frame.elemSize()); int frame_step = (int)(frame.step/frame.elemSize());
int fgmask_step = (int)(fgmask.step/fgmask.elemSize()); int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
int weight_step = (int)(weight.step/weight.elemSize()); int weight_step = (int)(weight.step/weight.elemSize());
@ -318,6 +320,8 @@ static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat
args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y)); args.push_back(make_pair(sizeof(cl_int), (void*)&frame_offset_y));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
fgmask.convertTo(fgmask, CV_8U);
fgmask.copyTo(fgmask_raw);
} }
void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var, void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
@ -392,9 +396,11 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var
(void *)constants, sizeof(_contant_struct)); (void *)constants, sizeof(_contant_struct));
} }
void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures) oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
{ {
oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
Context* clCxt = Context::getContext(); Context* clCxt = Context::getContext();
const float alpha1 = 1.0f - alphaT; const float alpha1 = 1.0f - alphaT;
@ -464,6 +470,9 @@ void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask,
args.push_back(make_pair(sizeof(cl_mem), (void*)&cl_constants)); args.push_back(make_pair(sizeof(cl_mem), (void*)&cl_constants));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option); openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
fgmask.convertTo(fgmask, CV_8U);
fgmask.copyTo(fgmaskRaw);
} }
void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures) void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
@ -580,7 +589,7 @@ void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
mean_.setTo(Scalar::all(0)); mean_.setTo(Scalar::all(0));
//make the array for keeping track of the used modes per pixel - all zeros at start //make the array for keeping track of the used modes per pixel - all zeros at start
bgmodelUsedModes_.create(frameSize_, CV_8UC1); bgmodelUsedModes_.create(frameSize_, CV_32FC1);
bgmodelUsedModes_.setTo(cv::Scalar::all(0)); bgmodelUsedModes_.setTo(cv::Scalar::all(0));
loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection); loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);

@ -188,7 +188,7 @@ __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar
} }
} }
__kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask, __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global int* fgmask,
__global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean, __global float* weight, __global float* sortKey, __global T_MEAN_VAR* mean,
__global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step, __global T_MEAN_VAR* var, int frame_row, int frame_col, int frame_step, int fgmask_step,
int weight_step, int sortKey_step, int mean_step, int var_step, int weight_step, int sortKey_step, int mean_step, int var_step,
@ -202,130 +202,125 @@ __kernel void mog_withLearning_kernel(__global T_FRAME* frame, __global uchar* f
int x = get_global_id(0); int x = get_global_id(0);
int y = get_global_id(1); int y = get_global_id(1);
if(x < frame_col && y < frame_row) if(x >= frame_col || y >= frame_row) return;
{ float wsum = 0.0f;
int kHit = -1;
float wsum = 0.0f; int kForeground = -1;
int kHit = -1; int k = 0;
int kForeground = -1;
int k = 0;
T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]); T_MEAN_VAR pix = cvt(frame[(y + frame_offset_y) * frame_step + (x + frame_offset_x)]);
for (; k < (NMIXTURES); ++k)
{
float w = weight[(k * frame_row + y) * weight_step + x];
wsum += w;
if (w < 1.192092896e-07f) for (; k < (NMIXTURES); ++k)
break; {
float w = weight[(k * frame_row + y) * weight_step + x];
wsum += w;
T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x]; if (w < 1.192092896e-07f)
T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x]; break;
T_MEAN_VAR diff = pix - mu; T_MEAN_VAR mu = mean[(k * frame_row + y) * mean_step + x];
T_MEAN_VAR _var = var[(k * frame_row + y) * var_step + x];
if (sqr(diff) < varThreshold * sum(_var)) float sortKey_prev, weight_prev;
{ T_MEAN_VAR mean_prev, var_prev;
wsum -= w; if (sqr(pix - mu) < varThreshold * sum(_var))
float dw = learningRate * (1.0f - w); {
wsum -= w;
_var = clamp1(_var, learningRate, diff, minVar); float dw = learningRate * (1.0f - w);
float sortKey_prev = w / sqr(sum(_var)); _var = clamp1(_var, learningRate, pix - mu, minVar);
sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
float weight_prev = w + dw; sortKey_prev = w / sqr(sum(_var));
weight[(k * frame_row + y) * weight_step + x] = weight_prev; sortKey[(k * frame_row + y) * sortKey_step + x] = sortKey_prev;
T_MEAN_VAR mean_prev = mu + learningRate * diff; weight_prev = w + dw;
mean[(k * frame_row + y) * mean_step + x] = mean_prev; weight[(k * frame_row + y) * weight_step + x] = weight_prev;
T_MEAN_VAR var_prev = _var; mean_prev = mu + learningRate * (pix - mu);
var[(k * frame_row + y) * var_step + x] = var_prev; mean[(k * frame_row + y) * mean_step + x] = mean_prev;
int k1 = k - 1; var_prev = _var;
var[(k * frame_row + y) * var_step + x] = var_prev;
}
if (k1 >= 0) int k1 = k - 1;
{
float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
for (; sortKey_next < sortKey_prev && k1 >= 0; --k1) if (k1 >= 0 && sqr(pix - mu) < varThreshold * sum(_var))
{ {
sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev; float sortKey_next = sortKey[(k1 * frame_row + y) * sortKey_step + x];
sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next; float weight_next = weight[(k1 * frame_row + y) * weight_step + x];
T_MEAN_VAR mean_next = mean[(k1 * frame_row + y) * mean_step + x];
T_MEAN_VAR var_next = var[(k1 * frame_row + y) * var_step + x];
weight[(k1 * frame_row + y) * weight_step + x] = weight_prev; for (; sortKey_next < sortKey_prev && k1 >= 0; --k1)
weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next; {
sortKey[(k1 * frame_row + y) * sortKey_step + x] = sortKey_prev;
sortKey[((k1 + 1) * frame_row + y) * sortKey_step + x] = sortKey_next;
mean[(k1 * frame_row + y) * mean_step + x] = mean_prev; weight[(k1 * frame_row + y) * weight_step + x] = weight_prev;
mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next; weight[((k1 + 1) * frame_row + y) * weight_step + x] = weight_next;
var[(k1 * frame_row + y) * var_step + x] = var_prev; mean[(k1 * frame_row + y) * mean_step + x] = mean_prev;
var[((k1 + 1) * frame_row + y) * var_step + x] = var_next; mean[((k1 + 1) * frame_row + y) * mean_step + x] = mean_next;
sortKey_prev = sortKey_next; var[(k1 * frame_row + y) * var_step + x] = var_prev;
sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f; var[((k1 + 1) * frame_row + y) * var_step + x] = var_next;
weight_prev = weight_next; sortKey_prev = sortKey_next;
weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f; sortKey_next = k1 > 0 ? sortKey[((k1 - 1) * frame_row + y) * sortKey_step + x] : 0.0f;
mean_prev = mean_next; weight_prev = weight_next;
mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO; weight_next = k1 > 0 ? weight[((k1 - 1) * frame_row + y) * weight_step + x] : 0.0f;
var_prev = var_next; mean_prev = mean_next;
var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO; mean_next = k1 > 0 ? mean[((k1 - 1) * frame_row + y) * mean_step + x] : (T_MEAN_VAR)F_ZERO;
}
}
kHit = k1 + 1; var_prev = var_next;
break; var_next = k1 > 0 ? var[((k1 - 1) * frame_row + y) * var_step + x] : (T_MEAN_VAR)F_ZERO;
} }
} }
if (kHit < 0) kHit = k1 + 1;
{ break;
kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1); }
wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
weight[(k * frame_row + y) * weight_step + x] = w0;
mean[(k * frame_row + y) * mean_step + x] = pix;
#if defined (CN1)
var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
#else
var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
#endif
sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
}
else
{
for( ; k < (NMIXTURES); k++)
wsum += weight[(k * frame_row + y) * weight_step + x];
}
float wscale = 1.0f / wsum; if (kHit < 0)
wsum = 0; {
for (k = 0; k < (NMIXTURES); ++k) kHit = k = k < ((NMIXTURES) - 1) ? k : ((NMIXTURES) - 1);
{ wsum += w0 - weight[(k * frame_row + y) * weight_step + x];
float w = weight[(k * frame_row + y) * weight_step + x];
wsum += w *= wscale; weight[(k * frame_row + y) * weight_step + x] = w0;
mean[(k * frame_row + y) * mean_step + x] = pix;
#if defined (CN1)
var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0);
#else
var[(k * frame_row + y) * var_step + x] = (T_MEAN_VAR)(var0, var0, var0, var0);
#endif
sortKey[(k * frame_row + y) * sortKey_step + x] = sk0;
}
else
{
for( ; k < (NMIXTURES); k++)
wsum += weight[(k * frame_row + y) * weight_step + x];
}
weight[(k * frame_row + y) * weight_step + x] = w; float wscale = 1.0f / wsum;
sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale; wsum = 0;
for (k = 0; k < (NMIXTURES); ++k)
{
float w = weight[(k * frame_row + y) * weight_step + x];
w *= wscale;
wsum += w;
if (wsum > backgroundRatio && kForeground < 0) weight[(k * frame_row + y) * weight_step + x] = w;
kForeground = k + 1; sortKey[(k * frame_row + y) * sortKey_step + x] *= wscale;
}
if(kHit >= kForeground) kForeground = select(kForeground, k + 1, wsum > backgroundRatio && kForeground < 0);
fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-1);
else
fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(0);
} }
fgmask[(y + fgmask_offset_y) * fgmask_step + (x + fgmask_offset_x)] = (uchar)(-(kHit >= kForeground));
} }
__kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst, __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_VAR* mean, __global T_FRAME* dst,
int dst_row, int dst_col, int weight_step, int mean_step, int dst_step, int dst_row, int dst_col, int weight_step, int mean_step, int dst_step,
float backgroundRatio) float backgroundRatio)
@ -355,8 +350,8 @@ __kernel void getBackgroundImage_kernel(__global float* weight, __global T_MEAN_
} }
} }
__kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __global float* weight, __global T_MEAN_VAR * mean, __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __global float* weight, __global T_MEAN_VAR * mean,
__global uchar* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step, __global int* modesUsed, __global float* variance, int frame_row, int frame_col, int frame_step,
int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune, int fgmask_step, int weight_step, int mean_step, int modesUsed_step, int var_step, float alphaT, float alpha1, float prune,
int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants) int detectShadows_flag, int fgmask_offset_x, int fgmask_offset_y, int frame_offset_x, int frame_offset_y, __constant con_srtuct_t* constants)
{ {
@ -509,7 +504,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global uchar* fgmask, __gl
} }
} }
__kernel void getBackgroundImage2_kernel(__global uchar* modesUsed, __global float* weight, __global T_MEAN_VAR* mean, __kernel void getBackgroundImage2_kernel(__global int* modesUsed, __global float* weight, __global T_MEAN_VAR* mean,
__global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step, __global T_FRAME* dst, float c_TB, int modesUsed_row, int modesUsed_col, int modesUsed_step, int weight_step,
int mean_step, int dst_step, int dst_x, int dst_y) int mean_step, int dst_step, int dst_x, int dst_y)
{ {

@ -191,7 +191,7 @@ TEST_P(mog2, getBackgroundImage)
if (useGray) if (useGray)
return; return;
std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "gpu/768x576.avi"; std::string inputFile = string(cvtest::TS::ptr()->get_data_path()) + "video/768x576.avi";
cv::VideoCapture cap(inputFile); cv::VideoCapture cap(inputFile);
ASSERT_TRUE(cap.isOpened()); ASSERT_TRUE(cap.isOpened());

Loading…
Cancel
Save