Merge pull request #2217 from ilya-lavrenov:tapi_superres

commit 0fef7f8b96 by Andrey Pavlenko, committed by OpenCV Buildbot
Changed files (changed-line counts in parentheses):
  1. modules/core/include/opencv2/core/mat.hpp (2)
  2. modules/core/include/opencv2/core/opencl/ocl_defs.hpp (2)
  3. modules/core/src/matrix.cpp (12)
  4. modules/superres/CMakeLists.txt (3)
  5. modules/superres/include/opencv2/superres.hpp (2)
  6. modules/superres/perf/perf_superres.cpp (67)
  7. modules/superres/perf/perf_superres_ocl.cpp (143)
  8. modules/superres/src/btv_l1.cpp (611)
  9. modules/superres/src/btv_l1_ocl.cpp (725)
  10. modules/superres/src/frame_source.cpp (15)
  11. modules/superres/src/input_array_utility.cpp (149)
  12. modules/superres/src/input_array_utility.hpp (9)
  13. modules/superres/src/opencl/superres_btvl1.cl (189)
  14. modules/superres/src/optical_flow.cpp (342)
  15. modules/superres/src/precomp.hpp (4)
  16. modules/superres/src/super_resolution.cpp (5)
  17. modules/superres/test/test_superres.cpp (51)
  18. modules/ts/include/opencv2/ts/ocl_perf.hpp (4)

@@ -217,7 +217,7 @@ public:
virtual void createSameSize(const _InputArray& arr, int mtype) const;
virtual void release() const;
virtual void clear() const;
virtual void setTo(const _InputArray& value) const;
virtual void setTo(const _InputArray& value, const _InputArray & mask = _InputArray()) const;
};

@@ -5,6 +5,8 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//#define CV_OPENCL_RUN_VERBOSE
#ifdef HAVE_OPENCL
#ifdef CV_OPENCL_RUN_VERBOSE

@@ -2560,7 +2560,7 @@ cuda::CudaMem& _OutputArray::getCudaMemRef() const
return *(cuda::CudaMem*)obj;
}
void _OutputArray::setTo(const _InputArray& arr) const
void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
{
int k = kind();
@@ -2569,10 +2569,16 @@ void _OutputArray::setTo(const _InputArray& arr) const
else if( k == MAT || k == MATX || k == STD_VECTOR )
{
Mat m = getMat();
m.setTo(arr);
m.setTo(arr, mask);
}
else if( k == UMAT )
((UMat*)obj)->setTo(arr);
((UMat*)obj)->setTo(arr, mask);
else if( k == GPU_MAT )
{
Mat value = arr.getMat();
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::GPU_MAT) );
((cuda::GpuMat*)obj)->setTo(Scalar(Vec<double, 4>((double *)value.data)), mask);
}
else
CV_Error(Error::StsNotImplemented, "");
}
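Note (editor's sketch): the wrapper above simply forwards to Mat::setTo / UMat::setTo, so the new mask argument behaves like the familiar masked fill. A minimal usage sketch with illustrative values, not part of the patch:
#include <opencv2/core.hpp>
// Hedged usage sketch of the masked setTo overload added above.
cv::Mat img(4, 4, CV_8UC1, cv::Scalar(0));
cv::Mat mask = cv::Mat::zeros(4, 4, CV_8UC1);
mask(cv::Rect(0, 0, 2, 2)) = 255;        // mark the top-left 2x2 block
img.setTo(cv::Scalar(7), mask);          // only pixels where mask != 0 become 7
cv::UMat uimg(4, 4, CV_8UC1, cv::Scalar(0));
uimg.setTo(cv::Scalar(7), mask);         // same overload on UMat (OpenCL path when available)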

@@ -5,5 +5,4 @@ endif()
set(the_description "Super Resolution")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
ocv_define_module(superres opencv_imgproc opencv_video
OPTIONAL opencv_highgui opencv_ocl
opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc opencv_cudaoptflow opencv_cudacodec)
OPTIONAL opencv_highgui opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc opencv_cudaoptflow opencv_cudacodec)

@@ -83,6 +83,8 @@ namespace cv
virtual void initImpl(Ptr<FrameSource>& frameSource) = 0;
virtual void processImpl(Ptr<FrameSource>& frameSource, OutputArray output) = 0;
bool isUmat_;
private:
Ptr<FrameSource> frameSource_;
bool firstCall_;

@@ -41,6 +41,7 @@
//M*/
#include "perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
using namespace std;
using namespace std::tr1;
@@ -91,37 +92,26 @@ namespace
class ZeroOpticalFlow : public DenseOpticalFlowExt
{
public:
void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2)
virtual void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2)
{
cv::Size size = frame0.size();
if (!flow2.needed())
{
flow1.create(size, CV_32FC2);
if (flow1.kind() == cv::_InputArray::GPU_MAT)
flow1.getGpuMatRef().setTo(cv::Scalar::all(0));
else
flow1.getMatRef().setTo(cv::Scalar::all(0));
flow1.setTo(cv::Scalar::all(0));
}
else
{
flow1.create(size, CV_32FC1);
flow2.create(size, CV_32FC1);
if (flow1.kind() == cv::_InputArray::GPU_MAT)
flow1.getGpuMatRef().setTo(cv::Scalar::all(0));
else
flow1.getMatRef().setTo(cv::Scalar::all(0));
if (flow2.kind() == cv::_InputArray::GPU_MAT)
flow2.getGpuMatRef().setTo(cv::Scalar::all(0));
else
flow2.getMatRef().setTo(cv::Scalar::all(0));
flow1.setTo(cv::Scalar::all(0));
flow2.setTo(cv::Scalar::all(0));
}
}
void collectGarbage()
virtual void collectGarbage()
{
}
};
@@ -181,3 +171,48 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1,
CPU_SANITY_CHECK(dst);
}
}
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
typedef Size_MatType SuperResolution_BTVL1;
OCL_PERF_TEST_P(SuperResolution_BTVL1 ,BTVL1,
Combine(Values(szSmall64, szSmall128),
Values(MatType(CV_8UC1), MatType(CV_8UC3))))
{
Size_MatType_t params = GetParam();
const Size size = get<0>(params);
const int type = get<1>(params);
Mat frame(size, type);
UMat dst(1, 1, 0);
declare.in(frame, WARMUP_RNG);
const int scale = 2;
const int iterations = 50;
const int temporalAreaRadius = 1;
Ptr<DenseOpticalFlowExt> opticalFlow(new ZeroOpticalFlow);
Ptr<SuperResolution> superRes = createSuperResolution_BTVL1();
superRes->set("scale", scale);
superRes->set("iterations", iterations);
superRes->set("temporalAreaRadius", temporalAreaRadius);
superRes->set("opticalFlow", opticalFlow);
superRes->setInput(makePtr<OneFrameSource_CPU>(frame));
// skip first frame
superRes->nextFrame(dst);
OCL_TEST_CYCLE_N(10) superRes->nextFrame(dst);
SANITY_CHECK_NOTHING();
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL
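Note (editor's sketch): the OCL_PERF_TEST_P case above is intended to exercise the same pipeline with the T-API OpenCL path enabled; the toggle it relies on can also be set manually. A short sketch, not from the patch:
#include <opencv2/core/ocl.hpp>
// Hedged sketch: enable/inspect the OpenCL execution path used by UMat-based code.
cv::ocl::setUseOpenCL(true);
bool oclActive = cv::ocl::useOpenCL();   // false when no usable OpenCL device is present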

@@ -1,143 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_OPENCV_OCL
#include "opencv2/ocl.hpp"
using namespace std;
using namespace testing;
using namespace perf;
using namespace cv;
using namespace cv::superres;
namespace
{
class OneFrameSource_OCL : public FrameSource
{
public:
explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {}
void nextFrame(OutputArray frame)
{
ocl::getOclMatRef(frame) = frame_;
}
void reset()
{
}
private:
ocl::oclMat frame_;
};
class ZeroOpticalFlowOCL : public DenseOpticalFlowExt
{
public:
void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2)
{
ocl::oclMat& frame0_ = ocl::getOclMatRef(frame0);
ocl::oclMat& flow1_ = ocl::getOclMatRef(flow1);
ocl::oclMat& flow2_ = ocl::getOclMatRef(flow2);
cv::Size size = frame0_.size();
if(!flow2.needed())
{
flow1_.create(size, CV_32FC2);
flow1_.setTo(Scalar::all(0));
}
else
{
flow1_.create(size, CV_32FC1);
flow2_.create(size, CV_32FC1);
flow1_.setTo(Scalar::all(0));
flow2_.setTo(Scalar::all(0));
}
}
void collectGarbage()
{
}
};
}
PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL,
Combine(Values(szSmall64, szSmall128),
Values(MatType(CV_8UC1), MatType(CV_8UC3))))
{
declare.time(5 * 60);
const Size size = std::tr1::get<0>(GetParam());
const int type = std::tr1::get<1>(GetParam());
Mat frame(size, type);
declare.in(frame, WARMUP_RNG);
ocl::oclMat frame_ocl;
frame_ocl.upload(frame);
const int scale = 2;
const int iterations = 50;
const int temporalAreaRadius = 1;
Ptr<DenseOpticalFlowExt> opticalFlowOcl(new ZeroOpticalFlowOCL);
Ptr<SuperResolution> superRes_ocl = createSuperResolution_BTVL1_OCL();
superRes_ocl->set("scale", scale);
superRes_ocl->set("iterations", iterations);
superRes_ocl->set("temporalAreaRadius", temporalAreaRadius);
superRes_ocl->set("opticalFlow", opticalFlowOcl);
superRes_ocl->setInput(makePtr<OneFrameSource_OCL>(frame_ocl));
ocl::oclMat dst_ocl;
superRes_ocl->nextFrame(dst_ocl);
TEST_CYCLE_N(10) superRes_ocl->nextFrame(dst_ocl);
frame_ocl.release();
CPU_SANITY_CHECK(dst_ocl);
}
#endif

@@ -44,6 +44,7 @@
// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::superres;
@@ -51,10 +52,17 @@ using namespace cv::superres::detail;
namespace
{
void calcRelativeMotions(const std::vector<Mat>& forwardMotions, const std::vector<Mat>& backwardMotions,
std::vector<Mat>& relForwardMotions, std::vector<Mat>& relBackwardMotions,
int baseIdx, Size size)
#ifdef HAVE_OPENCL
bool ocl_calcRelativeMotions(InputArrayOfArrays _forwardMotions, InputArrayOfArrays _backwardMotions,
OutputArrayOfArrays _relForwardMotions, OutputArrayOfArrays _relBackwardMotions,
int baseIdx, const Size & size)
{
std::vector<UMat> & forwardMotions = *(std::vector<UMat> *)_forwardMotions.getObj(),
& backwardMotions = *(std::vector<UMat> *)_backwardMotions.getObj(),
& relForwardMotions = *(std::vector<UMat> *)_relForwardMotions.getObj(),
& relBackwardMotions = *(std::vector<UMat> *)_relBackwardMotions.getObj();
const int count = static_cast<int>(forwardMotions.size());
relForwardMotions.resize(count);
@@ -68,20 +76,84 @@ namespace
for (int i = baseIdx - 1; i >= 0; --i)
{
add(relForwardMotions[i + 1], forwardMotions[i], relForwardMotions[i]);
add(relBackwardMotions[i + 1], backwardMotions[i + 1], relBackwardMotions[i]);
}
for (int i = baseIdx + 1; i < count; ++i)
{
add(relForwardMotions[i - 1], backwardMotions[i], relForwardMotions[i]);
add(relBackwardMotions[i - 1], forwardMotions[i - 1], relBackwardMotions[i]);
}
return true;
}
#endif
void calcRelativeMotions(InputArrayOfArrays _forwardMotions, InputArrayOfArrays _backwardMotions,
OutputArrayOfArrays _relForwardMotions, OutputArrayOfArrays _relBackwardMotions,
int baseIdx, const Size & size)
{
CV_OCL_RUN(_forwardMotions.isUMatVector() && _backwardMotions.isUMatVector() &&
_relForwardMotions.isUMatVector() && _relBackwardMotions.isUMatVector(),
ocl_calcRelativeMotions(_forwardMotions, _backwardMotions, _relForwardMotions,
_relBackwardMotions, baseIdx, size))
std::vector<Mat> & forwardMotions = *(std::vector<Mat> *)_forwardMotions.getObj(),
& backwardMotions = *(std::vector<Mat> *)_backwardMotions.getObj(),
& relForwardMotions = *(std::vector<Mat> *)_relForwardMotions.getObj(),
& relBackwardMotions = *(std::vector<Mat> *)_relBackwardMotions.getObj();
const int count = static_cast<int>(forwardMotions.size());
relForwardMotions.resize(count);
relForwardMotions[baseIdx].create(size, CV_32FC2);
relForwardMotions[baseIdx].setTo(Scalar::all(0));
relBackwardMotions.resize(count);
relBackwardMotions[baseIdx].create(size, CV_32FC2);
relBackwardMotions[baseIdx].setTo(Scalar::all(0));
for (int i = baseIdx - 1; i >= 0; --i)
{
add(relForwardMotions[i + 1], forwardMotions[i], relForwardMotions[i]);
add(relBackwardMotions[i + 1], backwardMotions[i + 1], relBackwardMotions[i]);
}
for (int i = baseIdx + 1; i < count; ++i)
{
add(relForwardMotions[i - 1], backwardMotions[i], relForwardMotions[i]);
add(relBackwardMotions[i - 1], forwardMotions[i - 1], relBackwardMotions[i]);
}
}
#ifdef HAVE_OPENCL
void upscaleMotions(const std::vector<Mat>& lowResMotions, std::vector<Mat>& highResMotions, int scale)
bool ocl_upscaleMotions(InputArrayOfArrays _lowResMotions, OutputArrayOfArrays _highResMotions, int scale)
{
std::vector<UMat> & lowResMotions = *(std::vector<UMat> *)_lowResMotions.getObj(),
& highResMotions = *(std::vector<UMat> *)_highResMotions.getObj();
highResMotions.resize(lowResMotions.size());
for (size_t i = 0; i < lowResMotions.size(); ++i)
{
resize(lowResMotions[i], highResMotions[i], Size(), scale, scale, INTER_LINEAR); // TODO
multiply(highResMotions[i], Scalar::all(scale), highResMotions[i]);
}
return true;
}
#endif
void upscaleMotions(InputArrayOfArrays _lowResMotions, OutputArrayOfArrays _highResMotions, int scale)
{
CV_OCL_RUN(_lowResMotions.isUMatVector() && _highResMotions.isUMatVector(),
ocl_upscaleMotions(_lowResMotions, _highResMotions, scale))
std::vector<Mat> & lowResMotions = *(std::vector<Mat> *)_lowResMotions.getObj(),
& highResMotions = *(std::vector<Mat> *)_highResMotions.getObj();
highResMotions.resize(lowResMotions.size());
for (size_t i = 0; i < lowResMotions.size(); ++i)
@@ -91,10 +163,47 @@ namespace
}
}
void buildMotionMaps(const Mat& forwardMotion, const Mat& backwardMotion, Mat& forwardMap, Mat& backwardMap)
#ifdef HAVE_OPENCL
bool ocl_buildMotionMaps(InputArray _forwardMotion, InputArray _backwardMotion,
OutputArray _forwardMap, OutputArray _backwardMap)
{
forwardMap.create(forwardMotion.size(), CV_32FC2);
backwardMap.create(forwardMotion.size(), CV_32FC2);
ocl::Kernel k("buildMotionMaps", ocl::superres::superres_btvl1_oclsrc);
if (k.empty())
return false;
UMat forwardMotion = _forwardMotion.getUMat(), backwardMotion = _backwardMotion.getUMat();
Size size = forwardMotion.size();
_forwardMap.create(size, CV_32FC2);
_backwardMap.create(size, CV_32FC2);
UMat forwardMap = _forwardMap.getUMat(), backwardMap = _backwardMap.getUMat();
k.args(ocl::KernelArg::ReadOnlyNoSize(forwardMotion),
ocl::KernelArg::ReadOnlyNoSize(backwardMotion),
ocl::KernelArg::WriteOnlyNoSize(forwardMap),
ocl::KernelArg::WriteOnly(backwardMap));
size_t globalsize[2] = { size.width, size.height };
return k.run(2, globalsize, NULL, false);
}
#endif
void buildMotionMaps(InputArray _forwardMotion, InputArray _backwardMotion,
OutputArray _forwardMap, OutputArray _backwardMap)
{
CV_OCL_RUN(_forwardMap.isUMat() && _backwardMap.isUMat(),
ocl_buildMotionMaps(_forwardMotion, _backwardMotion, _forwardMap,
_backwardMap));
Mat forwardMotion = _forwardMotion.getMat(), backwardMotion = _backwardMotion.getMat();
_forwardMap.create(forwardMotion.size(), CV_32FC2);
_backwardMap.create(forwardMotion.size(), CV_32FC2);
Mat forwardMap = _forwardMap.getMat(), backwardMap = _backwardMap.getMat();
for (int y = 0; y < forwardMotion.rows; ++y)
{
@@ -114,40 +223,73 @@ namespace
}
template <typename T>
void upscaleImpl(const Mat& src, Mat& dst, int scale)
void upscaleImpl(InputArray _src, OutputArray _dst, int scale)
{
dst.create(src.rows * scale, src.cols * scale, src.type());
dst.setTo(Scalar::all(0));
Mat src = _src.getMat();
_dst.create(src.rows * scale, src.cols * scale, src.type());
_dst.setTo(Scalar::all(0));
Mat dst = _dst.getMat();
for (int y = 0, Y = 0; y < src.rows; ++y, Y += scale)
{
const T* srcRow = src.ptr<T>(y);
T* dstRow = dst.ptr<T>(Y);
const T * const srcRow = src.ptr<T>(y);
T * const dstRow = dst.ptr<T>(Y);
for (int x = 0, X = 0; x < src.cols; ++x, X += scale)
dstRow[X] = srcRow[x];
}
}
void upscale(const Mat& src, Mat& dst, int scale)
#ifdef HAVE_OPENCL
static bool ocl_upscale(InputArray _src, OutputArray _dst, int scale)
{
int type = _src.type(), cn = CV_MAT_CN(type);
ocl::Kernel k("upscale", ocl::superres::superres_btvl1_oclsrc,
format("-D cn=%d", cn));
if (k.empty())
return false;
UMat src = _src.getUMat();
_dst.create(src.rows * scale, src.cols * scale, type);
_dst.setTo(Scalar::all(0));
UMat dst = _dst.getUMat();
k.args(ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::ReadWriteNoSize(dst), scale);
size_t globalsize[2] = { src.cols, src.rows };
return k.run(2, globalsize, NULL, false);
}
#endif
typedef struct _Point4f { float ar[4]; } Point4f;
void upscale(InputArray _src, OutputArray _dst, int scale)
{
typedef void (*func_t)(const Mat& src, Mat& dst, int scale);
int cn = _src.channels();
CV_Assert( cn == 1 || cn == 3 || cn == 4 );
CV_OCL_RUN(_dst.isUMat(),
ocl_upscale(_src, _dst, scale))
typedef void (*func_t)(InputArray src, OutputArray dst, int scale);
static const func_t funcs[] =
{
0, upscaleImpl<float>, 0, upscaleImpl<Point3f>
0, upscaleImpl<float>, 0, upscaleImpl<Point3f>, upscaleImpl<Point4f>
};
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
const func_t func = funcs[src.channels()];
func(src, dst, scale);
const func_t func = funcs[cn];
CV_Assert(func != 0);
func(_src, _dst, scale);
}
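Note (editor's sketch): the hunks in this file all follow the same Transparent API dispatch shape: an ocl_* helper guarded by HAVE_OPENCL, plus a CV_OCL_RUN call that tries it when the arguments are UMat-backed and falls through to the CPU code otherwise. A minimal self-contained illustration (names are invented, not code from btv_l1.cpp; CV_OCL_RUN comes from the internal header opencv2/core/opencl/ocl_defs.hpp touched above):
#ifdef HAVE_OPENCL
static bool ocl_invert(cv::InputArray _src, cv::OutputArray _dst)
{
    cv::UMat src = _src.getUMat(), dst;
    cv::bitwise_not(src, dst);           // executes through the OpenCL (T-API) path
    dst.copyTo(_dst);
    return true;                         // true == handled, skip the CPU branch
}
#endif
static void invert8u(cv::InputArray _src, cv::OutputArray _dst)
{
    CV_OCL_RUN(_dst.isUMat(), ocl_invert(_src, _dst))
    cv::Mat src = _src.getMat(), dst;    // CPU fallback
    cv::bitwise_not(src, dst);
    dst.copyTo(_dst);
}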
float diffSign(float a, float b)
inline float diffSign(float a, float b)
{
return a > b ? 1.0f : a < b ? -1.0f : 0.0f;
}
Point3f diffSign(Point3f a, Point3f b)
{
return Point3f(
@@ -157,16 +299,44 @@ namespace
);
}
void diffSign(const Mat& src1, const Mat& src2, Mat& dst)
#ifdef HAVE_OPENCL
static bool ocl_diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst)
{
const int count = src1.cols * src1.channels();
ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc);
if (k.empty())
return false;
UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
_dst.create(src1.size(), src1.type());
UMat dst = _dst.getUMat();
dst.create(src1.size(), src1.type());
int cn = src1.channels();
k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
ocl::KernelArg::ReadOnlyNoSize(src2),
ocl::KernelArg::WriteOnly(dst, cn));
size_t globalsize[2] = { src1.cols * cn, src1.rows };
return k.run(2, globalsize, NULL, false);
}
#endif
void diffSign(InputArray _src1, OutputArray _src2, OutputArray _dst)
{
CV_OCL_RUN(_dst.isUMat(),
ocl_diffSign(_src1, _src2, _dst))
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
_dst.create(src1.size(), src1.type());
Mat dst = _dst.getMat();
const int count = src1.cols * src1.channels();
for (int y = 0; y < src1.rows; ++y)
{
const float* src1Ptr = src1.ptr<float>(y);
const float* src2Ptr = src2.ptr<float>(y);
const float * const src1Ptr = src1.ptr<float>(y);
const float * const src2Ptr = src2.ptr<float>(y);
float* dstPtr = dst.ptr<float>(y);
for (int x = 0; x < count; ++x)
@@ -206,8 +376,8 @@ namespace
{
for (int i = range.start; i < range.end; ++i)
{
const T* srcRow = src.ptr<T>(i);
T* dstRow = dst.ptr<T>(i);
const T * const srcRow = src.ptr<T>(i);
T * const dstRow = dst.ptr<T>(i);
for(int j = ksize; j < src.cols - ksize; ++j)
{
@@ -219,19 +389,20 @@ namespace
const T* srcRow3 = src.ptr<T>(i + m);
for (int l = ksize; l + m >= 0; --l, ++ind)
{
dstRow[j] += btvWeights[ind] * (diffSign(srcVal, srcRow3[j + l]) - diffSign(srcRow2[j - l], srcVal));
}
dstRow[j] += btvWeights[ind] * (diffSign(srcVal, srcRow3[j + l])
- diffSign(srcRow2[j - l], srcVal));
}
}
}
}
template <typename T>
void calcBtvRegularizationImpl(const Mat& src, Mat& dst, int btvKernelSize, const std::vector<float>& btvWeights)
void calcBtvRegularizationImpl(InputArray _src, OutputArray _dst, int btvKernelSize, const std::vector<float>& btvWeights)
{
dst.create(src.size(), src.type());
dst.setTo(Scalar::all(0));
Mat src = _src.getMat();
_dst.create(src.size(), src.type());
_dst.setTo(Scalar::all(0));
Mat dst = _dst.getMat();
const int ksize = (btvKernelSize - 1) / 2;
@@ -245,17 +416,48 @@ namespace
parallel_for_(Range(ksize, src.rows - ksize), body);
}
void calcBtvRegularization(const Mat& src, Mat& dst, int btvKernelSize, const std::vector<float>& btvWeights)
#ifdef HAVE_OPENCL
static bool ocl_calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize, const UMat & ubtvWeights)
{
int cn = _src.channels();
ocl::Kernel k("calcBtvRegularization", ocl::superres::superres_btvl1_oclsrc,
format("-D cn=%d", cn));
if (k.empty())
return false;
UMat src = _src.getUMat();
_dst.create(src.size(), src.type());
_dst.setTo(Scalar::all(0));
UMat dst = _dst.getUMat();
const int ksize = (btvKernelSize - 1) / 2;
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst),
ksize, ocl::KernelArg::PtrReadOnly(ubtvWeights));
size_t globalsize[2] = { src.cols, src.rows };
return k.run(2, globalsize, NULL, false);
}
#endif
void calcBtvRegularization(InputArray _src, OutputArray _dst, int btvKernelSize,
const std::vector<float>& btvWeights, const UMat & ubtvWeights)
{
typedef void (*func_t)(const Mat& src, Mat& dst, int btvKernelSize, const std::vector<float>& btvWeights);
CV_OCL_RUN(_dst.isUMat(),
ocl_calcBtvRegularization(_src, _dst, btvKernelSize, ubtvWeights))
(void)ubtvWeights;
typedef void (*func_t)(InputArray _src, OutputArray _dst, int btvKernelSize, const std::vector<float>& btvWeights);
static const func_t funcs[] =
{
0, calcBtvRegularizationImpl<float>, 0, calcBtvRegularizationImpl<Point3f>
0, calcBtvRegularizationImpl<float>, 0, calcBtvRegularizationImpl<Point3f>, 0
};
const func_t func = funcs[src.channels()];
func(src, dst, btvKernelSize, btvWeights);
const func_t func = funcs[_src.channels()];
CV_Assert(func != 0);
func(_src, _dst, btvKernelSize, btvWeights);
}
class BTVL1_Base
@@ -263,9 +465,8 @@ namespace
public:
BTVL1_Base();
void process(const std::vector<Mat>& src, Mat& dst,
const std::vector<Mat>& forwardMotions, const std::vector<Mat>& backwardMotions,
int baseIdx);
void process(InputArrayOfArrays src, OutputArray dst, InputArrayOfArrays forwardMotions,
InputArrayOfArrays backwardMotions, int baseIdx);
void collectGarbage();
@@ -281,15 +482,21 @@ namespace
Ptr<DenseOpticalFlowExt> opticalFlow_;
private:
bool ocl_process(InputArrayOfArrays src, OutputArray dst, InputArrayOfArrays forwardMotions,
InputArrayOfArrays backwardMotions, int baseIdx);
Ptr<FilterEngine> filter_;
int curBlurKernelSize_;
double curBlurSigma_;
int curSrcType_;
std::vector<float> btvWeights_;
UMat ubtvWeights_;
int curBtvKernelSize_;
double curAlpha_;
// Mat
std::vector<Mat> lowResForwardMotions_;
std::vector<Mat> lowResBackwardMotions_;
@@ -303,6 +510,23 @@ namespace
Mat diffTerm_, regTerm_;
Mat a_, b_, c_;
#ifdef HAVE_OPENCL
// UMat
std::vector<UMat> ulowResForwardMotions_;
std::vector<UMat> ulowResBackwardMotions_;
std::vector<UMat> uhighResForwardMotions_;
std::vector<UMat> uhighResBackwardMotions_;
std::vector<UMat> uforwardMaps_;
std::vector<UMat> ubackwardMaps_;
UMat uhighRes_;
UMat udiffTerm_, uregTerm_;
UMat ua_, ub_, uc_;
#endif
};
BTVL1_Base::BTVL1_Base()
@@ -325,7 +549,101 @@ namespace
curAlpha_ = -1.0;
}
void BTVL1_Base::process(const std::vector<Mat>& src, Mat& dst, const std::vector<Mat>& forwardMotions, const std::vector<Mat>& backwardMotions, int baseIdx)
#ifdef HAVE_OPENCL
bool BTVL1_Base::ocl_process(InputArrayOfArrays _src, OutputArray _dst, InputArrayOfArrays _forwardMotions,
InputArrayOfArrays _backwardMotions, int baseIdx)
{
std::vector<UMat> & src = *(std::vector<UMat> *)_src.getObj(),
& forwardMotions = *(std::vector<UMat> *)_forwardMotions.getObj(),
& backwardMotions = *(std::vector<UMat> *)_backwardMotions.getObj();
// update blur filter and btv weights
if (!filter_ || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
{
filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
curBlurKernelSize_ = blurKernelSize_;
curBlurSigma_ = blurSigma_;
curSrcType_ = src[0].type();
}
if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)
{
calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);
Mat(btvWeights_, true).copyTo(ubtvWeights_);
curBtvKernelSize_ = btvKernelSize_;
curAlpha_ = alpha_;
}
// calc high res motions
calcRelativeMotions(forwardMotions, backwardMotions, ulowResForwardMotions_, ulowResBackwardMotions_, baseIdx, src[0].size());
upscaleMotions(ulowResForwardMotions_, uhighResForwardMotions_, scale_);
upscaleMotions(ulowResBackwardMotions_, uhighResBackwardMotions_, scale_);
uforwardMaps_.resize(uhighResForwardMotions_.size());
ubackwardMaps_.resize(uhighResForwardMotions_.size());
for (size_t i = 0; i < uhighResForwardMotions_.size(); ++i)
buildMotionMaps(uhighResForwardMotions_[i], uhighResBackwardMotions_[i], uforwardMaps_[i], ubackwardMaps_[i]);
// initial estimation
const Size lowResSize = src[0].size();
const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
resize(src[baseIdx], uhighRes_, highResSize, 0, 0, INTER_LINEAR); // TODO
// iterations
udiffTerm_.create(highResSize, uhighRes_.type());
ua_.create(highResSize, uhighRes_.type());
ub_.create(highResSize, uhighRes_.type());
uc_.create(lowResSize, uhighRes_.type());
for (int i = 0; i < iterations_; ++i)
{
udiffTerm_.setTo(Scalar::all(0));
for (size_t k = 0; k < src.size(); ++k)
{
// a = M * Ih
remap(uhighRes_, ua_, ubackwardMaps_[k], noArray(), INTER_NEAREST);
// b = HM * Ih
GaussianBlur(ua_, ub_, Size(blurKernelSize_, blurKernelSize_), blurSigma_);
// c = DHM * Ih
resize(ub_, uc_, lowResSize, 0, 0, INTER_NEAREST);
diffSign(src[k], uc_, uc_);
// a = Dt * diff
upscale(uc_, ua_, scale_);
// b = HtDt * diff
GaussianBlur(ua_, ub_, Size(blurKernelSize_, blurKernelSize_), blurSigma_);
// a = MtHtDt * diff
remap(ub_, ua_, uforwardMaps_[k], noArray(), INTER_NEAREST);
add(udiffTerm_, ua_, udiffTerm_);
}
if (lambda_ > 0)
{
calcBtvRegularization(uhighRes_, uregTerm_, btvKernelSize_, btvWeights_, ubtvWeights_);
addWeighted(udiffTerm_, 1.0, uregTerm_, -lambda_, 0.0, udiffTerm_);
}
addWeighted(uhighRes_, 1.0, udiffTerm_, tau_, 0.0, uhighRes_);
}
Rect inner(btvKernelSize_, btvKernelSize_, uhighRes_.cols - 2 * btvKernelSize_, uhighRes_.rows - 2 * btvKernelSize_);
uhighRes_(inner).copyTo(_dst);
return true;
}
#endif
void BTVL1_Base::process(InputArrayOfArrays _src, OutputArray _dst, InputArrayOfArrays _forwardMotions,
InputArrayOfArrays _backwardMotions, int baseIdx)
{
CV_Assert( scale_ > 1 );
CV_Assert( iterations_ > 0 );
@@ -335,8 +653,15 @@ namespace
CV_Assert( blurKernelSize_ > 0 );
CV_Assert( blurSigma_ >= 0.0 );
// update blur filter and btv weights
CV_OCL_RUN(_src.isUMatVector() && _dst.isUMat() && _forwardMotions.isUMatVector() &&
_backwardMotions.isUMatVector(),
ocl_process(_src, _dst, _forwardMotions, _backwardMotions, baseIdx))
std::vector<Mat> & src = *(std::vector<Mat> *)_src.getObj(),
& forwardMotions = *(std::vector<Mat> *)_forwardMotions.getObj(),
& backwardMotions = *(std::vector<Mat> *)_backwardMotions.getObj();
// update blur filter and btv weights
if (!filter_ || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
{
filter_ = createGaussianFilter(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
@@ -353,7 +678,6 @@ namespace
}
// calc high res motions
calcRelativeMotions(forwardMotions, backwardMotions, lowResForwardMotions_, lowResBackwardMotions_, baseIdx, src[0].size());
upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_);
@@ -365,14 +689,12 @@ namespace
buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]);
// initial estimation
const Size lowResSize = src[0].size();
const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC);
// iterations
diffTerm_.create(highResSize, highRes_.type());
a_.create(highResSize, highRes_.type());
b_.create(highResSize, highRes_.type());
@@ -405,7 +727,7 @@ namespace
if (lambda_ > 0)
{
calcBtvRegularization(highRes_, regTerm_, btvKernelSize_, btvWeights_);
calcBtvRegularization(highRes_, regTerm_, btvKernelSize_, btvWeights_, ubtvWeights_);
addWeighted(diffTerm_, 1.0, regTerm_, -lambda_, 0.0, diffTerm_);
}
@@ -413,13 +735,14 @@ namespace
}
Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_);
highRes_(inner).copyTo(dst);
highRes_(inner).copyTo(_dst);
}
void BTVL1_Base::collectGarbage()
{
filter_.release();
// Mat
lowResForwardMotions_.clear();
lowResBackwardMotions_.clear();
@@ -436,11 +759,32 @@ namespace
a_.release();
b_.release();
c_.release();
#ifdef HAVE_OPENCL
// UMat
ulowResForwardMotions_.clear();
ulowResBackwardMotions_.clear();
uhighResForwardMotions_.clear();
uhighResBackwardMotions_.clear();
uforwardMaps_.clear();
ubackwardMaps_.clear();
uhighRes_.release();
udiffTerm_.release();
uregTerm_.release();
ua_.release();
ub_.release();
uc_.release();
#endif
}
////////////////////////////////////////////////////////////////////
class BTVL1 : public SuperResolution, private BTVL1_Base
class BTVL1 :
public SuperResolution, private BTVL1_Base
{
public:
AlgorithmInfo* info() const;
@@ -451,14 +795,25 @@ namespace
protected:
void initImpl(Ptr<FrameSource>& frameSource);
bool ocl_initImpl(Ptr<FrameSource>& frameSource);
void processImpl(Ptr<FrameSource>& frameSource, OutputArray output);
bool ocl_processImpl(Ptr<FrameSource>& frameSource, OutputArray output);
private:
int temporalAreaRadius_;
void readNextFrame(Ptr<FrameSource>& frameSource);
bool ocl_readNextFrame(Ptr<FrameSource>& frameSource);
void processFrame(int idx);
bool ocl_processFrame(int idx);
int storePos_;
int procPos_;
int outPos_;
// Mat
Mat curFrame_;
Mat prevFrame_;
@@ -467,14 +822,25 @@ namespace
std::vector<Mat> backwardMotions_;
std::vector<Mat> outputs_;
int storePos_;
int procPos_;
int outPos_;
std::vector<Mat> srcFrames_;
std::vector<Mat> srcForwardMotions_;
std::vector<Mat> srcBackwardMotions_;
Mat finalOutput_;
#ifdef HAVE_OPENCL
// UMat
UMat ucurFrame_;
UMat uprevFrame_;
std::vector<UMat> uframes_;
std::vector<UMat> uforwardMotions_;
std::vector<UMat> ubackwardMotions_;
std::vector<UMat> uoutputs_;
std::vector<UMat> usrcFrames_;
std::vector<UMat> usrcForwardMotions_;
std::vector<UMat> usrcBackwardMotions_;
#endif
};
CV_INIT_ALGORITHM(BTVL1, "SuperResolution.BTVL1",
@@ -496,6 +862,7 @@ namespace
void BTVL1::collectGarbage()
{
// Mat
curFrame_.release();
prevFrame_.release();
@@ -509,10 +876,52 @@ namespace
srcBackwardMotions_.clear();
finalOutput_.release();
#ifdef HAVE_OPENCL
// UMat
ucurFrame_.release();
uprevFrame_.release();
uframes_.clear();
uforwardMotions_.clear();
ubackwardMotions_.clear();
uoutputs_.clear();
usrcFrames_.clear();
usrcForwardMotions_.clear();
usrcBackwardMotions_.clear();
#endif
SuperResolution::collectGarbage();
BTVL1_Base::collectGarbage();
}
#ifdef HAVE_OPENCL
bool BTVL1::ocl_initImpl(Ptr<FrameSource>& frameSource)
{
const int cacheSize = 2 * temporalAreaRadius_ + 1;
uframes_.resize(cacheSize);
uforwardMotions_.resize(cacheSize);
ubackwardMotions_.resize(cacheSize);
uoutputs_.resize(cacheSize);
storePos_ = -1;
for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)
readNextFrame(frameSource);
for (int i = 0; i <= temporalAreaRadius_; ++i)
processFrame(i);
procPos_ = temporalAreaRadius_;
outPos_ = -1;
return true;
}
#endif
void BTVL1::initImpl(Ptr<FrameSource>& frameSource)
{
const int cacheSize = 2 * temporalAreaRadius_ + 1;
@@ -522,6 +931,9 @@ namespace
backwardMotions_.resize(cacheSize);
outputs_.resize(cacheSize);
CV_OCL_RUN(isUmat_,
ocl_initImpl(frameSource))
storePos_ = -1;
for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)
@@ -534,6 +946,18 @@ namespace
outPos_ = -1;
}
#ifdef HAVE_OPENCL
bool BTVL1::ocl_processImpl(Ptr<FrameSource>& /*frameSource*/, OutputArray _output)
{
const UMat& curOutput = at(outPos_, uoutputs_);
curOutput.convertTo(_output, CV_8U);
return true;
}
#endif
void BTVL1::processImpl(Ptr<FrameSource>& frameSource, OutputArray _output)
{
if (outPos_ >= storePos_)
@@ -549,11 +973,14 @@ namespace
++procPos_;
processFrame(procPos_);
}
++outPos_;
CV_OCL_RUN(isUmat_,
ocl_processImpl(frameSource, _output))
const Mat& curOutput = at(outPos_, outputs_);
if (_output.kind() < _InputArray::OPENGL_BUFFER)
if (_output.kind() < _InputArray::OPENGL_BUFFER || _output.isUMat())
curOutput.convertTo(_output, CV_8U);
else
{
@@ -562,14 +989,41 @@ namespace
}
}
#ifdef HAVE_OPENCL
bool BTVL1::ocl_readNextFrame(Ptr<FrameSource>& /*frameSource*/)
{
ucurFrame_.convertTo(at(storePos_, uframes_), CV_32F);
if (storePos_ > 0)
{
opticalFlow_->calc(uprevFrame_, ucurFrame_, at(storePos_ - 1, uforwardMotions_));
opticalFlow_->calc(ucurFrame_, uprevFrame_, at(storePos_, ubackwardMotions_));
}
ucurFrame_.copyTo(uprevFrame_);
return true;
}
#endif
void BTVL1::readNextFrame(Ptr<FrameSource>& frameSource)
{
frameSource->nextFrame(curFrame_);
if (curFrame_.empty())
return;
#ifdef HAVE_OPENCL
if (isUmat_ && curFrame_.channels() == 1)
curFrame_.copyTo(ucurFrame_);
else
isUmat_ = false;
#endif
++storePos_;
CV_OCL_RUN(isUmat_,
ocl_readNextFrame(frameSource))
curFrame_.convertTo(at(storePos_, frames_), CV_32F);
if (storePos_ > 0)
@@ -581,8 +1035,47 @@ namespace
curFrame_.copyTo(prevFrame_);
}
#ifdef HAVE_OPENCL
bool BTVL1::ocl_processFrame(int idx)
{
const int startIdx = std::max(idx - temporalAreaRadius_, 0);
const int procIdx = idx;
const int endIdx = std::min(startIdx + 2 * temporalAreaRadius_, storePos_);
const int count = endIdx - startIdx + 1;
usrcFrames_.resize(count);
usrcForwardMotions_.resize(count);
usrcBackwardMotions_.resize(count);
int baseIdx = -1;
for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k)
{
if (i == procIdx)
baseIdx = k;
usrcFrames_[k] = at(i, uframes_);
if (i < endIdx)
usrcForwardMotions_[k] = at(i, uforwardMotions_);
if (i > startIdx)
usrcBackwardMotions_[k] = at(i, ubackwardMotions_);
}
process(usrcFrames_, at(idx, uoutputs_), usrcForwardMotions_, usrcBackwardMotions_, baseIdx);
return true;
}
#endif
void BTVL1::processFrame(int idx)
{
CV_OCL_RUN(isUmat_,
ocl_processFrame(idx))
const int startIdx = std::max(idx - temporalAreaRadius_, 0);
const int procIdx = idx;
const int endIdx = std::min(startIdx + 2 * temporalAreaRadius_, storePos_);

@@ -1,725 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jin Ma, jin@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution.
// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
#include "precomp.hpp"
#if !defined(HAVE_OPENCL) || !defined(HAVE_OPENCV_OCL)
cv::Ptr<cv::superres::SuperResolution> cv::superres::createSuperResolution_BTVL1_OCL()
{
CV_Error(cv::Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
return Ptr<SuperResolution>();
}
#else
#include "opencl_kernels.hpp"
using namespace std;
using namespace cv;
using namespace cv::ocl;
using namespace cv::superres;
using namespace cv::superres::detail;
static ProgramEntry superres_btvl1 = cv::ocl::superres::superres_btvl1;
namespace cv
{
namespace ocl
{
float* btvWeights_ = NULL;
size_t btvWeights_size = 0;
oclMat c_btvRegWeights;
}
}
namespace btv_l1_device_ocl
{
void buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY,
const oclMat& backwardMotionX, const oclMat& bacwardMotionY,
oclMat& forwardMapX, oclMat& forwardMapY,
oclMat& backwardMapX, oclMat& backwardMapY);
void upscale(const oclMat& src, oclMat& dst, int scale);
void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst);
void calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize);
}
void btv_l1_device_ocl::buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY,
const oclMat& backwardMotionX, const oclMat& backwardMotionY,
oclMat& forwardMapX, oclMat& forwardMapY,
oclMat& backwardMapX, oclMat& backwardMapY)
{
Context* clCxt = Context::getContext();
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {forwardMapX.cols, forwardMapX.rows, 1};
int forwardMotionX_step = (int)(forwardMotionX.step/forwardMotionX.elemSize());
int forwardMotionY_step = (int)(forwardMotionY.step/forwardMotionY.elemSize());
int backwardMotionX_step = (int)(backwardMotionX.step/backwardMotionX.elemSize());
int backwardMotionY_step = (int)(backwardMotionY.step/backwardMotionY.elemSize());
int forwardMapX_step = (int)(forwardMapX.step/forwardMapX.elemSize());
int forwardMapY_step = (int)(forwardMapY.step/forwardMapY.elemSize());
int backwardMapX_step = (int)(backwardMapX.step/backwardMapX.elemSize());
int backwardMapY_step = (int)(backwardMapY.step/backwardMapY.elemSize());
String kernel_name = "buildMotionMapsKernel";
vector< pair<size_t, const void*> > args;
args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionX.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionY.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionX.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionY.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapX.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapY.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapX.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapY.data));
args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX.rows));
args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY.cols));
args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionX_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionY_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapX_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapY_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapX_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapY_step));
openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
}
void btv_l1_device_ocl::upscale(const oclMat& src, oclMat& dst, int scale)
{
Context* clCxt = Context::getContext();
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {src.cols, src.rows, 1};
int src_step = (int)(src.step/src.elemSize());
int dst_step = (int)(dst.step/dst.elemSize());
String kernel_name = "upscaleKernel";
vector< pair<size_t, const void*> > args;
int cn = src.oclchannels();
args.push_back(make_pair(sizeof(cl_mem), (void*)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data));
args.push_back(make_pair(sizeof(cl_int), (void*)&src_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows));
args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols));
args.push_back(make_pair(sizeof(cl_int), (void*)&scale));
args.push_back(make_pair(sizeof(cl_int), (void*)&cn));
openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
}
void btv_l1_device_ocl::diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst)
{
Context* clCxt = Context::getContext();
oclMat src1_ = src1.reshape(1);
oclMat src2_ = src2.reshape(1);
oclMat dst_ = dst.reshape(1);
int src1_step = (int)(src1_.step/src1_.elemSize());
int src2_step = (int)(src2_.step/src2_.elemSize());
int dst_step = (int)(dst_.step/dst_.elemSize());
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {src1_.cols, src1_.rows, 1};
String kernel_name = "diffSignKernel";
vector< pair<size_t, const void*> > args;
args.push_back(make_pair(sizeof(cl_mem), (void*)&src1_.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&src2_.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data));
args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.rows));
args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.cols));
args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&src1_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&src2_step));
openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
}
void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize)
{
Context* clCxt = Context::getContext();
oclMat src_ = src.reshape(1);
oclMat dst_ = dst.reshape(1);
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {src.cols, src.rows, 1};
int src_step = (int)(src_.step/src_.elemSize());
int dst_step = (int)(dst_.step/dst_.elemSize());
String kernel_name = "calcBtvRegularizationKernel";
vector< pair<size_t, const void*> > args;
int cn = src.oclchannels();
args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data));
args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data));
args.push_back(make_pair(sizeof(cl_int), (void*)&src_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step));
args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows));
args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols));
args.push_back(make_pair(sizeof(cl_int), (void*)&ksize));
args.push_back(make_pair(sizeof(cl_int), (void*)&cn));
args.push_back(make_pair(sizeof(cl_mem), (void*)&c_btvRegWeights.data));
openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1);
}
namespace
{
void calcRelativeMotions(const vector<pair<oclMat, oclMat> >& forwardMotions, const vector<pair<oclMat, oclMat> >& backwardMotions,
vector<pair<oclMat, oclMat> >& relForwardMotions, vector<pair<oclMat, oclMat> >& relBackwardMotions,
int baseIdx, Size size)
{
const int count = static_cast<int>(forwardMotions.size());
relForwardMotions.resize(count);
relForwardMotions[baseIdx].first.create(size, CV_32FC1);
relForwardMotions[baseIdx].first.setTo(Scalar::all(0));
relForwardMotions[baseIdx].second.create(size, CV_32FC1);
relForwardMotions[baseIdx].second.setTo(Scalar::all(0));
relBackwardMotions.resize(count);
relBackwardMotions[baseIdx].first.create(size, CV_32FC1);
relBackwardMotions[baseIdx].first.setTo(Scalar::all(0));
relBackwardMotions[baseIdx].second.create(size, CV_32FC1);
relBackwardMotions[baseIdx].second.setTo(Scalar::all(0));
for (int i = baseIdx - 1; i >= 0; --i)
{
ocl::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first);
ocl::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second);
ocl::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first);
ocl::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second);
}
for (int i = baseIdx + 1; i < count; ++i)
{
ocl::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first);
ocl::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second);
ocl::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first);
ocl::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second);
}
}
void upscaleMotions(const vector<pair<oclMat, oclMat> >& lowResMotions, vector<pair<oclMat, oclMat> >& highResMotions, int scale)
{
highResMotions.resize(lowResMotions.size());
for (size_t i = 0; i < lowResMotions.size(); ++i)
{
ocl::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_LINEAR);
ocl::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_LINEAR);
ocl::multiply(scale, highResMotions[i].first, highResMotions[i].first);
ocl::multiply(scale, highResMotions[i].second, highResMotions[i].second);
}
}
void buildMotionMaps(const pair<oclMat, oclMat>& forwardMotion, const pair<oclMat, oclMat>& backwardMotion,
pair<oclMat, oclMat>& forwardMap, pair<oclMat, oclMat>& backwardMap)
{
forwardMap.first.create(forwardMotion.first.size(), CV_32FC1);
forwardMap.second.create(forwardMotion.first.size(), CV_32FC1);
backwardMap.first.create(forwardMotion.first.size(), CV_32FC1);
backwardMap.second.create(forwardMotion.first.size(), CV_32FC1);
btv_l1_device_ocl::buildMotionMaps(forwardMotion.first, forwardMotion.second,
backwardMotion.first, backwardMotion.second,
forwardMap.first, forwardMap.second,
backwardMap.first, backwardMap.second);
}
void upscale(const oclMat& src, oclMat& dst, int scale)
{
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
btv_l1_device_ocl::upscale(src, dst, scale);
}
void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst)
{
dst.create(src1.size(), src1.type());
btv_l1_device_ocl::diffSign(src1, src2, dst);
}
void calcBtvWeights(int btvKernelSize, double alpha, vector<float>& btvWeights)
{
const size_t size = btvKernelSize * btvKernelSize;
btvWeights.resize(size);
const int ksize = (btvKernelSize - 1) / 2;
const float alpha_f = static_cast<float>(alpha);
for (int m = 0, ind = 0; m <= ksize; ++m)
{
for (int l = ksize; l + m >= 0; --l, ++ind)
btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l));
}
btvWeights_ = &btvWeights[0];
btvWeights_size = size;
Mat btvWeights_mheader(1, static_cast<int>(size), CV_32FC1, btvWeights_);
c_btvRegWeights = btvWeights_mheader;
}
void calcBtvRegularization(const oclMat& src, oclMat& dst, int btvKernelSize)
{
dst.create(src.size(), src.type());
const int ksize = (btvKernelSize - 1) / 2;
btv_l1_device_ocl::calcBtvRegularization(src, dst, ksize);
}
class BTVL1_OCL_Base
{
public:
BTVL1_OCL_Base();
void process(const vector<oclMat>& src, oclMat& dst,
const vector<pair<oclMat, oclMat> >& forwardMotions, const vector<pair<oclMat, oclMat> >& backwardMotions,
int baseIdx);
void collectGarbage();
protected:
int scale_;
int iterations_;
double lambda_;
double tau_;
double alpha_;
int btvKernelSize_;
int blurKernelSize_;
double blurSigma_;
Ptr<DenseOpticalFlowExt> opticalFlow_;
private:
vector<Ptr<cv::ocl::FilterEngine_GPU> > filters_;
int curBlurKernelSize_;
double curBlurSigma_;
int curSrcType_;
vector<float> btvWeights_;
int curBtvKernelSize_;
double curAlpha_;
vector<pair<oclMat, oclMat> > lowResForwardMotions_;
vector<pair<oclMat, oclMat> > lowResBackwardMotions_;
vector<pair<oclMat, oclMat> > highResForwardMotions_;
vector<pair<oclMat, oclMat> > highResBackwardMotions_;
vector<pair<oclMat, oclMat> > forwardMaps_;
vector<pair<oclMat, oclMat> > backwardMaps_;
oclMat highRes_;
vector<oclMat> diffTerms_;
oclMat a_, b_, c_, d_;
oclMat regTerm_;
};
BTVL1_OCL_Base::BTVL1_OCL_Base()
{
scale_ = 4;
iterations_ = 180;
lambda_ = 0.03;
tau_ = 1.3;
alpha_ = 0.7;
btvKernelSize_ = 7;
blurKernelSize_ = 5;
blurSigma_ = 0.0;
opticalFlow_ = createOptFlow_Farneback_OCL();
curBlurKernelSize_ = -1;
curBlurSigma_ = -1.0;
curSrcType_ = -1;
curBtvKernelSize_ = -1;
curAlpha_ = -1.0;
}
void BTVL1_OCL_Base::process(const vector<oclMat>& src, oclMat& dst,
const vector<pair<oclMat, oclMat> >& forwardMotions, const vector<pair<oclMat, oclMat> >& backwardMotions,
int baseIdx)
{
CV_Assert( scale_ > 1 );
CV_Assert( iterations_ > 0 );
CV_Assert( tau_ > 0.0 );
CV_Assert( alpha_ > 0.0 );
CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 );
CV_Assert( blurKernelSize_ > 0 );
CV_Assert( blurSigma_ >= 0.0 );
// update blur filter and btv weights
if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)
{
filters_.resize(src.size());
for (size_t i = 0; i < src.size(); ++i)
filters_[i] = cv::ocl::createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_);
curBlurKernelSize_ = blurKernelSize_;
curBlurSigma_ = blurSigma_;
curSrcType_ = src[0].type();
}
if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)
{
calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);
curBtvKernelSize_ = btvKernelSize_;
curAlpha_ = alpha_;
}
// calc motions between input frames
calcRelativeMotions(forwardMotions, backwardMotions,
lowResForwardMotions_, lowResBackwardMotions_,
baseIdx, src[0].size());
upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_);
upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_);
forwardMaps_.resize(highResForwardMotions_.size());
backwardMaps_.resize(highResForwardMotions_.size());
for (size_t i = 0; i < highResForwardMotions_.size(); ++i)
{
buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]);
}
// initial estimation
const Size lowResSize = src[0].size();
const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);
ocl::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_LINEAR);
// iterations
diffTerms_.resize(src.size());
bool d_inited = false;
a_.create(highRes_.size(), highRes_.type());
b_.create(highRes_.size(), highRes_.type());
c_.create(lowResSize, highRes_.type());
d_.create(highRes_.rows, highRes_.cols, highRes_.type());
for (int i = 0; i < iterations_; ++i)
{
if(!d_inited)
{
d_.setTo(0);
d_inited = true;
}
for (size_t k = 0; k < src.size(); ++k)
{
diffTerms_[k].create(highRes_.size(), highRes_.type());
// a = M * Ih
ocl::remap(highRes_, a_, backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar());
// b = HM * Ih
filters_[k]->apply(a_, b_, Rect(0,0,-1,-1));
// c = DHF * Ih
ocl::resize(b_, c_, lowResSize, 0, 0, INTER_NEAREST);
diffSign(src[k], c_, c_);
// a = Dt * diff
upscale(c_, d_, scale_);
// b = HtDt * diff
filters_[k]->apply(d_, b_, Rect(0,0,-1,-1));
// diffTerm = MtHtDt * diff
ocl::remap(b_, diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar());
}
if (lambda_ > 0)
{
calcBtvRegularization(highRes_, regTerm_, btvKernelSize_);
ocl::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_);
}
for (size_t k = 0; k < src.size(); ++k)
{
ocl::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_);
}
}
Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_);
highRes_(inner).copyTo(dst);
}
void BTVL1_OCL_Base::collectGarbage()
{
filters_.clear();
lowResForwardMotions_.clear();
lowResBackwardMotions_.clear();
highResForwardMotions_.clear();
highResBackwardMotions_.clear();
forwardMaps_.clear();
backwardMaps_.clear();
highRes_.release();
diffTerms_.clear();
a_.release();
b_.release();
c_.release();
regTerm_.release();
c_btvRegWeights.release();
}
////////////////////////////////////////////////////////////
class BTVL1_OCL : public SuperResolution, private BTVL1_OCL_Base
{
public:
AlgorithmInfo* info() const;
BTVL1_OCL();
void collectGarbage();
protected:
void initImpl(Ptr<FrameSource>& frameSource);
void processImpl(Ptr<FrameSource>& frameSource, OutputArray output);
private:
int temporalAreaRadius_;
void readNextFrame(Ptr<FrameSource>& frameSource);
void processFrame(int idx);
oclMat curFrame_;
oclMat prevFrame_;
vector<oclMat> frames_;
vector<pair<oclMat, oclMat> > forwardMotions_;
vector<pair<oclMat, oclMat> > backwardMotions_;
vector<oclMat> outputs_;
int storePos_;
int procPos_;
int outPos_;
vector<oclMat> srcFrames_;
vector<pair<oclMat, oclMat> > srcForwardMotions_;
vector<pair<oclMat, oclMat> > srcBackwardMotions_;
oclMat finalOutput_;
};
CV_INIT_ALGORITHM(BTVL1_OCL, "SuperResolution.BTVL1_OCL",
obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor.");
obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count.");
obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method.");
obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term.");
obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV.");
obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter.");
obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size.");
obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma.");
obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area.");
obj.info()->addParam<DenseOpticalFlowExt>(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm."))
BTVL1_OCL::BTVL1_OCL()
{
temporalAreaRadius_ = 4;
}
void BTVL1_OCL::collectGarbage()
{
curFrame_.release();
prevFrame_.release();
frames_.clear();
forwardMotions_.clear();
backwardMotions_.clear();
outputs_.clear();
srcFrames_.clear();
srcForwardMotions_.clear();
srcBackwardMotions_.clear();
finalOutput_.release();
SuperResolution::collectGarbage();
BTVL1_OCL_Base::collectGarbage();
}
void BTVL1_OCL::initImpl(Ptr<FrameSource>& frameSource)
{
const int cacheSize = 2 * temporalAreaRadius_ + 1;
frames_.resize(cacheSize);
forwardMotions_.resize(cacheSize);
backwardMotions_.resize(cacheSize);
outputs_.resize(cacheSize);
storePos_ = -1;
for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)
readNextFrame(frameSource);
for (int i = 0; i <= temporalAreaRadius_; ++i)
processFrame(i);
procPos_ = temporalAreaRadius_;
outPos_ = -1;
}
void BTVL1_OCL::processImpl(Ptr<FrameSource>& frameSource, OutputArray _output)
{
if (outPos_ >= storePos_)
{
if(_output.kind() == _InputArray::OCL_MAT)
{
getOclMatRef(_output).release();
}
else
{
_output.release();
}
return;
}
readNextFrame(frameSource);
if (procPos_ < storePos_)
{
++procPos_;
processFrame(procPos_);
}
++outPos_;
const oclMat& curOutput = at(outPos_, outputs_);
if (_output.kind() == _InputArray::OCL_MAT)
curOutput.convertTo(getOclMatRef(_output), CV_8U);
else
{
curOutput.convertTo(finalOutput_, CV_8U);
arrCopy(finalOutput_, _output);
}
}
void BTVL1_OCL::readNextFrame(Ptr<FrameSource>& frameSource)
{
curFrame_.release();
frameSource->nextFrame(curFrame_);
if (curFrame_.empty())
return;
++storePos_;
curFrame_.convertTo(at(storePos_, frames_), CV_32F);
if (storePos_ > 0)
{
pair<oclMat, oclMat>& forwardMotion = at(storePos_ - 1, forwardMotions_);
pair<oclMat, oclMat>& backwardMotion = at(storePos_, backwardMotions_);
opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second);
opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second);
}
curFrame_.copyTo(prevFrame_);
}
void BTVL1_OCL::processFrame(int idx)
{
const int startIdx = max(idx - temporalAreaRadius_, 0);
const int procIdx = idx;
const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_);
const int count = endIdx - startIdx + 1;
srcFrames_.resize(count);
srcForwardMotions_.resize(count);
srcBackwardMotions_.resize(count);
int baseIdx = -1;
for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k)
{
if (i == procIdx)
baseIdx = k;
srcFrames_[k] = at(i, frames_);
if (i < endIdx)
srcForwardMotions_[k] = at(i, forwardMotions_);
if (i > startIdx)
srcBackwardMotions_[k] = at(i, backwardMotions_);
}
process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx);
}
}
Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_OCL()
{
return makePtr<BTVL1_OCL>();
}
#endif
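The frame cache above is a fixed-size ring addressed by ever-growing positions (storePos_, procPos_, outPos_) through a helper named at(); a minimal sketch of such a lookup, assuming plain modulo wrapping rather than the exact helper used in this module:
#include <vector>
// Hypothetical ring-buffer lookup: maps a monotonically growing frame index
// onto a cache of 2 * temporalAreaRadius + 1 slots.
template <typename T>
static const T& at(int index, const std::vector<T>& items)
{
    const int len = (int)items.size();
    return items[((index % len) + len) % len]; // wraps negative and out-of-range indices
}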

@ -115,25 +115,18 @@ namespace
void CaptureFrameSource::nextFrame(OutputArray _frame)
{
if (_frame.kind() == _InputArray::MAT)
{
vc_ >> _frame.getMatRef();
}
else if(_frame.kind() == _InputArray::GPU_MAT)
{
vc_ >> frame_;
arrCopy(frame_, _frame);
}
else if(_frame.kind() == _InputArray::OCL_MAT)
{
vc_ >> frame_;
if(!frame_.empty())
{
arrCopy(frame_, _frame);
}
}
else if (_frame.isUMat())
vc_ >> *(UMat *)_frame.getObj();
else
{
//should never get here
// should never get here
CV_Assert(0);
}
}
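A caller-side sketch of the branch added above; createFrameSource_Video is the existing superres factory, and the file name is illustrative only:
#include "opencv2/superres.hpp"
static void readBothWays()
{
    cv::Ptr<cv::superres::FrameSource> src =
        cv::superres::createFrameSource_Video("input.avi"); // illustrative file name

    cv::Mat cpuFrame;
    src->nextFrame(cpuFrame);   // MAT kind: the decoded frame is written straight into the Mat

    cv::UMat oclFrame;
    src->nextFrame(oclFrame);   // new isUMat() branch: VideoCapture streams into the UMat
}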

@ -62,6 +62,23 @@ Mat cv::superres::arrGetMat(InputArray arr, Mat& buf)
}
}
UMat cv::superres::arrGetUMat(InputArray arr, UMat& buf)
{
switch (arr.kind())
{
case _InputArray::GPU_MAT:
arr.getGpuMat().download(buf);
return buf;
case _InputArray::OPENGL_BUFFER:
arr.getOGlBuffer().copyTo(buf);
return buf;
default:
return arr.getUMat();
}
}
GpuMat cv::superres::arrGetGpuMat(InputArray arr, GpuMat& buf)
{
switch (arr.kind())
@ -108,62 +125,39 @@ namespace
{
src.getGpuMat().copyTo(dst.getGpuMatRef());
}
#ifdef HAVE_OPENCV_OCL
void ocl2mat(InputArray src, OutputArray dst)
{
dst.getMatRef() = (Mat)ocl::getOclMatRef(src);
}
void mat2ocl(InputArray src, OutputArray dst)
{
Mat m = src.getMat();
ocl::getOclMatRef(dst) = (ocl::oclMat)m;
}
void ocl2ocl(InputArray src, OutputArray dst)
{
ocl::getOclMatRef(src).copyTo(ocl::getOclMatRef(dst));
}
#else
void ocl2mat(InputArray, OutputArray)
{
CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
}
void mat2ocl(InputArray, OutputArray)
{
CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
}
void ocl2ocl(InputArray, OutputArray)
{
CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");
}
#endif
}
void cv::superres::arrCopy(InputArray src, OutputArray dst)
{
if (dst.isUMat() || src.isUMat())
{
src.copyTo(dst);
return;
}
typedef void (*func_t)(InputArray src, OutputArray dst);
static const func_t funcs[11][11] =
static const func_t funcs[10][10] =
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0 /*arr2tex*/, mat2gpu, mat2ocl},
{0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 /*buf2arr*/, buf2arr, 0 },
{0, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0 /*tex2arr*/, 0},
{0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 /*arr2tex*/, gpu2gpu, 0 },
{0, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, 0, 0, 0, ocl2ocl}
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu },
{ 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu },
{ 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu },
{ 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu },
{ 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu },
{ 0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, 0, mat2gpu },
{ 0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0, buf2arr },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, 0 , gpu2gpu },
};
const int src_kind = src.kind() >> _InputArray::KIND_SHIFT;
const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT;
CV_DbgAssert( src_kind >= 0 && src_kind < 11 );
CV_DbgAssert( dst_kind >= 0 && dst_kind < 11 );
CV_Assert( src_kind >= 0 && src_kind < 10 );
CV_Assert( dst_kind >= 0 && dst_kind < 10 );
const func_t func = funcs[src_kind][dst_kind];
CV_DbgAssert( func != 0 );
CV_Assert( func != 0 );
func(src, dst);
}
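A usage sketch of the dispatcher above; the GpuMat branch assumes a CUDA-enabled build, and the matrix contents are arbitrary:
#include "opencv2/core/cuda.hpp"
#include "input_array_utility.hpp"
static void copyExamples()
{
    cv::Mat m(480, 640, CV_8UC3, cv::Scalar::all(0));

    cv::UMat u;
    cv::superres::arrCopy(m, u);   // early isUMat() path: plain src.copyTo(dst)

    cv::cuda::GpuMat g;
    cv::superres::arrCopy(m, g);   // table dispatch: mat2gpu uploads to the device
}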
@ -172,20 +166,21 @@ namespace
{
void convertToCn(InputArray src, OutputArray dst, int cn)
{
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
int scn = src.channels();
CV_Assert( scn == 1 || scn == 3 || scn == 4 );
CV_Assert( cn == 1 || cn == 3 || cn == 4 );
static const int codes[5][5] =
{
{-1, -1, -1, -1, -1},
{-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA},
{-1, -1, -1, -1, -1},
{-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA},
{-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1},
{ -1, -1, -1, -1, -1 },
{ -1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA },
{ -1, -1, -1, -1, -1 },
{ -1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA },
{ -1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1 }
};
const int code = codes[src.channels()][cn];
CV_DbgAssert( code >= 0 );
const int code = codes[scn][cn];
CV_Assert( code >= 0 );
switch (src.kind())
{
@ -202,6 +197,7 @@ namespace
break;
}
}
void convertToDepth(InputArray src, OutputArray dst, int depth)
{
CV_Assert( src.depth() <= CV_64F );
@ -226,6 +222,11 @@ namespace
src.getGpuMat().convertTo(dst.getGpuMatRef(), depth, scale);
break;
case _InputArray::UMAT:
case _InputArray::UEXPR:
src.getUMat().convertTo(dst, depth, scale);
break;
default:
src.getMat().convertTo(dst, depth, scale);
break;
@ -258,7 +259,7 @@ Mat cv::superres::convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1)
return buf1;
}
GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, GpuMat& buf1)
UMat cv::superres::convertToType(const UMat& src, int type, UMat& buf0, UMat& buf1)
{
if (src.type() == type)
return src;
@ -282,49 +283,8 @@ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, Gp
convertToDepth(buf0, buf1, depth);
return buf1;
}
#ifdef HAVE_OPENCV_OCL
namespace
{
// TODO(pengx17): remove these overloaded functions once InputArray fully supports oclMat
void convertToCn(const ocl::oclMat& src, ocl::oclMat& dst, int cn)
{
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
CV_Assert( cn == 1 || cn == 3 || cn == 4 );
static const int codes[5][5] =
{
{-1, -1, -1, -1, -1},
{-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA},
{-1, -1, -1, -1, -1},
{-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA},
{-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1},
};
const int code = codes[src.channels()][cn];
CV_DbgAssert( code >= 0 );
ocl::cvtColor(src, dst, code, cn);
}
void convertToDepth(const ocl::oclMat& src, ocl::oclMat& dst, int depth)
{
CV_Assert( src.depth() <= CV_64F );
CV_Assert( depth == CV_8U || depth == CV_32F );
static const double maxVals[] =
{
std::numeric_limits<uchar>::max(),
std::numeric_limits<schar>::max(),
std::numeric_limits<ushort>::max(),
std::numeric_limits<short>::max(),
std::numeric_limits<int>::max(),
1.0,
1.0,
};
const double scale = maxVals[depth] / maxVals[src.depth()];
src.convertTo(dst, depth, scale);
}
}
ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1)
GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, GpuMat& buf1)
{
if (src.type() == type)
return src;
@ -348,4 +308,3 @@ ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::o
convertToDepth(buf0, buf1, depth);
return buf1;
}
#endif

@ -45,25 +45,20 @@
#include "opencv2/core.hpp"
#include "opencv2/core/cuda.hpp"
#ifdef HAVE_OPENCV_OCL
#include "opencv2/ocl.hpp"
#endif
namespace cv
{
namespace superres
{
CV_EXPORTS Mat arrGetMat(InputArray arr, Mat& buf);
CV_EXPORTS UMat arrGetUMat(InputArray arr, UMat& buf);
CV_EXPORTS cuda::GpuMat arrGetGpuMat(InputArray arr, cuda::GpuMat& buf);
CV_EXPORTS void arrCopy(InputArray src, OutputArray dst);
CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1);
CV_EXPORTS UMat convertToType(const UMat& src, int type, UMat& buf0, UMat& buf1);
CV_EXPORTS cuda::GpuMat convertToType(const cuda::GpuMat& src, int type, cuda::GpuMat& buf0, cuda::GpuMat& buf1);
#ifdef HAVE_OPENCV_OCL
CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1);
#endif
}
}

@ -43,160 +43,137 @@
//
//M*/
__kernel void buildMotionMapsKernel(__global float* forwardMotionX,
__global float* forwardMotionY,
__global float* backwardMotionX,
__global float* backwardMotionY,
__global float* forwardMapX,
__global float* forwardMapY,
__global float* backwardMapX,
__global float* backwardMapY,
int forwardMotionX_row,
int forwardMotionX_col,
int forwardMotionX_step,
int forwardMotionY_step,
int backwardMotionX_step,
int backwardMotionY_step,
int forwardMapX_step,
int forwardMapY_step,
int backwardMapX_step,
int backwardMapY_step
)
#ifndef cn
#define cn 1
#endif
#define sz (int)sizeof(float)
#define src_elem_at(_src, y, step, x) *(__global const float *)(_src + mad24(y, step, (x) * sz))
#define dst_elem_at(_dst, y, step, x) *(__global float *)(_dst + mad24(y, step, (x) * sz))
__kernel void buildMotionMaps(__global const uchar * forwardMotionPtr, int forwardMotion_step, int forwardMotion_offset,
__global const uchar * backwardMotionPtr, int backwardMotion_step, int backwardMotion_offset,
__global const uchar * forwardMapPtr, int forwardMap_step, int forwardMap_offset,
__global const uchar * backwardMapPtr, int backwardMap_step, int backwardMap_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < forwardMotionX_col && y < forwardMotionX_row)
if (x < cols && y < rows)
{
float fx = forwardMotionX[y * forwardMotionX_step + x];
float fy = forwardMotionY[y * forwardMotionY_step + x];
int forwardMotion_index = mad24(forwardMotion_step, y, (int)sizeof(float2) * x + forwardMotion_offset);
int backwardMotion_index = mad24(backwardMotion_step, y, (int)sizeof(float2) * x + backwardMotion_offset);
int forwardMap_index = mad24(forwardMap_step, y, (int)sizeof(float2) * x + forwardMap_offset);
int backwardMap_index = mad24(backwardMap_step, y, (int)sizeof(float2) * x + backwardMap_offset);
float bx = backwardMotionX[y * backwardMotionX_step + x];
float by = backwardMotionY[y * backwardMotionY_step + x];
float2 forwardMotion = *(__global const float2 *)(forwardMotionPtr + forwardMotion_index);
float2 backwardMotion = *(__global const float2 *)(backwardMotionPtr + backwardMotion_index);
__global float2 * forwardMap = (__global float2 *)(forwardMapPtr + forwardMap_index);
__global float2 * backwardMap = (__global float2 *)(backwardMapPtr + backwardMap_index);
forwardMapX[y * forwardMapX_step + x] = x + bx;
forwardMapY[y * forwardMapY_step + x] = y + by;
float2 basePoint = (float2)(x, y);
backwardMapX[y * backwardMapX_step + x] = x + fx;
backwardMapY[y * backwardMapY_step + x] = y + fy;
forwardMap[0] = basePoint + backwardMotion;
backwardMap[0] = basePoint + forwardMotion;
}
}
__kernel void upscaleKernel(__global float* src,
__global float* dst,
int src_step,
int dst_step,
int src_row,
int src_col,
int scale,
int channels
)
__kernel void upscale(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int scale)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
if (x < src_cols && y < src_rows)
{
if(channels == 1)
{
dst[y * scale * dst_step + x * scale] = src[y * src_step + x];
}
else
{
vstore4(vload4(0, src + y * channels * src_step + 4 * x), 0, dst + y * channels * scale * dst_step + 4 * x * scale);
}
int src_index = mad24(y, src_step, sz * x * cn + src_offset);
int dst_index = mad24(y * scale, dst_step, sz * x * scale * cn + dst_offset);
__global const float * src = (__global const float *)(srcptr + src_index);
__global float * dst = (__global float *)(dstptr + dst_index);
#pragma unroll
for (int c = 0; c < cn; ++c)
dst[c] = src[c];
}
}
float diffSign(float a, float b)
inline float diffSign1(float a, float b)
{
return a > b ? 1.0f : a < b ? -1.0f : 0.0f;
}
float4 diffSign4(float4 a, float4 b)
inline float3 diffSign3(float3 a, float3 b)
{
float4 pos;
float3 pos;
pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f;
pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f;
pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f;
pos.w = 0.0f;
return pos;
}
__kernel void diffSignKernel(__global float* src1,
__global float* src2,
__global float* dst,
int src1_row,
int src1_col,
int dst_step,
int src1_step,
int src2_step)
__kernel void diffSign(__global const uchar * src1, int src1_step, int src1_offset,
__global const uchar * src2, int src2_step, int src2_offset,
__global uchar * dst, int dst_step, int dst_offset, int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src1_col && y < src1_row)
{
dst[y * dst_step + x] = diffSign(src1[y * src1_step + x], src2[y * src2_step + x]);
}
if (x < cols && y < rows)
*(__global float *)(dst + mad24(y, dst_step, sz * x + dst_offset)) =
diffSign1(*(__global const float *)(src1 + mad24(y, src1_step, sz * x + src1_offset)),
*(__global const float *)(src2 + mad24(y, src2_step, sz * x + src2_offset)));
}
__kernel void calcBtvRegularizationKernel(__global float* src,
__global float* dst,
int src_step,
int dst_step,
int src_row,
int src_col,
int ksize,
int channels,
__constant float* c_btvRegWeights
)
__kernel void calcBtvRegularization(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
int ksize, __constant float * c_btvRegWeights)
{
int x = get_global_id(0) + ksize;
int y = get_global_id(1) + ksize;
if ((y < src_row - ksize) && (x < src_col - ksize))
if (y < dst_rows - ksize && x < dst_cols - ksize)
{
if(channels == 1)
{
const float srcVal = src[y * src_step + x];
float dstVal = 0.0f;
src += src_offset;
for (int m = 0, count = 0; m <= ksize; ++m)
#if cn == 1
const float srcVal = src_elem_at(src, y, src_step, x);
float dstVal = 0.0f;
for (int m = 0, count = 0; m <= ksize; ++m)
for (int l = ksize; l + m >= 0; --l, ++count)
{
for (int l = ksize; l + m >= 0; --l, ++count)
{
dstVal = dstVal + c_btvRegWeights[count] * (diffSign(srcVal, src[(y + m) * src_step + (x + l)]) - diffSign(src[(y - m) * src_step + (x - l)], srcVal));
}
dstVal += c_btvRegWeights[count] * (diffSign1(srcVal, src_elem_at(src, y + m, src_step, x + l))
- diffSign1(src_elem_at(src, y - m, src_step, x - l), srcVal));
}
dst[y * dst_step + x] = dstVal;
}
else
{
float4 srcVal = vload4(0, src + y * src_step + 4 * x);
float4 dstVal = 0.f;
for (int m = 0, count = 0; m <= ksize; ++m)
dst_elem_at(dst, y, dst_step, x) = dstVal;
#elif cn == 3
__global const float * src0ptr = (__global const float *)(src + mad24(y, src_step, 3 * sz * x + src_offset));
float3 srcVal = (float3)(src0ptr[0], src0ptr[1], src0ptr[2]), dstVal = 0.f;
for (int m = 0, count = 0; m <= ksize; ++m)
{
for (int l = ksize; l + m >= 0; --l, ++count)
{
for (int l = ksize; l + m >= 0; --l, ++count)
{
float4 src1;
src1.x = src[(y + m) * src_step + 4 * (x + l) + 0];
src1.y = src[(y + m) * src_step + 4 * (x + l) + 1];
src1.z = src[(y + m) * src_step + 4 * (x + l) + 2];
src1.w = src[(y + m) * src_step + 4 * (x + l) + 3];
float4 src2;
src2.x = src[(y - m) * src_step + 4 * (x - l) + 0];
src2.y = src[(y - m) * src_step + 4 * (x - l) + 1];
src2.z = src[(y - m) * src_step + 4 * (x - l) + 2];
src2.w = src[(y - m) * src_step + 4 * (x - l) + 3];
dstVal = dstVal + c_btvRegWeights[count] * (diffSign4(srcVal, src1) - diffSign4(src2, srcVal));
}
__global const float * src1ptr = (__global const float *)(src + mad24(y + m, src_step, 3 * sz * (x + l) + src_offset));
__global const float * src2ptr = (__global const float *)(src + mad24(y - m, src_step, 3 * sz * (x - l) + src_offset));
float3 src1 = (float3)(src1ptr[0], src1ptr[1], src1ptr[2]);
float3 src2 = (float3)(src2ptr[0], src2ptr[1], src2ptr[2]);
dstVal += c_btvRegWeights[count] * (diffSign3(srcVal, src1) - diffSign3(src2, srcVal));
}
vstore4(dstVal, 0, dst + y * dst_step + 4 * x);
}
__global float * dstptr = (__global float *)(dst + mad24(y, dst_step, 3 * sz * x + dst_offset + 0));
dstptr[0] = dstVal.x;
dstptr[1] = dstVal.y;
dstptr[2] = dstVal.z;
#else
#error "Number of channels should be either 1 of 3"
#endif
}
}
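For context, a host-side launch of the diffSign kernel above might look like the following sketch; the generated program symbol (ocl::superres::superres_btvl1_oclsrc, from the module's generated OpenCL-kernels header) and the single-channel restriction are assumptions, not part of this patch:
#include "opencv2/core.hpp"
#include "opencv2/core/ocl.hpp"
static bool launchDiffSign(cv::InputArray _src1, cv::InputArray _src2, cv::OutputArray _dst)
{
    using namespace cv;

    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
    CV_Assert( src1.type() == CV_32FC1 && src1.size() == src2.size() ); // sketch covers the 1-channel case only

    _dst.create(src1.size(), src1.type());
    UMat dst = _dst.getUMat();

    ocl::Kernel k("diffSign", ocl::superres::superres_btvl1_oclsrc); // assumed generated source symbol
    if (k.empty())
        return false; // no OpenCL device, or the kernel failed to build

    // Argument order matches the kernel signature:
    // src1/src2 as (ptr, step, offset), dst as (ptr, step, offset, rows, cols)
    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
           ocl::KernelArg::WriteOnly(dst));

    size_t globalsize[2] = { (size_t)src1.cols, (size_t)src1.rows };
    return k.run(2, globalsize, NULL, false);
}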

@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/opencl/ocl_defs.hpp"
using namespace cv;
using namespace cv::cuda;
@ -61,21 +62,66 @@ namespace
void collectGarbage();
protected:
virtual void impl(const Mat& input0, const Mat& input1, OutputArray dst) = 0;
virtual void impl(InputArray input0, InputArray input1, OutputArray dst) = 0;
private:
bool ocl_calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2);
int work_type_;
// Mat
Mat buf_[6];
Mat flow_;
Mat flows_[2];
// UMat
UMat ubuf_[6];
UMat uflow_;
std::vector<UMat> uflows_;
};
CpuOpticalFlow::CpuOpticalFlow(int work_type) : work_type_(work_type)
CpuOpticalFlow::CpuOpticalFlow(int work_type) :
work_type_(work_type)
{
}
bool CpuOpticalFlow::ocl_calc(InputArray _frame0, InputArray _frame1, OutputArray _flow1, OutputArray _flow2)
{
UMat frame0 = arrGetUMat(_frame0, ubuf_[0]);
UMat frame1 = arrGetUMat(_frame1, ubuf_[1]);
CV_Assert( frame1.type() == frame0.type() );
CV_Assert( frame1.size() == frame0.size() );
UMat input0 = convertToType(frame0, work_type_, ubuf_[2], ubuf_[3]);
UMat input1 = convertToType(frame1, work_type_, ubuf_[4], ubuf_[5]);
if (!_flow2.needed())
{
impl(input0, input1, _flow1);
return true;
}
impl(input0, input1, uflow_);
if (!_flow2.needed())
arrCopy(uflow_, _flow1);
else
{
split(uflow_, uflows_);
arrCopy(uflows_[0], _flow1);
arrCopy(uflows_[1], _flow2);
}
return true;
}
void CpuOpticalFlow::calc(InputArray _frame0, InputArray _frame1, OutputArray _flow1, OutputArray _flow2)
{
CV_OCL_RUN(_flow1.isUMat() && (_flow2.isUMat() || !_flow2.needed()),
ocl_calc(_frame0, _frame1, _flow1, _flow2))
Mat frame0 = arrGetMat(_frame0, buf_[0]);
Mat frame1 = arrGetMat(_frame1, buf_[1]);
@ -94,9 +140,7 @@ namespace
impl(input0, input1, flow_);
if (!_flow2.needed())
{
arrCopy(flow_, _flow1);
}
else
{
split(flow_, flows_);
@ -108,11 +152,19 @@ namespace
void CpuOpticalFlow::collectGarbage()
{
// Mat
for (int i = 0; i < 6; ++i)
buf_[i].release();
flow_.release();
flows_[0].release();
flows_[1].release();
// UMat
for (int i = 0; i < 6; ++i)
ubuf_[i].release();
uflow_.release();
uflows_[0].release();
uflows_[1].release();
}
}
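A caller-side sketch of the new dispatch in calc(): when both flow outputs are UMat, CV_OCL_RUN routes the call to ocl_calc(). createOptFlow_Farneback is the existing factory; the random frames are placeholders:
#include "opencv2/superres.hpp"
#include "opencv2/superres/optical_flow.hpp"
static void computeFlowOnUMat()
{
    cv::Ptr<cv::superres::DenseOpticalFlowExt> flow = cv::superres::createOptFlow_Farneback();

    cv::UMat frame0(480, 640, CV_8UC1), frame1(480, 640, CV_8UC1), flowx, flowy;
    cv::randu(frame0, 0, 255);  // placeholder frames; real callers pass consecutive video frames
    cv::randu(frame1, 0, 255);

    // Both outputs are UMat, so CV_OCL_RUN dispatches to ocl_calc() when an
    // OpenCL device is available; otherwise the Mat-based path below it runs.
    flow->calc(frame0, frame1, flowx, flowy);
}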
@ -129,7 +181,7 @@ namespace
Farneback();
protected:
void impl(const Mat& input0, const Mat& input1, OutputArray dst);
void impl(InputArray input0, InputArray input1, OutputArray dst);
private:
double pyrScale_;
@ -161,7 +213,7 @@ namespace
flags_ = 0;
}
void Farneback::impl(const Mat& input0, const Mat& input1, OutputArray dst)
void Farneback::impl(InputArray input0, InputArray input1, OutputArray dst)
{
calcOpticalFlowFarneback(input0, input1, (InputOutputArray)dst, pyrScale_,
numLevels_, winSize_, numIters_,
@ -187,7 +239,7 @@ namespace
Simple();
protected:
void impl(const Mat& input0, const Mat& input1, OutputArray dst);
void impl(InputArray input0, InputArray input1, OutputArray dst);
private:
int layers_;
@ -237,11 +289,9 @@ namespace
speedUpThr_ = 10;
}
void Simple::impl(const Mat& _input0, const Mat& _input1, OutputArray dst)
void Simple::impl(InputArray _input0, InputArray _input1, OutputArray _dst)
{
Mat input0 = _input0;
Mat input1 = _input1;
calcOpticalFlowSF(input0, input1, dst.getMatRef(),
calcOpticalFlowSF(_input0, _input1, _dst,
layers_,
averagingBlockSize_,
maxFlow_,
@ -278,7 +328,7 @@ namespace
void collectGarbage();
protected:
void impl(const Mat& input0, const Mat& input1, OutputArray dst);
void impl(InputArray input0, InputArray input1, OutputArray dst);
private:
double tau_;
@ -316,7 +366,7 @@ namespace
useInitialFlow_ = alg_->getBool("useInitialFlow");
}
void DualTVL1::impl(const Mat& input0, const Mat& input1, OutputArray dst)
void DualTVL1::impl(InputArray input0, InputArray input1, OutputArray dst)
{
alg_->set("tau", tau_);
alg_->set("lambda", lambda_);
@ -720,269 +770,3 @@ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_CUDA()
}
#endif // HAVE_OPENCV_CUDAOPTFLOW
#ifdef HAVE_OPENCV_OCL
namespace
{
class oclOpticalFlow : public DenseOpticalFlowExt
{
public:
explicit oclOpticalFlow(int work_type);
void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2);
void collectGarbage();
protected:
virtual void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) = 0;
private:
int work_type_;
cv::ocl::oclMat buf_[6];
cv::ocl::oclMat u_, v_, flow_;
};
oclOpticalFlow::oclOpticalFlow(int work_type) : work_type_(work_type)
{
}
void oclOpticalFlow::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2)
{
ocl::oclMat& _frame0 = ocl::getOclMatRef(frame0);
ocl::oclMat& _frame1 = ocl::getOclMatRef(frame1);
ocl::oclMat& _flow1 = ocl::getOclMatRef(flow1);
ocl::oclMat& _flow2 = ocl::getOclMatRef(flow2);
CV_Assert( _frame1.type() == _frame0.type() );
CV_Assert( _frame1.size() == _frame0.size() );
cv::ocl::oclMat input0_ = convertToType(_frame0, work_type_, buf_[2], buf_[3]);
cv::ocl::oclMat input1_ = convertToType(_frame1, work_type_, buf_[4], buf_[5]);
impl(input0_, input1_, u_, v_); // run the underlying optical flow algorithm
u_.copyTo(_flow1);
v_.copyTo(_flow2);
}
void oclOpticalFlow::collectGarbage()
{
for (int i = 0; i < 6; ++i)
buf_[i].release();
u_.release();
v_.release();
flow_.release();
}
}
///////////////////////////////////////////////////////////////////
// PyrLK_OCL
namespace
{
class PyrLK_OCL : public oclOpticalFlow
{
public:
AlgorithmInfo* info() const;
PyrLK_OCL();
void collectGarbage();
protected:
void impl(const ocl::oclMat& input0, const ocl::oclMat& input1, ocl::oclMat& dst1, ocl::oclMat& dst2);
private:
int winSize_;
int maxLevel_;
int iterations_;
ocl::PyrLKOpticalFlow alg_;
};
CV_INIT_ALGORITHM(PyrLK_OCL, "DenseOpticalFlowExt.PyrLK_OCL",
obj.info()->addParam(obj, "winSize", obj.winSize_);
obj.info()->addParam(obj, "maxLevel", obj.maxLevel_);
obj.info()->addParam(obj, "iterations", obj.iterations_))
PyrLK_OCL::PyrLK_OCL() : oclOpticalFlow(CV_8UC1)
{
winSize_ = alg_.winSize.width;
maxLevel_ = alg_.maxLevel;
iterations_ = alg_.iters;
}
void PyrLK_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
{
alg_.winSize.width = winSize_;
alg_.winSize.height = winSize_;
alg_.maxLevel = maxLevel_;
alg_.iters = iterations_;
alg_.dense(input0, input1, dst1, dst2);
}
void PyrLK_OCL::collectGarbage()
{
alg_.releaseMemory();
oclOpticalFlow::collectGarbage();
}
}
Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_OCL()
{
return makePtr<PyrLK_OCL>();
}
///////////////////////////////////////////////////////////////////
// DualTVL1_OCL
namespace
{
class DualTVL1_OCL : public oclOpticalFlow
{
public:
AlgorithmInfo* info() const;
DualTVL1_OCL();
void collectGarbage();
protected:
void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2);
private:
double tau_;
double lambda_;
double theta_;
int nscales_;
int warps_;
double epsilon_;
int iterations_;
bool useInitialFlow_;
ocl::OpticalFlowDual_TVL1_OCL alg_;
};
CV_INIT_ALGORITHM(DualTVL1_OCL, "DenseOpticalFlowExt.DualTVL1_OCL",
obj.info()->addParam(obj, "tau", obj.tau_);
obj.info()->addParam(obj, "lambda", obj.lambda_);
obj.info()->addParam(obj, "theta", obj.theta_);
obj.info()->addParam(obj, "nscales", obj.nscales_);
obj.info()->addParam(obj, "warps", obj.warps_);
obj.info()->addParam(obj, "epsilon", obj.epsilon_);
obj.info()->addParam(obj, "iterations", obj.iterations_);
obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_))
DualTVL1_OCL::DualTVL1_OCL() : oclOpticalFlow(CV_8UC1)
{
tau_ = alg_.tau;
lambda_ = alg_.lambda;
theta_ = alg_.theta;
nscales_ = alg_.nscales;
warps_ = alg_.warps;
epsilon_ = alg_.epsilon;
iterations_ = alg_.iterations;
useInitialFlow_ = alg_.useInitialFlow;
}
void DualTVL1_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
{
alg_.tau = tau_;
alg_.lambda = lambda_;
alg_.theta = theta_;
alg_.nscales = nscales_;
alg_.warps = warps_;
alg_.epsilon = epsilon_;
alg_.iterations = iterations_;
alg_.useInitialFlow = useInitialFlow_;
alg_(input0, input1, dst1, dst2);
}
void DualTVL1_OCL::collectGarbage()
{
alg_.collectGarbage();
oclOpticalFlow::collectGarbage();
}
}
Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_OCL()
{
return makePtr<DualTVL1_OCL>();
}
///////////////////////////////////////////////////////////////////
// FarneBack
namespace
{
class FarneBack_OCL : public oclOpticalFlow
{
public:
AlgorithmInfo* info() const;
FarneBack_OCL();
void collectGarbage();
protected:
void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2);
private:
double pyrScale_;
int numLevels_;
int winSize_;
int numIters_;
int polyN_;
double polySigma_;
int flags_;
ocl::FarnebackOpticalFlow alg_;
};
CV_INIT_ALGORITHM(FarneBack_OCL, "DenseOpticalFlowExt.FarneBack_OCL",
obj.info()->addParam(obj, "pyrScale", obj.pyrScale_);
obj.info()->addParam(obj, "numLevels", obj.numLevels_);
obj.info()->addParam(obj, "winSize", obj.winSize_);
obj.info()->addParam(obj, "numIters", obj.numIters_);
obj.info()->addParam(obj, "polyN", obj.polyN_);
obj.info()->addParam(obj, "polySigma", obj.polySigma_);
obj.info()->addParam(obj, "flags", obj.flags_))
FarneBack_OCL::FarneBack_OCL() : oclOpticalFlow(CV_8UC1)
{
pyrScale_ = alg_.pyrScale;
numLevels_ = alg_.numLevels;
winSize_ = alg_.winSize;
numIters_ = alg_.numIters;
polyN_ = alg_.polyN;
polySigma_ = alg_.polySigma;
flags_ = alg_.flags;
}
void FarneBack_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
{
alg_.pyrScale = pyrScale_;
alg_.numLevels = numLevels_;
alg_.winSize = winSize_;
alg_.numIters = numIters_;
alg_.polyN = polyN_;
alg_.polySigma = polySigma_;
alg_.flags = flags_;
alg_(input0, input1, dst1, dst2);
}
void FarneBack_OCL::collectGarbage()
{
alg_.releaseMemory();
oclOpticalFlow::collectGarbage();
}
}
Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_Farneback_OCL()
{
return makePtr<FarneBack_OCL>();
}
#endif

@ -82,10 +82,6 @@
# include "opencv2/cudacodec.hpp"
#endif
#ifdef HAVE_OPENCV_OCL
#include "opencv2/ocl/private/util.hpp"
#endif
#ifdef HAVE_OPENCV_HIGHGUI
#include "opencv2/highgui.hpp"
#endif

@ -54,16 +54,20 @@ cv::superres::SuperResolution::SuperResolution()
{
frameSource_ = createFrameSource_Empty();
firstCall_ = true;
isUmat_ = false;
}
void cv::superres::SuperResolution::setInput(const Ptr<FrameSource>& frameSource)
{
frameSource_ = frameSource;
firstCall_ = true;
isUmat_ = false;
}
void cv::superres::SuperResolution::nextFrame(OutputArray frame)
{
isUmat_ = frame.isUMat();
if (firstCall_)
{
initImpl(frameSource_);
@ -77,6 +81,7 @@ void cv::superres::SuperResolution::reset()
{
frameSource_->reset();
firstCall_ = true;
isUmat_ = false;
}
void cv::superres::SuperResolution::collectGarbage()

@ -41,6 +41,7 @@
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
class AllignedFrameSource : public cv::superres::FrameSource
{
@ -52,6 +53,7 @@ public:
private:
cv::Ptr<cv::superres::FrameSource> base_;
cv::Mat origFrame_;
int scale_;
};
@ -67,9 +69,7 @@ void AllignedFrameSource::nextFrame(cv::OutputArray frame)
base_->nextFrame(origFrame_);
if (origFrame_.rows % scale_ == 0 && origFrame_.cols % scale_ == 0)
{
cv::superres::arrCopy(origFrame_, frame);
}
else
{
cv::Rect ROI(0, 0, (origFrame_.cols / scale_) * scale_, (origFrame_.rows / scale_) * scale_);
@ -92,6 +92,7 @@ public:
private:
cv::Ptr<cv::superres::FrameSource> base_;
cv::Mat origFrame_;
cv::Mat blurred_;
cv::Mat deg_;
@ -104,28 +105,25 @@ DegradeFrameSource::DegradeFrameSource(const cv::Ptr<cv::superres::FrameSource>&
CV_Assert( base_ );
}
void addGaussNoise(cv::Mat& image, double sigma)
static void addGaussNoise(cv::OutputArray _image, double sigma)
{
cv::Mat noise(image.size(), CV_32FC(image.channels()));
int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
cv::Mat noise(_image.size(), CV_32FC(cn));
cvtest::TS::ptr()->get_rng().fill(noise, cv::RNG::NORMAL, 0.0, sigma);
cv::addWeighted(image, 1.0, noise, 1.0, 0.0, image, image.depth());
cv::addWeighted(_image, 1.0, noise, 1.0, 0.0, _image, depth);
}
void addSpikeNoise(cv::Mat& image, int frequency)
static void addSpikeNoise(cv::OutputArray _image, int frequency)
{
cv::Mat_<uchar> mask(image.size(), 0);
cv::Mat_<uchar> mask(_image.size(), 0);
for (int y = 0; y < mask.rows; ++y)
{
for (int x = 0; x < mask.cols; ++x)
{
if (cvtest::TS::ptr()->get_rng().uniform(0, frequency) < 1)
mask(y, x) = 255;
}
}
image.setTo(cv::Scalar::all(255), mask);
_image.setTo(cv::Scalar::all(255), mask);
}
void DegradeFrameSource::nextFrame(cv::OutputArray frame)
@ -146,7 +144,7 @@ void DegradeFrameSource::reset()
base_->reset();
}
double MSSIM(const cv::Mat& i1, const cv::Mat& i2)
double MSSIM(cv::InputArray _i1, cv::InputArray _i2)
{
const double C1 = 6.5025;
const double C2 = 58.5225;
@ -154,8 +152,8 @@ double MSSIM(const cv::Mat& i1, const cv::Mat& i2)
const int depth = CV_32F;
cv::Mat I1, I2;
i1.convertTo(I1, depth);
i2.convertTo(I2, depth);
_i1.getMat().convertTo(I1, depth);
_i2.getMat().convertTo(I2, depth);
cv::Mat I2_2 = I2.mul(I2); // I2^2
cv::Mat I1_2 = I1.mul(I1); // I1^2
@ -201,7 +199,7 @@ double MSSIM(const cv::Mat& i1, const cv::Mat& i2)
// mssim = average of ssim map
cv::Scalar mssim = cv::mean(ssim_map);
if (i1.channels() == 1)
if (_i1.channels() == 1)
return mssim[0];
return (mssim[0] + mssim[1] + mssim[2]) / 3;
@ -210,9 +208,11 @@ double MSSIM(const cv::Mat& i1, const cv::Mat& i2)
class SuperResolution : public testing::Test
{
public:
template <typename T>
void RunTest(cv::Ptr<cv::superres::SuperResolution> superRes);
};
template <typename T>
void SuperResolution::RunTest(cv::Ptr<cv::superres::SuperResolution> superRes)
{
const std::string inputVideoName = cvtest::TS::ptr()->get_data_path() + "car.avi";
@ -245,7 +245,8 @@ void SuperResolution::RunTest(cv::Ptr<cv::superres::SuperResolution> superRes)
double srAvgMSSIM = 0.0;
const int count = 10;
cv::Mat goldFrame, superResFrame;
cv::Mat goldFrame;
T superResFrame;
for (int i = 0; i < count; ++i)
{
goldSource->nextFrame(goldFrame);
@ -266,24 +267,28 @@ void SuperResolution::RunTest(cv::Ptr<cv::superres::SuperResolution> superRes)
TEST_F(SuperResolution, BTVL1)
{
RunTest(cv::superres::createSuperResolution_BTVL1());
RunTest<cv::Mat>(cv::superres::createSuperResolution_BTVL1());
}
#if defined(HAVE_CUDA) && defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) && defined(HAVE_OPENCV_CUDAFILTERS)
TEST_F(SuperResolution, BTVL1_CUDA)
{
RunTest(cv::superres::createSuperResolution_BTVL1_CUDA());
RunTest<cv::Mat>(cv::superres::createSuperResolution_BTVL1_CUDA());
}
#endif
#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL)
#ifdef HAVE_OPENCL
TEST_F(SuperResolution, BTVL1_OCL)
namespace cvtest {
namespace ocl {
OCL_TEST_F(SuperResolution, BTVL1)
{
if (cv::ocl::useOpenCL())
RunTest(cv::superres::createSuperResolution_BTVL1_OCL());
RunTest<cv::UMat>(cv::superres::createSuperResolution_BTVL1());
}
} } // namespace cvtest::ocl
#endif
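For reference, a minimal sketch of what the OCL_TEST_F case above exercises: requesting a UMat result from the unified BTVL1 implementation (car.avi as in the test; all parameters left at their defaults):
#include "opencv2/superres.hpp"
static void superResToUMat()
{
    cv::Ptr<cv::superres::SuperResolution> sr = cv::superres::createSuperResolution_BTVL1();
    sr->setInput(cv::superres::createFrameSource_Video("car.avi"));

    cv::UMat result;
    sr->nextFrame(result);   // a UMat output sets isUmat_, letting processImpl take the OpenCL path
}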

@ -99,10 +99,14 @@ using std::tr1::tuple;
#define OCL_TEST_CYCLE() \
for (cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
#define OCL_TEST_CYCLE_N(n) \
for(declare.iterations(n), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer())
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) \
for (declare.runs(runsNum), cvtest::ocl::perf::safeFinish(); startTimer(), next(); cvtest::ocl::perf::safeFinish(), stopTimer()) \
for (int r = 0; r < runsNum; cvtest::ocl::perf::safeFinish(), ++r)
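A hypothetical perf case built on the OCL_TEST_CYCLE_N(n) macro added above; the fixture, the blur call, and the sizes are illustrative only and assume the usual opencv2/ts/ocl_perf.hpp environment of a *_perf target:
typedef ::perf::TestBaseWithParam<cv::Size> BlurFixture;
OCL_PERF_TEST_P(BlurFixture, Blur3x3, ::testing::Values(cv::Size(1920, 1080)))
{
    const cv::Size size = GetParam();
    cv::UMat src(size, CV_8UC1), dst(size, CV_8UC1);
    declare.in(src, WARMUP_RNG).out(dst);

    // Run exactly 10 timed iterations, synchronizing the OpenCL queue around each one.
    OCL_TEST_CYCLE_N(10) cv::blur(src, dst, cv::Size(3, 3));

    SANITY_CHECK(dst);
}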
namespace perf {
// Check for current device limitation
