Merge remote-tracking branch 'upstream/3.4' into merge-3.4

pull/11421/head
Alexander Alekhin 7 years ago
commit 000a13b6a3
37 changed files (changed-line counts in parentheses; BIN = binary file):
  1. CMakeLists.txt (26)
  2. cmake/OpenCVCompilerOptimizations.cmake (8)
  3. cmake/OpenCVCompilerOptions.cmake (2)
  4. cmake/OpenCVDetectCUDA.cmake (3)
  5. cmake/OpenCVFindLibsVideo.cmake (7)
  6. cmake/OpenCVFindProtobuf.cmake (7)
  7. cmake/OpenCVUtils.cmake (37)
  8. cmake/checks/cpu_fp16.cpp (2)
  9. doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md (34)
  10. doc/tutorials/dnn/images/lena_hed.jpg (BIN)
  11. modules/core/include/opencv2/core/cvstd.inl.hpp (4)
  12. modules/core/include/opencv2/core/mat.hpp (4)
  13. modules/core/include/opencv2/core/mat.inl.hpp (21)
  14. modules/core/src/convert_scale.cpp (17)
  15. modules/core/src/cuda_gpu_mat.cpp (12)
  16. modules/core/src/norm.cpp (10)
  17. modules/core/src/parallel.cpp (2)
  18. modules/core/test/test_arithm.cpp (37)
  19. modules/dnn/include/opencv2/dnn/shape_utils.hpp (2)
  20. modules/dnn/misc/python/pyopencv_dnn.hpp (180)
  21. modules/dnn/src/layers/elementwise_layers.cpp (1)
  22. modules/dnn/src/layers/prior_box_layer.cpp (4)
  23. modules/dnn/src/layers/slice_layer.cpp (4)
  24. modules/dnn/src/op_halide.cpp (5)
  25. modules/imgproc/src/clahe.cpp (11)
  26. modules/imgproc/src/fixedpoint.inl.hpp (75)
  27. modules/imgproc/src/opencl/clahe.cl (72)
  28. modules/python/src2/cv2.cpp (4)
  29. modules/stitching/include/opencv2/stitching/detail/blenders.hpp (12)
  30. modules/stitching/src/blenders.cpp (184)
  31. modules/stitching/test/test_blenders.cuda.cpp (14)
  32. modules/videoio/CMakeLists.txt (3)
  33. modules/videoio/src/cap_msmf.cpp (5150)
  34. modules/videoio/src/cap_msmf.hpp (3113)
  35. modules/videoio/src/cap_openni2.cpp (1)
  36. platforms/android/build_sdk.py (6)
  37. samples/dnn/edge_detection.py (69)

@ -277,6 +277,7 @@ OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF
OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_LAPACK "Include Lapack library support" (NOT CV_DISABLE_OPTIMIZATION) IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_ITT "Include Intel ITT support" ON IF (NOT APPLE_FRAMEWORK) )
OCV_OPTION(WITH_PROTOBUF "Enable libprotobuf" ON )
# OpenCV build components
# ===================================================
@ -523,16 +524,7 @@ set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV
find_host_package(Git QUIET)
if(NOT DEFINED OPENCV_VCSVERSION AND GIT_FOUND)
execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --always --dirty --match "[0-9].[0-9].[0-9]*"
WORKING_DIRECTORY "${OpenCV_SOURCE_DIR}"
OUTPUT_VARIABLE OPENCV_VCSVERSION
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT GIT_RESULT EQUAL 0)
set(OPENCV_VCSVERSION "unknown")
endif()
ocv_git_describe(OPENCV_VCSVERSION "${OpenCV_SOURCE_DIR}")
elseif(NOT DEFINED OPENCV_VCSVERSION)
# We don't have git:
set(OPENCV_VCSVERSION "unknown")
@ -928,19 +920,7 @@ if(OPENCV_EXTRA_MODULES_PATH AND NOT BUILD_INFO_SKIP_EXTRA_MODULES)
else()
status("")
endif()
set(EXTRA_MODULES_VCSVERSION "unknown")
if(GIT_FOUND)
execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --always --dirty --match "[0-9].[0-9].[0-9]*"
WORKING_DIRECTORY "${p}"
OUTPUT_VARIABLE EXTRA_MODULES_VCSVERSION
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT GIT_RESULT EQUAL 0)
set(EXTRA_MODULES_VCSVERSION "unknown")
endif()
endif()
ocv_git_describe(EXTRA_MODULES_VCSVERSION "${p}")
status(" Location (extra):" ${p})
status(" Version control (extra):" ${EXTRA_MODULES_VCSVERSION})
endif()

@ -234,6 +234,7 @@ if(X86 OR X86_64)
elseif(MSVC)
ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2")
ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX")
ocv_update(CPU_FP16_FLAGS_ON "/arch:AVX")
if(NOT MSVC64)
# 64-bit MSVC compiler uses SSE/SSE2 by default
ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE")
@ -422,6 +423,9 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
if(__is_disabled)
set(__is_from_baseline 0)
else()
if(CPU_${OPT}_SUPPORTED AND CPU_BASELINE_DETECT)
list(APPEND CPU_BASELINE_FINAL ${OPT})
endif()
ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
if(NOT __is_from_baseline)
ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
@ -441,7 +445,9 @@ foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline)
list(APPEND CPU_DISPATCH_FINAL ${OPT})
elseif(__is_from_baseline)
list(APPEND CPU_BASELINE_FINAL ${OPT})
if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};")
list(APPEND CPU_BASELINE_FINAL ${OPT})
endif()
ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
endif()
endif()

@ -104,7 +104,7 @@ if(CV_GCC OR CV_CLANG)
add_extra_compiler_option(-Wuninitialized)
add_extra_compiler_option(-Winit-self)
if(HAVE_CXX11)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT ENABLE_PRECOMPILED_HEADERS)
add_extra_compiler_option(-Wsuggest-override)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
add_extra_compiler_option(-Winconsistent-missing-override)

@ -200,6 +200,9 @@ if(CUDA_FOUND)
string(REPLACE "-frtti" "" ${var} "${${var}}")
string(REPLACE "-fvisibility-inlines-hidden" "" ${var} "${${var}}")
# cc1: warning: command line option '-Wsuggest-override' is valid for C++/ObjC++ but not for C
string(REPLACE "-Wsuggest-override" "" ${var} "${${var}}")
endforeach()
endmacro()

@ -261,6 +261,13 @@ endif(WITH_DSHOW)
ocv_clear_vars(HAVE_MSMF)
if(WITH_MSMF)
check_include_file(Mfapi.h HAVE_MSMF)
check_include_file(D3D11.h D3D11_found)
check_include_file(D3d11_4.h D3D11_4_found)
if(D3D11_found AND D3D11_4_found)
set(HAVE_DXVA YES)
else()
set(HAVE_DXVA NO)
endif()
endif(WITH_MSMF)
# --- Extra HighGUI and VideoIO libs on Windows ---

@ -1,15 +1,14 @@
# If protobuf is found - libprotobuf target is available
ocv_option(WITH_PROTOBUF "Enable libprotobuf" ON)
ocv_option(BUILD_PROTOBUF "Force to build libprotobuf from sources" ON)
ocv_option(PROTOBUF_UPDATE_FILES "Force rebuilding .proto files (protoc should be available)" OFF)
set(HAVE_PROTOBUF FALSE)
if(NOT WITH_PROTOBUF)
return()
endif()
ocv_option(BUILD_PROTOBUF "Force to build libprotobuf from sources" ON)
ocv_option(PROTOBUF_UPDATE_FILES "Force rebuilding .proto files (protoc should be available)" OFF)
function(get_protobuf_version version include)
file(STRINGS "${include}/google/protobuf/stubs/common.h" ver REGEX "#define GOOGLE_PROTOBUF_VERSION [0-9]+")
string(REGEX MATCHALL "[0-9]+" ver ${ver})

@ -1620,3 +1620,40 @@ if(NOT CMAKE_VERSION VERSION_LESS 3.1)
else()
set(compatible_MESSAGE_NEVER "")
endif()
macro(ocv_git_describe var_name path)
if(GIT_FOUND)
execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match --dirty
WORKING_DIRECTORY "${path}"
OUTPUT_VARIABLE ${var_name}
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT GIT_RESULT EQUAL 0)
execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --always --dirty --match "[0-9].[0-9].[0-9]*" --exclude "[^-]*-cvsdk"
WORKING_DIRECTORY "${path}"
OUTPUT_VARIABLE ${var_name}
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT GIT_RESULT EQUAL 0) # --exclude is not supported by 'git'
# match only tags with complete OpenCV versions (ignores -alpha/-beta/-rc suffixes)
execute_process(COMMAND "${GIT_EXECUTABLE}" describe --tags --always --dirty --match "[0-9].[0-9]*[0-9]"
WORKING_DIRECTORY "${path}"
OUTPUT_VARIABLE ${var_name}
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT GIT_RESULT EQUAL 0)
set(${var_name} "unknown")
endif()
endif()
endif()
else()
set(${var_name} "unknown")
endif()
endmacro()

@ -1,6 +1,6 @@
#include <stdio.h>
#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__)
#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700 && defined __AVX__) || (defined __INTEL_COMPILER && defined __AVX__)
#include <immintrin.h>
int test()
{

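For context, a minimal hedged sketch (a standalone example of mine, not the check file itself, whose body is truncated above) of the F16C conversion intrinsics this guard compiles; on MSVC they require /arch:AVX, matching the CPU_FP16_FLAGS_ON change earlier in this commit:

```cpp
#include <stdio.h>
#include <immintrin.h>

int main()
{
    short halfs[8] = {0};                           // eight packed FP16 values
    __m128i h  = _mm_loadu_si128((const __m128i*)halfs);
    __m256  f  = _mm256_cvtph_ps(h);                // FP16 -> FP32
    __m128i h2 = _mm256_cvtps_ph(f, 0);             // FP32 -> FP16, round-to-nearest
    _mm_storeu_si128((__m128i*)halfs, h2);
    printf("FP16 conversions compiled\n");
    return 0;
}
```
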
@ -190,3 +190,37 @@ In our case resize's output shape will be stored in layer's `blobs[0]`.
Next we register a layer and try to import the model.
@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
## Define a custom layer in Python
The following example shows how to customize OpenCV's layers in Python.
Let's consider the [Holistically-Nested Edge Detection](https://arxiv.org/abs/1504.06375)
deep learning model. It was trained with a single difference from the current version of the
[Caffe framework](http://caffe.berkeleyvision.org/): its `Crop` layers, which receive two
input blobs and crop the first one to match the spatial dimensions of the second, used to
crop from the center. Nowadays Caffe's `Crop` layer crops from the top-left corner, so with
the latest version of Caffe or OpenCV you'll get shifted results with filled borders.
Next we're going to replace OpenCV's `Crop` layer, which crops from the top-left corner,
with a center-cropping one.
- Create a class with `getMemoryShapes` and `forward` methods
@snippet dnn/edge_detection.py CropLayer
@note Both methods should return lists.
- Register a new layer.
@snippet dnn/edge_detection.py Register
That's it! We've replaced a built-in OpenCV layer with a custom one.
You may find the full script in the [source code](https://github.com/opencv/opencv/tree/master/samples/dnn/edge_detection.py).
<table border="0">
<tr>
<td>![](js_tutorials/js_assets/lena.jpg)</td>
<td>![](images/lena_hed.jpg)</td>
</tr>
</table>

(Binary file doc/tutorials/dnn/images/lena_hed.jpg added, 38 KiB; image diff not shown.)

@ -265,10 +265,10 @@ std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
{
int i, dims = msize.p[-1];
int i, dims = msize.dims();
for( i = 0; i < dims; i++ )
{
out << msize.p[i];
out << msize[i];
if( i < dims-1 )
out << " x ";
}

@ -236,6 +236,7 @@ public:
bool isUMatVector() const;
bool isMatx() const;
bool isVector() const;
bool isGpuMat() const;
bool isGpuMatVector() const;
~_InputArray();
@ -544,10 +545,11 @@ struct CV_EXPORTS UMatData
struct CV_EXPORTS MatSize
{
explicit MatSize(int* _p);
int dims() const;
Size operator()() const;
const int& operator[](int i) const;
int& operator[](int i);
operator const int*() const;
operator const int*() const; // TODO OpenCV 4.0: drop this
bool operator == (const MatSize& sz) const;
bool operator != (const MatSize& sz) const;

@ -155,6 +155,7 @@ inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; }
inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR ||
kind() == _InputArray::STD_BOOL_VECTOR ||
kind() == _InputArray::STD_ARRAY; }
inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; }
inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; }
////////////////////////////////////////////////////////////////////////////////////////
@ -1391,22 +1392,36 @@ inline
MatSize::MatSize(int* _p)
: p(_p) {}
inline
int MatSize::dims() const
{
return (p - 1)[0];
}
inline
Size MatSize::operator()() const
{
CV_DbgAssert(p[-1] <= 2);
CV_DbgAssert(dims() <= 2);
return Size(p[1], p[0]);
}
inline
const int& MatSize::operator[](int i) const
{
CV_DbgAssert(i < dims());
#ifdef __OPENCV_BUILD
CV_DbgAssert(i >= 0);
#endif
return p[i];
}
inline
int& MatSize::operator[](int i)
{
CV_DbgAssert(i < dims());
#ifdef __OPENCV_BUILD
CV_DbgAssert(i >= 0);
#endif
return p[i];
}
@ -1419,8 +1434,8 @@ MatSize::operator const int*() const
inline
bool MatSize::operator == (const MatSize& sz) const
{
int d = p[-1];
int dsz = sz.p[-1];
int d = dims();
int dsz = sz.dims();
if( d != dsz )
return false;
if( d == 2 )

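A short usage sketch, assuming only the public API shown in the hunks above, of what the new MatSize accessors give callers; dims() wraps the old p[-1] idiom, and operator[] is now bounds-checked in debug builds:

```cpp
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    int sizes[] = {2, 3, 4};
    cv::Mat m(3, sizes, CV_8U);
    // dims() reads the dimension count stored just before the sizes array,
    // i.e. the old p[-1] pattern, without poking at MatSize internals.
    CV_Assert(m.size.dims() == 3);
    std::cout << m.size << std::endl;   // prints "2 x 3 x 4" via the updated operator<<
    return 0;
}
```
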
@ -1885,13 +1885,24 @@ void cv::normalize( InputArray _src, InputOutputArray _dst, double a, double b,
CV_INSTRUMENT_REGION()
double scale = 1, shift = 0;
int type = _src.type(), depth = CV_MAT_DEPTH(type);
if( rtype < 0 )
rtype = _dst.fixedType() ? _dst.depth() : depth;
if( norm_type == CV_MINMAX )
{
double smin = 0, smax = 0;
double dmin = MIN( a, b ), dmax = MAX( a, b );
minMaxIdx( _src, &smin, &smax, 0, 0, _mask );
scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
shift = dmin - smin*scale;
if( rtype == CV_32F )
{
scale = (float)scale;
shift = (float)dmin - (float)(smin*scale);
}
else
shift = dmin - smin*scale;
}
else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
{
@ -1902,10 +1913,6 @@ void cv::normalize( InputArray _src, InputOutputArray _dst, double a, double b,
else
CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );
int type = _src.type(), depth = CV_MAT_DEPTH(type);
if( rtype < 0 )
rtype = _dst.fixedType() ? _dst.depth() : depth;
CV_OCL_RUN(_dst.isUMat(),
ocl_normalize(_src, _dst, _mask, rtype, scale, shift))

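Hedged context for the hunk above: with NORM_MINMAX and a CV_32F destination, computing scale/shift in double and rounding only when storing could push results slightly outside the requested [a, b] range; pre-rounding them to float keeps the endpoints exact. A minimal sketch mirroring the regression_6125 test added later in this commit:

```cpp
#include <opencv2/core.hpp>

int main()
{
    float data[] = {1888.f, 1692.f, 369.f, 12.f};
    cv::Mat src(1, 4, CV_32F, data);
    cv::normalize(src, src, 0, 400, cv::NORM_MINMAX, CV_32F);
    // With the fix, every value stays within the requested [0, 400] range.
    for (int i = 0; i < src.cols; ++i)
        CV_Assert(src.at<float>(i) >= 0.f && src.at<float>(i) <= 400.f);
    return 0;
}
```
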
@ -344,13 +344,12 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
{
GpuMat src;
#ifndef HAVE_CUDA
(void) _src;
(void) stream;
throw_no_cuda();
#else
GpuMat src;
if (_src.kind() == _InputArray::CUDA_GPU_MAT)
{
src = _src.getGpuMat();
@ -361,15 +360,12 @@ GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
src = pool.getBuffer(_src.size(), _src.type());
src.upload(_src, stream);
}
#endif
return src;
#endif
}
GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream)
{
GpuMat dst;
#ifndef HAVE_CUDA
(void) _dst;
(void) rows;
@ -378,6 +374,7 @@ GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, St
(void) stream;
throw_no_cuda();
#else
GpuMat dst;
if (_dst.kind() == _InputArray::CUDA_GPU_MAT)
{
_dst.create(rows, cols, type);
@ -388,9 +385,8 @@ GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, St
BufferPool pool(stream);
dst = pool.getBuffer(rows, cols, type);
}
#endif
return dst;
#endif
}
void cv::cuda::syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream)

@ -1005,6 +1005,16 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
0) : 0;
if (cv::ipp::getIppTopFeatures() & (
#if IPP_VERSION_X100 >= 201700
ippCPUID_AVX512F |
#endif
ippCPUID_AVX2)
) // IPP_DISABLE_NORM_16UC3_mask_small (#11399)
{
if (normType == NORM_L1 && type == CV_16UC3 && sz.width < 16)
return false;
}
if( ippiNormDiff_C3CMR )
{
Ipp64f norm1, norm2, norm3;

@ -129,8 +129,6 @@
# define CV__EXCEPTION_PTR 0 // Not supported, details: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58938
# elif defined(CV_CXX11)
# define CV__EXCEPTION_PTR 1
# elif defined(CV_ICC)
# define CV__EXCEPTION_PTR 1
# elif defined(_MSC_VER)
# define CV__EXCEPTION_PTR (_MSC_VER >= 1600)
# elif defined(__clang__)

@ -1918,6 +1918,25 @@ TEST(Normalize, regression_5876_inplace_change_type)
EXPECT_EQ(0, cvtest::norm(m, result, NORM_INF));
}
TEST(Normalize, regression_6125)
{
float initial_values[] = {
1888, 1692, 369, 263, 199,
280, 326, 129, 143, 126,
233, 221, 130, 126, 150,
249, 575, 574, 63, 12
};
Mat src(Size(20, 1), CV_32F, initial_values);
float min = 0., max = 400.;
normalize(src, src, 0, 400, NORM_MINMAX, CV_32F);
for(int i = 0; i < 20; i++)
{
EXPECT_GE(src.at<float>(i), min) << "Value should be >= 0";
EXPECT_LE(src.at<float>(i), max) << "Value should be <= 400";
}
}
TEST(MinMaxLoc, regression_4955_nans)
{
cv::Mat one_mat(2, 2, CV_32F, cv::Scalar(1));
@ -2084,4 +2103,22 @@ TEST(Core_Set, regression_11044)
EXPECT_EQ(std::numeric_limits<double>::infinity(), testDouble.at<double>(0, 0));
}
TEST(Core_Norm, IPP_regression_NORM_L1_16UC3_small)
{
int cn = 3;
Size sz(9, 4); // width < 16
Mat a(sz, CV_MAKE_TYPE(CV_16U, cn), Scalar::all(1));
Mat b(sz, CV_MAKE_TYPE(CV_16U, cn), Scalar::all(2));
uchar mask_[9*4] = {
255, 255, 255, 0, 255, 255, 0, 255, 0,
0, 255, 0, 0, 255, 255, 255, 255, 0,
0, 0, 0, 255, 0, 255, 0, 255, 255,
0, 0, 255, 0, 255, 255, 255, 0, 255
};
Mat mask(sz, CV_8UC1, mask_);
EXPECT_EQ((double)9*4*cn, cv::norm(a, b, NORM_L1)); // without mask, IPP works well
EXPECT_EQ((double)20*cn, cv::norm(a, b, NORM_L1, mask));
}
}} // namespace

@ -134,7 +134,7 @@ static inline MatShape shape(const Mat& mat)
static inline MatShape shape(const MatSize& sz)
{
return shape(sz.p, sz[-1]);
return shape(sz.p, sz.dims());
}
static inline MatShape shape(const UMat& mat)

@ -40,4 +40,182 @@ bool pyopencv_to(PyObject *o, std::vector<Mat> &blobs, const char *name) //requi
return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
}
#endif
template<typename T>
PyObject* pyopencv_from(const dnn::DictValue &dv)
{
if (dv.size() > 1)
{
std::vector<T> vec(dv.size());
for (int i = 0; i < dv.size(); ++i)
vec[i] = dv.get<T>(i);
return pyopencv_from_generic_vec(vec);
}
else
return pyopencv_from(dv.get<T>());
}
template<>
PyObject* pyopencv_from(const dnn::DictValue &dv)
{
if (dv.isInt()) return pyopencv_from<int>(dv);
if (dv.isReal()) return pyopencv_from<float>(dv);
if (dv.isString()) return pyopencv_from<String>(dv);
CV_Error(Error::StsNotImplemented, "Unknown value type");
return NULL;
}
template<>
PyObject* pyopencv_from(const dnn::LayerParams& lp)
{
PyObject* dict = PyDict_New();
for (std::map<String, dnn::DictValue>::const_iterator it = lp.begin(); it != lp.end(); ++it)
{
CV_Assert(!PyDict_SetItemString(dict, it->first.c_str(), pyopencv_from(it->second)));
}
return dict;
}
class pycvLayer CV_FINAL : public dnn::Layer
{
public:
pycvLayer(const dnn::LayerParams &params, PyObject* pyLayer) : Layer(params)
{
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
PyObject* args = PyTuple_New(2);
CV_Assert(!PyTuple_SetItem(args, 0, pyopencv_from(params)));
CV_Assert(!PyTuple_SetItem(args, 1, pyopencv_from(params.blobs)));
o = PyObject_CallObject(pyLayer, args);
Py_DECREF(args);
PyGILState_Release(gstate);
if (!o)
CV_Error(Error::StsError, "Failed to create an instance of custom layer");
}
static void registerLayer(const std::string& type, PyObject* o)
{
std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
if (it != pyLayers.end())
it->second.push_back(o);
else
pyLayers[type] = std::vector<PyObject*>(1, o);
}
static void unregisterLayer(const std::string& type)
{
std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
if (it != pyLayers.end())
{
if (it->second.size() > 1)
it->second.pop_back();
else
pyLayers.erase(it);
}
}
static Ptr<dnn::Layer> create(dnn::LayerParams &params)
{
std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(params.type);
if (it == pyLayers.end())
CV_Error(Error::StsNotImplemented, "Layer with a type \"" + params.type +
"\" is not implemented");
CV_Assert(!it->second.empty());
return Ptr<dnn::Layer>(new pycvLayer(params, it->second.back()));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &) const CV_OVERRIDE
{
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
PyObject* args = PyList_New(inputs.size());
for(size_t i = 0; i < inputs.size(); ++i)
PyList_SET_ITEM(args, i, pyopencv_from_generic_vec(inputs[i]));
PyObject* res = PyObject_CallMethodObjArgs(o, PyString_FromString("getMemoryShapes"), args, NULL);
Py_DECREF(args);
PyGILState_Release(gstate);
if (!res)
CV_Error(Error::StsNotImplemented, "Failed to call \"getMemoryShapes\" method");
pyopencv_to_generic_vec(res, outputs, ArgInfo("", 0));
return false;
}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &) CV_OVERRIDE
{
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
std::vector<Mat> inps(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i)
inps[i] = *inputs[i];
PyObject* args = pyopencv_from(inps);
PyObject* res = PyObject_CallMethodObjArgs(o, PyString_FromString("forward"), args, NULL);
Py_DECREF(args);
PyGILState_Release(gstate);
if (!res)
CV_Error(Error::StsNotImplemented, "Failed to call \"forward\" method");
std::vector<Mat> pyOutputs;
pyopencv_to(res, pyOutputs, ArgInfo("", 0));
CV_Assert(pyOutputs.size() == outputs.size());
for (size_t i = 0; i < outputs.size(); ++i)
{
CV_Assert(pyOutputs[i].size == outputs[i].size);
CV_Assert(pyOutputs[i].type() == outputs[i].type());
pyOutputs[i].copyTo(outputs[i]);
}
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, "");
}
private:
// Map layers types to python classes.
static std::map<std::string, std::vector<PyObject*> > pyLayers;
PyObject* o; // Instance of implemented python layer.
};
std::map<std::string, std::vector<PyObject*> > pycvLayer::pyLayers;
static PyObject *pyopencv_cv_dnn_registerLayer(PyObject*, PyObject *args, PyObject *kw)
{
const char *keywords[] = { "type", "class", NULL };
char* layerType;
PyObject *classInstance;
if (!PyArg_ParseTupleAndKeywords(args, kw, "sO", (char**)keywords, &layerType, &classInstance))
return NULL;
if (!PyCallable_Check(classInstance)) {
PyErr_SetString(PyExc_TypeError, "class must be callable");
return NULL;
}
pycvLayer::registerLayer(layerType, classInstance);
dnn::LayerFactory::registerLayer(layerType, pycvLayer::create);
Py_RETURN_NONE;
}
static PyObject *pyopencv_cv_dnn_unregisterLayer(PyObject*, PyObject *args, PyObject *kw)
{
const char *keywords[] = { "type", NULL };
char* layerType;
if (!PyArg_ParseTupleAndKeywords(args, kw, "s", (char**)keywords, &layerType))
return NULL;
pycvLayer::unregisterLayer(layerType);
dnn::LayerFactory::unregisterLayer(layerType);
Py_RETURN_NONE;
}
#endif // HAVE_OPENCV_DNN

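For comparison with the Python bindings above, a hedged sketch of the C++ registration mechanism they delegate to; dnn::LayerFactory::registerLayer is the same call made in pyopencv_cv_dnn_registerLayer, while IdentityLayer is a hypothetical example layer:

```cpp
#include <opencv2/dnn.hpp>
using namespace cv;

// Hypothetical pass-through layer; only the factory wiring matters here.
class IdentityLayer CV_FINAL : public dnn::Layer
{
public:
    IdentityLayer(const dnn::LayerParams& params) : Layer(params) {}

    static Ptr<dnn::Layer> create(dnn::LayerParams& params)
    {
        return Ptr<dnn::Layer>(new IdentityLayer(params));
    }

    virtual void forward(std::vector<Mat*>& inputs, std::vector<Mat>& outputs,
                         std::vector<Mat>&) CV_OVERRIDE
    {
        inputs[0]->copyTo(outputs[0]);  // identity: copy input to output
    }

    virtual void forward(InputArrayOfArrays, OutputArrayOfArrays,
                         OutputArrayOfArrays) CV_OVERRIDE
    {
        CV_Error(Error::StsNotImplemented, "");
    }
};

// Same call the Python path makes in pyopencv_cv_dnn_registerLayer:
// dnn::LayerFactory::registerLayer("Identity", IdentityLayer::create);
```
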
@ -329,6 +329,7 @@ struct ReLUFunctor
{
lp.type = "ReLU";
std::shared_ptr<InferenceEngine::ReLULayer> ieLayer(new InferenceEngine::ReLULayer(lp));
ieLayer->negative_slope = slope;
return ieLayer;
}
#endif // HAVE_INF_ENGINE

@ -205,7 +205,9 @@ public:
if (_explicitSizes)
{
CV_Assert(_aspectRatios.empty(), !params.has("min_size"), !params.has("max_size"));
CV_Assert(_aspectRatios.empty());
CV_Assert(!params.has("min_size"));
CV_Assert(!params.has("max_size"));
_boxWidths = widths;
_boxHeights = heights;
}

@ -161,14 +161,14 @@ public:
for (int i = 0; i < outputs.size(); ++i)
{
CV_Assert(sliceRanges[i].size() <= inpShape[-1]);
CV_Assert(sliceRanges[i].size() <= inpShape.dims());
// Clamp.
for (int j = 0; j < sliceRanges[i].size(); ++j)
{
sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
}
// Fill the rest of ranges.
for (int j = sliceRanges[i].size(); j < inpShape[-1]; ++j)
for (int j = sliceRanges[i].size(); j < inpShape.dims(); ++j)
{
sliceRanges[i].push_back(Range::all());
}

@ -6,6 +6,7 @@
// Third party copyrights are property of their respective owners.
#include "precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include "op_halide.hpp"
#ifdef HAVE_HALIDE
@ -36,7 +37,7 @@ static MatShape getBufferShape(const MatShape& shape)
static MatShape getBufferShape(const MatSize& size)
{
return getBufferShape(MatShape(size.p, size.p + size[-1]));
return getBufferShape(shape(size));
}
Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat)
@ -160,7 +161,7 @@ void HalideBackendWrapper::setHostDirty()
void getCanonicalSize(const MatSize& size, int* w, int* h, int* c, int* n)
{
getCanonicalSize(MatShape(size.p, size.p + size[-1]), w, h, c, n);
getCanonicalSize(shape(size), w, h, c, n);
}
void getCanonicalSize(const MatShape& shape, int* width, int* height,

@ -54,16 +54,7 @@ namespace clahe
const int tilesX, const int tilesY, const cv::Size tileSize,
const int clipLimit, const float lutScale)
{
cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
if(k.empty())
return false;

@ -55,9 +55,11 @@ public:
CV_ALWAYS_INLINE fixedpoint64 operator * (const int32_t& val2) const { return operator *(fixedpoint64(val2)); }
CV_ALWAYS_INLINE fixedpoint64 operator * (const fixedpoint64& val2) const
{
uint64_t uval = (uint64_t)((val ^ (val >> 63)) - (val >> 63));
uint64_t umul = (uint64_t)((val2.val ^ (val2.val >> 63)) - (val2.val >> 63));
int64_t ressign = (val >> 63) ^ (val2.val >> 63);
bool sign_val = val < 0;
bool sign_mul = val2.val < 0;
uint64_t uval = sign_val ? (uint64_t)(-val) : (uint64_t)val;
uint64_t umul = sign_mul ? (uint64_t)(-val2.val) : (uint64_t)val2.val;
bool ressign = sign_val ^ sign_mul;
uint64_t sh0 = fixedround((uval & 0xFFFFFFFF) * (umul & 0xFFFFFFFF));
uint64_t sh1_0 = (uval >> 32) * (umul & 0xFFFFFFFF);
@ -67,33 +69,37 @@ public:
uint64_t val0_h = (sh2 & 0xFFFFFFFF) + (sh1_0 >> 32) + (sh1_1 >> 32) + (val0_l >> 32);
val0_l &= 0xFFFFFFFF;
if ( (sh2 >> 32) || (val0_h >> ressign ? 32 : 31) )
return (ressign ? ~(int64_t)0x7FFFFFFFFFFFFFFF : (int64_t)0x7FFFFFFFFFFFFFFF);
if (sh2 > CV_BIG_INT(0x7FFFFFFF) || val0_h > CV_BIG_INT(0x7FFFFFFF))
return (int64_t)(ressign ? CV_BIG_UINT(0x8000000000000000) : CV_BIG_INT(0x7FFFFFFFFFFFFFFF));
if (ressign)
{
val0_l = (~val0_l + 1) & 0xFFFFFFFF;
val0_h = val0_l ? ~val0_h : (~val0_h + 1);
return -(int64_t)(val0_h << 32 | val0_l);
}
return (int64_t)(val0_h << 32 | val0_l);
}
CV_ALWAYS_INLINE fixedpoint64 operator + (const fixedpoint64& val2) const
{
int64_t res = val + val2.val;
return ((val ^ res) & (val2.val ^ res)) >> 63 ? ~(res & ~0x7FFFFFFFFFFFFFFF) : res;
return (int64_t)(((val ^ res) & (val2.val ^ res)) < 0 ? ~(res & CV_BIG_UINT(0x8000000000000000)) : res);
}
CV_ALWAYS_INLINE fixedpoint64 operator - (const fixedpoint64& val2) const
{
int64_t res = val - val2.val;
return ((val ^ val2.val) & (val ^ res)) >> 63 ? ~(res & ~0x7FFFFFFFFFFFFFFF) : res;
return (int64_t)(((val ^ val2.val) & (val ^ res)) < 0 ? ~(res & CV_BIG_UINT(0x8000000000000000)) : res);
}
CV_ALWAYS_INLINE fixedpoint64 operator >> (int n) const { return fixedpoint64(val >> n); }
CV_ALWAYS_INLINE fixedpoint64 operator << (int n) const { return fixedpoint64(val << n); }
CV_ALWAYS_INLINE bool operator == (const fixedpoint64& val2) const { return val == val2.val; }
template <typename ET>
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int64_t)fixedround((uint64_t)val) >> fixedShift); }
CV_ALWAYS_INLINE ET saturate_cast() const { return cv::saturate_cast<ET>((int64_t)fixedround((uint64_t)val) >> fixedShift); }
CV_ALWAYS_INLINE operator double() const { return (double)val / (1LL << fixedShift); }
CV_ALWAYS_INLINE operator float() const { return (float)val / (1LL << fixedShift); }
CV_ALWAYS_INLINE operator uint8_t() const { return saturate_cast<uint8_t>(); }
CV_ALWAYS_INLINE operator int8_t() const { return saturate_cast<int8_t>(); }
CV_ALWAYS_INLINE operator uint16_t() const { return saturate_cast<uint16_t>(); }
CV_ALWAYS_INLINE operator int16_t() const { return saturate_cast<int16_t>(); }
CV_ALWAYS_INLINE operator int32_t() const { return saturate_cast<int32_t>(); }
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE fixedpoint64 zero() { return fixedpoint64(); }
static CV_ALWAYS_INLINE fixedpoint64 one() { return fixedpoint64((int64_t)(1LL << fixedShift)); }
@ -133,15 +139,15 @@ public:
uint64_t val0_h = (sh2 & 0xFFFFFFFF) + (sh1_0 >> 32) + (sh1_1 >> 32) + (val0_l >> 32);
val0_l &= 0xFFFFFFFF;
if ((sh2 >> 32) || (val0_h >> 32))
return ((uint64_t)0xFFFFFFFFFFFFFFFF);
if (sh2 > CV_BIG_INT(0xFFFFFFFF) || val0_h > CV_BIG_INT(0xFFFFFFFF))
return (uint64_t)CV_BIG_UINT(0xFFFFFFFFFFFFFFFF);
return val0_h << 32 | val0_l;
return (val0_h << 32 | val0_l);
}
CV_ALWAYS_INLINE ufixedpoint64 operator + (const ufixedpoint64& val2) const
{
uint64_t res = val + val2.val;
return (val > res) ? (uint64_t)0xFFFFFFFFFFFFFFFF : res;
return (uint64_t)((val > res) ? CV_BIG_UINT(0xFFFFFFFFFFFFFFFF) : res);
}
CV_ALWAYS_INLINE ufixedpoint64 operator - (const ufixedpoint64& val2) const
{
@ -151,9 +157,14 @@ public:
CV_ALWAYS_INLINE ufixedpoint64 operator << (int n) const { return ufixedpoint64(val << n); }
CV_ALWAYS_INLINE bool operator == (const ufixedpoint64& val2) const { return val == val2.val; }
template <typename ET>
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
CV_ALWAYS_INLINE ET saturate_cast() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
CV_ALWAYS_INLINE operator double() const { return (double)val / (1LL << fixedShift); }
CV_ALWAYS_INLINE operator float() const { return (float)val / (1LL << fixedShift); }
CV_ALWAYS_INLINE operator uint8_t() const { return saturate_cast<uint8_t>(); }
CV_ALWAYS_INLINE operator int8_t() const { return saturate_cast<int8_t>(); }
CV_ALWAYS_INLINE operator uint16_t() const { return saturate_cast<uint16_t>(); }
CV_ALWAYS_INLINE operator int16_t() const { return saturate_cast<int16_t>(); }
CV_ALWAYS_INLINE operator int32_t() const { return saturate_cast<int32_t>(); }
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE ufixedpoint64 zero() { return ufixedpoint64(); }
static CV_ALWAYS_INLINE ufixedpoint64 one() { return ufixedpoint64((uint64_t)(1ULL << fixedShift)); }
@ -187,21 +198,26 @@ public:
CV_ALWAYS_INLINE fixedpoint32 operator + (const fixedpoint32& val2) const
{
int32_t res = val + val2.val;
return ((val ^ res) & (val2.val ^ res)) >> 31 ? ~(res & ~0x7FFFFFFF) : res;
return (int64_t)((val ^ res) & (val2.val ^ res)) >> 31 ? ~(res & ~0x7FFFFFFF) : res;
}
CV_ALWAYS_INLINE fixedpoint32 operator - (const fixedpoint32& val2) const
{
int32_t res = val - val2.val;
return ((val ^ val2.val) & (val ^ res)) >> 31 ? ~(res & ~0x7FFFFFFF) : res;
return (int64_t)((val ^ val2.val) & (val ^ res)) >> 31 ? ~(res & ~0x7FFFFFFF) : res;
}
CV_ALWAYS_INLINE fixedpoint32 operator >> (int n) const { return fixedpoint32(val >> n); }
CV_ALWAYS_INLINE fixedpoint32 operator << (int n) const { return fixedpoint32(val << n); }
CV_ALWAYS_INLINE bool operator == (const fixedpoint32& val2) const { return val == val2.val; }
template <typename ET>
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int32_t)fixedround((uint32_t)val) >> fixedShift); }
CV_ALWAYS_INLINE ET saturate_cast() const { return cv::saturate_cast<ET>((int32_t)fixedround((uint32_t)val) >> fixedShift); }
CV_ALWAYS_INLINE operator fixedpoint64() const { return (int64_t)val << (fixedpoint64::fixedShift - fixedShift); }
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator uint8_t() const { return saturate_cast<uint8_t>(); }
CV_ALWAYS_INLINE operator int8_t() const { return saturate_cast<int8_t>(); }
CV_ALWAYS_INLINE operator uint16_t() const { return saturate_cast<uint16_t>(); }
CV_ALWAYS_INLINE operator int16_t() const { return saturate_cast<int16_t>(); }
CV_ALWAYS_INLINE operator int32_t() const { return saturate_cast<int32_t>(); }
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE fixedpoint32 zero() { return fixedpoint32(); }
static CV_ALWAYS_INLINE fixedpoint32 one() { return fixedpoint32((1 << fixedShift)); }
@ -242,10 +258,15 @@ public:
CV_ALWAYS_INLINE ufixedpoint32 operator << (int n) const { return ufixedpoint32(val << n); }
CV_ALWAYS_INLINE bool operator == (const ufixedpoint32& val2) const { return val == val2.val; }
template <typename ET>
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
CV_ALWAYS_INLINE ET saturate_cast() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
CV_ALWAYS_INLINE operator ufixedpoint64() const { return (uint64_t)val << (ufixedpoint64::fixedShift - fixedShift); }
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator uint8_t() const { return saturate_cast<uint8_t>(); }
CV_ALWAYS_INLINE operator int8_t() const { return saturate_cast<int8_t>(); }
CV_ALWAYS_INLINE operator uint16_t() const { return saturate_cast<uint16_t>(); }
CV_ALWAYS_INLINE operator int16_t() const { return saturate_cast<int16_t>(); }
CV_ALWAYS_INLINE operator int32_t() const { return saturate_cast<int32_t>(); }
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE ufixedpoint32 zero() { return ufixedpoint32(); }
static CV_ALWAYS_INLINE ufixedpoint32 one() { return ufixedpoint32((1U << fixedShift)); }
@ -284,10 +305,15 @@ public:
CV_ALWAYS_INLINE fixedpoint16 operator << (int n) const { return fixedpoint16((int16_t)(val << n)); }
CV_ALWAYS_INLINE bool operator == (const fixedpoint16& val2) const { return val == val2.val; }
template <typename ET>
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>((int16_t)fixedround((uint16_t)val) >> fixedShift); }
CV_ALWAYS_INLINE ET saturate_cast() const { return cv::saturate_cast<ET>((int16_t)fixedround((uint16_t)val) >> fixedShift); }
CV_ALWAYS_INLINE operator fixedpoint32() const { return (int32_t)val << (fixedpoint32::fixedShift - fixedShift); }
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator uint8_t() const { return saturate_cast<uint8_t>(); }
CV_ALWAYS_INLINE operator int8_t() const { return saturate_cast<int8_t>(); }
CV_ALWAYS_INLINE operator uint16_t() const { return saturate_cast<uint16_t>(); }
CV_ALWAYS_INLINE operator int16_t() const { return saturate_cast<int16_t>(); }
CV_ALWAYS_INLINE operator int32_t() const { return saturate_cast<int32_t>(); }
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE fixedpoint16 zero() { return fixedpoint16(); }
static CV_ALWAYS_INLINE fixedpoint16 one() { return fixedpoint16((int16_t)(1 << fixedShift)); }
@ -324,10 +350,15 @@ public:
CV_ALWAYS_INLINE ufixedpoint16 operator << (int n) const { return ufixedpoint16((uint16_t)(val << n)); }
CV_ALWAYS_INLINE bool operator == (const ufixedpoint16& val2) const { return val == val2.val; }
template <typename ET>
CV_ALWAYS_INLINE operator ET() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
CV_ALWAYS_INLINE ET saturate_cast() const { return cv::saturate_cast<ET>(fixedround(val) >> fixedShift); }
CV_ALWAYS_INLINE operator ufixedpoint32() const { return (uint32_t)val << (ufixedpoint32::fixedShift - fixedShift); }
CV_ALWAYS_INLINE operator double() const { return (double)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator float() const { return (float)val / (1 << fixedShift); }
CV_ALWAYS_INLINE operator uint8_t() const { return saturate_cast<uint8_t>(); }
CV_ALWAYS_INLINE operator int8_t() const { return saturate_cast<int8_t>(); }
CV_ALWAYS_INLINE operator uint16_t() const { return saturate_cast<uint16_t>(); }
CV_ALWAYS_INLINE operator int16_t() const { return saturate_cast<int16_t>(); }
CV_ALWAYS_INLINE operator int32_t() const { return saturate_cast<int32_t>(); }
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE ufixedpoint16 zero() { return ufixedpoint16(); }
static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); }
@ -335,4 +366,4 @@ public:
}
#endif
#endif

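To make the saturation logic above easier to follow, a standalone hedged sketch (a plain function, not the OpenCV class; rounding omitted) of the same 32-bit partial-product scheme for an unsigned Q32.32 multiply:

```cpp
#include <cstdint>

// Multiply two unsigned Q32.32 fixed-point values via 32-bit partial
// products, saturating to UINT64_MAX on overflow.
uint64_t fx_mul_u64(uint64_t a, uint64_t b)
{
    uint64_t sh0 = (a & 0xFFFFFFFF) * (b & 0xFFFFFFFF); // low  * low
    uint64_t sh1 = (a >> 32)        * (b & 0xFFFFFFFF); // high * low
    uint64_t sh2 = (a & 0xFFFFFFFF) * (b >> 32);        // low  * high
    uint64_t sh3 = (a >> 32)        * (b >> 32);        // high * high
    if (sh3 > 0xFFFFFFFF)                               // definite overflow
        return UINT64_MAX;
    // Result = full 128-bit product >> 32, assembled from the pieces.
    uint64_t lo = (sh0 >> 32) + (sh1 & 0xFFFFFFFF) + (sh2 & 0xFFFFFFFF);
    uint64_t hi = sh3 + (sh1 >> 32) + (sh2 >> 32) + (lo >> 32);
    if (hi > 0xFFFFFFFF)                                // carry overflowed
        return UINT64_MAX;
    return (hi << 32) | (lo & 0xFFFFFFFF);
}
```
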
@ -43,10 +43,6 @@
//
//M*/
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif
inline int calc_lut(__local int* smem, int val, int tid)
{
smem[tid] = val;
@ -60,8 +56,7 @@ inline int calc_lut(__local int* smem, int val, int tid)
return smem[tid];
}
#ifdef CPU
inline void reduce(volatile __local int* smem, int val, int tid)
inline int reduce(__local volatile int* smem, int val, int tid)
{
smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE);
@ -75,69 +70,39 @@ inline void reduce(volatile __local int* smem, int val, int tid)
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem[tid] += smem[tid + 32];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
smem[tid] += smem[tid + 16];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
smem[tid] += smem[tid + 8];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
smem[tid] += smem[tid + 4];
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
smem[tid] += smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
smem[256] = smem[tid] + smem[tid + 1];
barrier(CLK_LOCAL_MEM_FENCE);
}
#else
inline void reduce(__local volatile int* smem, int val, int tid)
{
smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
smem[tid] = val += smem[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
smem[tid] = val += smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
if (tid == 0)
{
#endif
smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16
smem[0] = (smem[0] + smem[1]) + (smem[2] + smem[3]);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem[tid] += smem[tid + 8];
smem[tid] += smem[tid + 4];
smem[tid] += smem[tid + 2];
smem[tid] += smem[tid + 1];
}
val = smem[0];
barrier(CLK_LOCAL_MEM_FENCE);
return val;
}
#endif
__kernel void calcLut(__global __const uchar * src, const int srcStep,
const int src_offset, __global uchar * lut,
@ -179,14 +144,7 @@ __kernel void calcLut(__global __const uchar * src, const int srcStep,
}
// find number of overall clipped samples
reduce(smem, clipped, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
clipped = smem[256];
#else
clipped = smem[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
clipped = reduce(smem, clipped, tid);
// redistribute clipped samples evenly
int redistBatch = clipped / 256;

@ -1783,6 +1783,10 @@ static PyMethodDef special_methods[] = {
{"createTrackbar", pycvCreateTrackbar, METH_VARARGS, "createTrackbar(trackbarName, windowName, value, count, onChange) -> None"},
{"createButton", (PyCFunction)pycvCreateButton, METH_VARARGS | METH_KEYWORDS, "createButton(buttonName, onChange [, userData, buttonType, initialButtonState]) -> None"},
{"setMouseCallback", (PyCFunction)pycvSetMouseCallback, METH_VARARGS | METH_KEYWORDS, "setMouseCallback(windowName, onMouse [, param]) -> None"},
#endif
#ifdef HAVE_OPENCV_DNN
{"dnn_registerLayer", (PyCFunction)pyopencv_cv_dnn_registerLayer, METH_VARARGS | METH_KEYWORDS, "registerLayer(type, class) -> None"},
{"dnn_unregisterLayer", (PyCFunction)pyopencv_cv_dnn_unregisterLayer, METH_VARARGS | METH_KEYWORDS, "unregisterLayer(type) -> None"},
#endif
{NULL, NULL},
};

@ -145,6 +145,18 @@ private:
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
std::vector<cuda::GpuMat> gpu_dst_pyr_laplace_;
std::vector<cuda::GpuMat> gpu_dst_band_weights_;
std::vector<Point> gpu_tl_points_;
std::vector<cuda::GpuMat> gpu_imgs_with_border_;
std::vector<std::vector<cuda::GpuMat> > gpu_weight_pyr_gauss_vec_;
std::vector<std::vector<cuda::GpuMat> > gpu_src_pyr_laplace_vec_;
std::vector<std::vector<cuda::GpuMat> > gpu_ups_;
cuda::GpuMat gpu_dst_mask_;
cuda::GpuMat gpu_mask_;
cuda::GpuMat gpu_img_;
cuda::GpuMat gpu_weight_map_;
cuda::GpuMat gpu_add_mask_;
int gpu_feed_idx_;
bool gpu_initialized_;
#endif
};

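These cached buffers are what let the GPU path feed new frames without re-running prepare(). A hedged usage sketch with placeholder names, mirroring the updated test later in this commit:

```cpp
#include <opencv2/stitching/detail/blenders.hpp>
using namespace cv;

// Sketch: prepare() once, then feed/blend repeatedly; after the first
// blend() the pyramids and weight maps above are reused, not reallocated.
void blendManyFrames(const Mat& img1, const Mat& mask1,
                     const Mat& img2, const Mat& mask2, Size pano_size)
{
    detail::MultiBandBlender blender(/*try_gpu=*/1, /*num_bands=*/5);
    blender.prepare(Rect(Point(0, 0), pano_size));
    for (int frame = 0; frame < 10; ++frame)
    {
        blender.feed(img1, mask1, Point(0, 0));  // images must be CV_8UC3/CV_16SC3
        blender.feed(img2, mask2, Point(0, 0));
        Mat result, result_mask;
        blender.blend(result, result_mask);      // resets accumulators for reuse
    }
}
```
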
@ -221,6 +221,7 @@ MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount();
gpu_feed_idx_ = 0;
#else
(void) try_gpu;
can_use_gpu_ = false;
@ -248,6 +249,15 @@ void MultiBandBlender::prepare(Rect dst_roi)
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_)
{
gpu_initialized_ = false;
gpu_feed_idx_ = 0;
gpu_tl_points_.clear();
gpu_weight_pyr_gauss_vec_.clear();
gpu_src_pyr_laplace_vec_.clear();
gpu_ups_.clear();
gpu_imgs_with_border_.clear();
gpu_dst_pyr_laplace_.resize(num_bands_ + 1);
gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3);
gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0));
@ -320,7 +330,37 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
int64 t = getTickCount();
#endif
UMat img = _img.getUMat();
UMat img;
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
// If using gpu save the top left coordinate when running first time after prepare
if (can_use_gpu_)
{
if (!gpu_initialized_)
{
gpu_tl_points_.push_back(tl);
}
else
{
tl = gpu_tl_points_[gpu_feed_idx_];
}
}
// If _img is not a GpuMat get it as UMat from the InputArray object.
// If it is GpuMat make a dummy object with right dimensions but no data and
// get _img as a GpuMat
if (!_img.isGpuMat())
#endif
{
img = _img.getUMat();
}
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
else
{
gpu_img_ = _img.getGpuMat();
img = UMat(gpu_img_.rows, gpu_img_.cols, gpu_img_.type());
}
#endif
CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3);
CV_Assert(mask.type() == CV_8U);
@ -357,42 +397,63 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_)
{
// Create the source image Laplacian pyramid
cuda::GpuMat gpu_img;
gpu_img.upload(img);
cuda::GpuMat img_with_border;
cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT);
std::vector<cuda::GpuMat> gpu_src_pyr_laplace(num_bands_ + 1);
img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S);
for (int i = 0; i < num_bands_; ++i)
cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]);
for (int i = 0; i < num_bands_; ++i)
if (!gpu_initialized_)
{
cuda::GpuMat up;
cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up);
cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]);
gpu_imgs_with_border_.push_back(cuda::GpuMat());
gpu_weight_pyr_gauss_vec_.push_back(std::vector<cuda::GpuMat>(num_bands_+1));
gpu_src_pyr_laplace_vec_.push_back(std::vector<cuda::GpuMat>(num_bands_+1));
gpu_ups_.push_back(std::vector<cuda::GpuMat>(num_bands_));
}
// Create the weight map Gaussian pyramid
cuda::GpuMat gpu_mask;
gpu_mask.upload(mask);
cuda::GpuMat weight_map;
std::vector<cuda::GpuMat> gpu_weight_pyr_gauss(num_bands_ + 1);
// If _img is not GpuMat upload it to gpu else gpu_img_ was set already
if (!_img.isGpuMat())
{
gpu_img_.upload(img);
}
if (weight_type_ == CV_32F)
// Create the source image Laplacian pyramid
cuda::copyMakeBorder(gpu_img_, gpu_imgs_with_border_[gpu_feed_idx_], top, bottom,
left, right, BORDER_REFLECT);
gpu_imgs_with_border_[gpu_feed_idx_].convertTo(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][0], CV_16S);
for (int i = 0; i < num_bands_; ++i)
cuda::pyrDown(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i],
gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i + 1]);
for (int i = 0; i < num_bands_; ++i)
{
gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.);
cuda::pyrUp(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i + 1], gpu_ups_[gpu_feed_idx_][i]);
cuda::subtract(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i],
gpu_ups_[gpu_feed_idx_][i],
gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i]);
}
else // weight_type_ == CV_16S
// Create the weight map Gaussian pyramid only if not yet initialized
if (!gpu_initialized_)
{
gpu_mask.convertTo(weight_map, CV_16S);
cuda::GpuMat add_mask;
cuda::compare(gpu_mask, 0, add_mask, CMP_NE);
cuda::add(weight_map, Scalar::all(1), weight_map, add_mask);
if (mask.isGpuMat())
{
gpu_mask_ = mask.getGpuMat();
}
else
{
gpu_mask_.upload(mask);
}
if (weight_type_ == CV_32F)
{
gpu_mask_.convertTo(gpu_weight_map_, CV_32F, 1. / 255.);
}
else // weight_type_ == CV_16S
{
gpu_mask_.convertTo(gpu_weight_map_, CV_16S);
cuda::compare(gpu_mask_, 0, gpu_add_mask_, CMP_NE);
cuda::add(gpu_weight_map_, Scalar::all(1), gpu_weight_map_, gpu_add_mask_);
}
cuda::copyMakeBorder(gpu_weight_map_, gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][0], top,
bottom, left, right, BORDER_CONSTANT);
for (int i = 0; i < num_bands_; ++i)
cuda::pyrDown(gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i],
gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i + 1]);
}
cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
for (int i = 0; i < num_bands_; ++i)
cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]);
int y_tl = tl_new.y - dst_roi_.y;
int y_br = br_new.y - dst_roi_.y;
@ -403,9 +464,9 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
for (int i = 0; i <= num_bands_; ++i)
{
Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace[i];
cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i];
cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc);
cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i];
cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i];
cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc);
using namespace cv::cuda::device::blend;
@ -420,6 +481,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
x_tl /= 2; y_tl /= 2;
x_br /= 2; y_br /= 2;
}
++gpu_feed_idx_;
return;
}
#endif
@ -445,7 +507,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
UMat weight_map;
std::vector<UMat> weight_pyr_gauss(num_bands_ + 1);
if(weight_type_ == CV_32F)
if (weight_type_ == CV_32F)
{
mask.getUMat().convertTo(weight_map, CV_32F, 1./255.);
}
@ -486,7 +548,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW);
Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW);
if(weight_type_ == CV_32F)
if (weight_type_ == CV_32F)
{
for (int y = 0; y < rc.height; ++y)
{
@ -540,11 +602,15 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
{
cv::UMat dst_band_weights_0;
Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
if (can_use_gpu_)
{
if (!gpu_initialized_)
{
gpu_ups_.push_back(std::vector<cuda::GpuMat>(num_bands_+1));
}
for (int i = 0; i <= num_bands_; ++i)
{
cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i];
@ -564,20 +630,50 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
// Restore image from Laplacian pyramid
for (size_t i = num_bands_; i > 0; --i)
{
cuda::GpuMat up;
cuda::pyrUp(gpu_dst_pyr_laplace_[i], up);
cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]);
cuda::pyrUp(gpu_dst_pyr_laplace_[i], gpu_ups_[gpu_ups_.size()-1][num_bands_-i]);
cuda::add(gpu_ups_[gpu_ups_.size()-1][num_bands_-i],
gpu_dst_pyr_laplace_[i - 1],
gpu_dst_pyr_laplace_[i - 1]);
}
// If dst is GpuMat do masking on gpu and return dst as a GpuMat
// else download the image to cpu and return it as an ordinary Mat
if (dst.isGpuMat())
{
cuda::GpuMat &gpu_dst = dst.getGpuMatRef();
cuda::compare(gpu_dst_band_weights_[0](dst_rc), WEIGHT_EPS, gpu_dst_mask_, CMP_GT);
cuda::compare(gpu_dst_mask_, 0, gpu_mask_, CMP_EQ);
gpu_dst_pyr_laplace_[0](dst_rc).setTo(Scalar::all(0), gpu_mask_);
gpu_dst_pyr_laplace_[0](dst_rc).convertTo(gpu_dst, CV_16S);
}
else
{
gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
Mat dst_band_weights_0;
gpu_dst_band_weights_[0].download(dst_band_weights_0);
gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
gpu_dst_band_weights_[0].download(dst_band_weights_0);
compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
Blender::blend(dst, dst_mask);
}
gpu_dst_pyr_laplace_.clear();
gpu_dst_band_weights_.clear();
// Set destination Mats to 0 so new image can be blended
for (size_t i = 0; i < num_bands_ + 1; ++i)
{
gpu_dst_band_weights_[i].setTo(0);
gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0));
}
gpu_feed_idx_ = 0;
gpu_initialized_ = true;
}
else
#endif
{
cv::UMat dst_band_weights_0;
for (int i = 0; i <= num_bands_; ++i)
normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
@ -588,11 +684,11 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
dst_pyr_laplace_.clear();
dst_band_weights_.clear();
}
compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
Blender::blend(dst, dst_mask);
Blender::blend(dst, dst_mask);
}
}

@ -50,12 +50,16 @@ namespace opencv_test { namespace {
detail::MultiBandBlender blender(try_cuda, 5);
blender.prepare(Rect(0, 0, max(im1.cols, im2.cols), max(im1.rows, im2.rows)));
blender.feed(im1, mask1, Point(0,0));
blender.feed(im2, mask2, Point(0,0));
Mat result_s, result_mask;
blender.blend(result_s, result_mask);
result_s.convertTo(result, CV_8U);
// If using CUDA, try blending multiple times without calling prepare in between
for (int i = 0; i < (try_cuda ? 10 : 1); ++i) {
blender.feed(im1, mask1, Point(0, 0));
blender.feed(im2, mask2, Point(0, 0));
Mat result_s, result_mask;
blender.blend(result_s, result_mask);
result_s.convertTo(result, CV_8U);
}
}
TEST(CUDA_MultiBandBlender, Accuracy)

@ -86,6 +86,9 @@ endif()
if (WIN32 AND HAVE_MSMF)
list(APPEND videoio_srcs ${CMAKE_CURRENT_LIST_DIR}/src/cap_msmf.hpp)
list(APPEND videoio_srcs ${CMAKE_CURRENT_LIST_DIR}/src/cap_msmf.cpp)
if (HAVE_DXVA)
add_definitions(-DHAVE_DXVA)
endif()
endif()
if (WIN32 AND HAVE_VFW)

modules/videoio/src/cap_msmf.cpp — file diff suppressed because it is too large.

modules/videoio/src/cap_msmf.hpp — file diff suppressed because it is too large.

@ -490,6 +490,7 @@ double CvCapture_OpenNI2::getCommonProperty( int propIdx ) const
break;
case CV_CAP_PROP_OPENNI2_SYNC :
propValue = const_cast<CvCapture_OpenNI2 *>(this)->device.getDepthColorSyncEnabled();
break;
case CV_CAP_PROP_OPENNI2_MIRROR:
{
bool isMirroring = false;

@ -317,6 +317,12 @@ if __name__ == "__main__":
if os.path.realpath(args.work_dir) == os.path.realpath(args.opencv_dir):
raise Fail("Specify workdir (building from OpenCV source directory is not supported)")
# Relative paths become invalid in sub-directories
if args.opencv_dir is not None and not os.path.isabs(args.opencv_dir):
args.opencv_dir = os.path.abspath(args.opencv_dir)
if args.extra_modules_path is not None and not os.path.isabs(args.extra_modules_path):
args.extra_modules_path = os.path.abspath(args.extra_modules_path)
cpath = args.config
if not os.path.exists(cpath):
cpath = os.path.join(SCRIPT_DIR, cpath)

@ -0,0 +1,69 @@
import cv2 as cv
import argparse
parser = argparse.ArgumentParser(
description='This sample shows how to define custom OpenCV deep learning layers in Python. '
'Holistically-Nested Edge Detection (https://arxiv.org/abs/1504.06375) neural network '
'is used as an example model. Find a pre-trained model at https://github.com/s9xie/hed.')
parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
parser.add_argument('--prototxt', help='Path to deploy.prototxt', required=True)
parser.add_argument('--caffemodel', help='Path to hed_pretrained_bsds.caffemodel', required=True)
parser.add_argument('--width', help='Resize input image to a specific width', default=500, type=int)
parser.add_argument('--height', help='Resize input image to a specific height', default=500, type=int)
args = parser.parse_args()
#! [CropLayer]
class CropLayer(object):
def __init__(self, params, blobs):
self.xstart = 0
self.xend = 0
self.ystart = 0
self.yend = 0
# Our layer receives two inputs. We need to crop the first input blob
# to match a shape of the second one (keeping batch size and number of channels)
def getMemoryShapes(self, inputs):
inputShape, targetShape = inputs[0], inputs[1]
batchSize, numChannels = inputShape[0], inputShape[1]
height, width = targetShape[2], targetShape[3]
self.ystart = (inputShape[2] - targetShape[2]) // 2  # integer division: slice indices must be ints
self.xstart = (inputShape[3] - targetShape[3]) // 2
self.yend = self.ystart + height
self.xend = self.xstart + width
return [[batchSize, numChannels, height, width]]
def forward(self, inputs):
return [inputs[0][:,:,self.ystart:self.yend,self.xstart:self.xend]]
#! [CropLayer]
#! [Register]
cv.dnn_registerLayer('Crop', CropLayer)
#! [Register]
# Load the model.
net = cv.dnn.readNet(args.prototxt, args.caffemodel)
kWinName = 'Holistically-Nested Edge Detection'
cv.namedWindow('Input', cv.WINDOW_NORMAL)
cv.namedWindow(kWinName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
cv.imshow('Input', frame)
inp = cv.dnn.blobFromImage(frame, scalefactor=1.0, size=(args.width, args.height),
mean=(104.00698793, 116.66876762, 122.67891434),
swapRB=False, crop=False)
net.setInput(inp)
out = net.forward()
out = out[0, 0]
out = cv.resize(out, (frame.shape[1], frame.shape[0]))
cv.imshow(kWinName, out)