Merge pull request #750 from ludv1x:dnn-python-bindings

pull/784/head
Vadim Pisarevsky 9 years ago
commit b1346e5ab6
  1. 24
      modules/dnn/CMakeLists.txt
  2. 15
      modules/dnn/cmake/OpenCVFindLibProtobuf.cmake
  3. 93
      modules/dnn/cmake/OpenCVFindMKL.cmake
  4. 31
      modules/dnn/cmake/download_model.cmake
  5. 95
      modules/dnn/include/opencv2/dnn/all_layers.hpp
  6. 36
      modules/dnn/include/opencv2/dnn/blob.hpp
  7. 6
      modules/dnn/include/opencv2/dnn/blob.inl.hpp
  8. 8
      modules/dnn/include/opencv2/dnn/dict.hpp
  9. 80
      modules/dnn/include/opencv2/dnn/dnn.hpp
  10. 2
      modules/dnn/include/opencv2/dnn/dnn.inl.hpp
  11. 29674
      modules/dnn/misc/caffe/caffe.pb.cc
  12. 22257
      modules/dnn/misc/caffe/caffe.pb.h
  13. 108
      modules/dnn/misc/python/pyopencv_dnn.hpp
  14. 1547
      modules/dnn/samples/VGG_VOC0712_SSD_300x300_iter_60000.prototxt
  15. 26
      modules/dnn/samples/caffe_googlenet.cpp
  16. 4
      modules/dnn/samples/fcn_semsegm.cpp
  17. 34
      modules/dnn/samples/googlenet_python.py
  18. 153
      modules/dnn/samples/ssd_object_detection.cpp
  19. 79
      modules/dnn/scripts/download_model.py
  20. 7
      modules/dnn/scripts/test_models.json
  21. 11
      modules/dnn/src/blob.cpp
  22. 95
      modules/dnn/src/caffe/caffe.proto
  23. 466
      modules/dnn/src/caffe/caffe_importer.cpp
  24. BIN
      modules/dnn/src/caffe/compiled/caffe.tar.gz
  25. 45
      modules/dnn/src/caffe/glog_emulator.hpp
  26. 88
      modules/dnn/src/caffe/layer_loaders.cpp
  27. 121
      modules/dnn/src/dnn.cpp
  28. 14
      modules/dnn/src/init.cpp
  29. 26
      modules/dnn/src/layers/convolution_layer.cpp
  30. 103
      modules/dnn/src/layers/crop_layer.cpp
  31. 4
      modules/dnn/src/layers/crop_layer.hpp
  32. 750
      modules/dnn/src/layers/detection_output_layer.cpp
  33. 226
      modules/dnn/src/layers/detection_output_layer.hpp
  34. 117
      modules/dnn/src/layers/flatten_layer.cpp
  35. 67
      modules/dnn/src/layers/flatten_layer.hpp
  36. 99
      modules/dnn/src/layers/layers_common.cpp
  37. 4
      modules/dnn/src/layers/layers_common.hpp
  38. 5
      modules/dnn/src/layers/lrn_layer.cpp
  39. 201
      modules/dnn/src/layers/normalize_bbox_layer.cpp
  40. 94
      modules/dnn/src/layers/normalize_bbox_layer.hpp
  41. 4
      modules/dnn/src/layers/op_im2col.cpp
  42. 40
      modules/dnn/src/layers/op_im2col.hpp
  43. 185
      modules/dnn/src/layers/permute_layer.cpp
  44. 75
      modules/dnn/src/layers/permute_layer.hpp
  45. 16
      modules/dnn/src/layers/pooling_layer.cpp
  46. 307
      modules/dnn/src/layers/prior_box_layer.cpp
  47. 101
      modules/dnn/src/layers/prior_box_layer.hpp
  48. 68
      modules/dnn/src/layers/recurrent_layers.cpp
  49. 8
      modules/dnn/src/torch/torch_importer.cpp
  50. 10
      modules/dnn/tutorials/tutorial_dnn_googlenet.markdown

@ -52,12 +52,10 @@ endif()
# ----------------------------------------------------------------------------
# Download pre-trained models for complex testing on GoogLeNet and AlexNet
# ----------------------------------------------------------------------------
OCV_OPTION(${the_module}_DOWNLOAD_CAFFE_MODELS "Use GoogLeNet Caffe model for testing" OFF IF BUILD_TESTS AND PYTHON2_EXECUTABLE AND DEFINED ENV{OPENCV_TEST_DATA_PATH})
if(BUILD_TESTS AND PYTHON2_EXECUTABLE AND DEFINED ENV{OPENCV_TEST_DATA_PATH}
AND (DOWNLOAD_EXTERNAL_TEST_DATA OR ${the_module}_DOWNLOAD_CAFFE_MODELS))
OCV_OPTION(${the_module}_DOWNLOAD_CAFFE_MODELS "Use GoogLeNet Caffe model for testing" OFF IF BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH})
if(BUILD_TESTS AND DEFINED ENV{OPENCV_TEST_DATA_PATH} AND (DOWNLOAD_EXTERNAL_TEST_DATA OR ${the_module}_DOWNLOAD_CAFFE_MODELS))
add_custom_command( TARGET opencv_test_${name} POST_BUILD
COMMAND ${PYTHON2_EXECUTABLE} download_model.py test_models.json
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/scripts )
COMMAND ${CMAKE_COMMAND} -Dmodel=GoogleNet -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/download_model.cmake)
add_definitions(-DENABLE_CAFFE_MODEL_TESTS=1)
endif()
@ -68,21 +66,29 @@ OCV_OPTION(${the_module}_BUILD_TORCH_IMPORTER "Build Torch model importer (exper
if(${the_module}_BUILD_TORCH_IMPORTER)
add_definitions(-DENABLE_TORCH_IMPORTER=1)
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4702 /wd4127 /wd4267) #suppress warnings in original torch files
if(NOT DEFINED HAVE_TORCH_EXE)
execute_process(COMMAND th ${CMAKE_CURRENT_SOURCE_DIR}/testdata/dnn/torch/torch_nn_echo.lua RESULT_VARIABLE TORCH_EXE_STATUS)
set(HAVE_TORCH_EXE OFF)
if(${TORCH_EXE_STATUS} EQUAL 0)
set(HAVE_TORCH_EXE ON)
endif()
set(HAVE_TORCH_EXE ${HAVE_TORCH_EXE} CACHE INTERNAL "Have torch binary")
endif()
endif()
# ----------------------------------------------------------------------------
# Generating test data for Torch importer
# ----------------------------------------------------------------------------
OCV_OPTION(${the_module}_BUILD_TORCH_TESTS "Build Torch tests (installed torch7 with nn module is required)" ON IF BUILD_TESTS AND ${the_module}_BUILD_TORCH_IMPORTER)
OCV_OPTION(${the_module}_BUILD_TORCH_TESTS "Build Torch tests (installed torch7 with nn module is required)" ON IF BUILD_TESTS AND ${the_module}_BUILD_TORCH_IMPORTER AND HAVE_TORCH_EXE)
if(${the_module}_BUILD_TORCH_TESTS)
if(NOT DEFINED ENV{OPENCV_TEST_DATA_PATH})
message(FATAL_ERROR "OPENCV_TEST_DATA_PATH environment variable was not specified")
endif()
execute_process(COMMAND th ${CMAKE_CURRENT_SOURCE_DIR}/testdata/dnn/torch/torch_nn_echo.lua RESULT_VARIABLE TORCH_STATUS)
if(TORCH_STATUS)
message(FATAL_ERROR "Torch executable \"th\" not found (status: ${TORCH_STATUS}) or nn module not found")
if(NOT HAVE_TORCH_EXE)
message(FATAL_ERROR "Torch executable \"th\" not found or nn module not found")
endif()
add_custom_command( TARGET opencv_test_${name} POST_BUILD

@ -23,20 +23,11 @@ else()
endif()
set(PROTOBUF_LIBRARIES libprotobuf)
set(PROTOBUF_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/protobuf/src ${CMAKE_CURRENT_BINARY_DIR})
set(PROTOBUF_SRCS ${CMAKE_CURRENT_BINARY_DIR}/caffe.pb.cc)
set(PROTOBUF_HDRS ${CMAKE_CURRENT_BINARY_DIR}/caffe.pb.h)
set(PROTOBUF_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/protobuf/src ${CMAKE_CURRENT_SOURCE_DIR}/misc/caffe)
set(PROTOBUF_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/misc/caffe/caffe.pb.cc)
set(PROTOBUF_HDRS ${CMAKE_CURRENT_SOURCE_DIR}/misc/caffe/caffe.pb.h)
add_definitions(-DHAVE_PROTOBUF=1)
add_custom_command(
OUTPUT ${PROTOBUF_SRCS} ${PROTOBUF_HDRS}
COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_SOURCE_DIR}/src/caffe/compiled/caffe.tar.gz
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Unpacking compiled caffe protobuf files"
VERBATIM
)
set_source_files_properties(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} PROPERTIES GENERATED TRUE)
#suppress warnings in autogenerated caffe.pb.* files
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-parameter)
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4125 /wd4267 /wd4127 /wd4244 /wd4512 /wd4702)

@ -13,11 +13,18 @@
# MKL_LIBRARIES - IPP libraries that are used by OpenCV
#
macro (mkl_find_lib VAR NAME DIRS)
find_path(${VAR} ${NAME} ${DIRS} NO_DEFAULT_PATH)
set(${VAR} ${${VAR}}/${NAME})
unset(${VAR} CACHE)
endmacro()
macro(mkl_fail)
set(HAVE_MKL OFF CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
unset(MKL_INCLUDE_DIRS CACHE)
unset(MKL_LIBRARIES CACHE)
return()
endmacro()
macro(get_mkl_version VERSION_FILE)
@ -42,28 +49,27 @@ endif()
#check current MKL_ROOT_DIR
if(NOT MKL_ROOT_DIR OR NOT EXISTS ${MKL_ROOT_DIR}/include/mkl.h)
set(MKLROOT_PATHS ${MKL_ROOT_DIR})
set(mkl_root_paths ${MKL_ROOT_DIR})
if(DEFINED $ENV{MKLROOT})
list(APPEND MKLROOT_PATHS $ENV{MKLROOT})
list(APPEND mkl_root_paths $ENV{MKLROOT})
endif()
if(WIN32)
set(ProgramFilesx86 "ProgramFiles(x86)")
list(APPEND MKLROOT_PATHS $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
list(APPEND mkl_root_paths $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
endif()
if(UNIX)
list(APPEND MKLROOT_PATHS "/opt/intel/mkl")
list(APPEND mkl_root_paths "/opt/intel/mkl")
endif()
find_path(MKL_ROOT_DIR include/mkl.h PATHS ${MKLROOT_PATHS})
find_path(MKL_ROOT_DIR include/mkl.h PATHS ${mkl_root_paths})
endif()
if(NOT MKL_ROOT_DIR)
mkl_fail()
return()
endif()
set(MKL_INCLUDE_DIRS ${MKL_ROOT_DIR}/include)
set(MKL_INCLUDE_HEADERS ${MKL_INCLUDE_DIRS}/mkl.h ${MKL_INCLUDE_DIRS}/mkl_version.h)
get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
#determine arch
if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
@ -81,43 +87,50 @@ else()
set(MKL_ARCH "ia32")
endif()
if(MSVC)
set(MKL_EXT ".lib")
set(MKL_PRE "")
else()
set(MKL_EXT ".a")
set(MKL_PRE "lib")
endif()
set(MKL_LIB_DIR ${MKL_ROOT_DIR}/lib/${MKL_ARCH})
set(MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_core${MKL_EXT} ${MKL_LIB_DIR}/${MKL_PRE}mkl_intel_${MKL_LP64}${MKL_EXT})
if(MKL_WITH_TBB)
list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_tbb_thread${MKL_EXT})
list(APPEND MKL_LIBRARIES ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}/tbb${MKL_EXT})
elseif(MKL_WITH_OPENMP)
message(FATAL_ERROR "Multithreaded MKL is not supported yet")
if(${MKL_VERSION_STR} VERSION_GREATER "11.3.0" OR ${MKL_VERSION_STR} VERSION_EQUAL "11.3.0")
set(mkl_lib_find_paths
${MKL_ROOT_DIR}/lib
${MKL_ROOT_DIR}/lib/${MKL_ARCH} ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH})
set(mkl_lib_list
mkl_core
mkl_intel_${MKL_LP64})
if(MKL_WITH_TBB)
list(APPEND mkl_lib_list mkl_tbb_thread tbb)
elseif(MKL_WITH_OPENMP)
if(MSVC)
list(APPEND mkl_lib_list mkl_intel_thread libiomp5md)
else()
list(APPEND mkl_lib_list libmkl_gnu_thread)
endif()
else()
list(APPEND mkl_lib_list mkl_sequential)
endif()
else()
list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_sequential${MKL_EXT})
message(STATUS "MKL version ${MKL_VERSION_STR} is not supported")
mkl_fail()
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL MKL_INCLUDE_HEADERS MKL_LIBRARIES)
if(MKL_FOUND)
get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
set(HAVE_MKL ON CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
if(NOT UNIX)
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libarries")
else()
#it's ugly but helps to avoid cyclic lib problem
set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libarries")
set(MKL_LIBRARIES "")
foreach(lib ${mkl_lib_list})
find_library(${lib} ${lib} ${mkl_lib_find_paths})
mark_as_advanced(${lib})
if(NOT ${lib})
mkl_fail()
endif()
list(APPEND MKL_LIBRARIES ${${lib}})
endforeach()
message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
set(HAVE_MKL ON CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
if(NOT UNIX)
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libarries")
else()
#it's ugly but helps to avoid cyclic lib problem
set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libarries")
endif()

@ -0,0 +1,31 @@
# download_model.cmake — fetches a pre-trained Caffe model into the OpenCV
# test data directory. Run in CMake script mode, e.g.:
#   cmake -Dmodel=GoogleNet -P download_model.cmake
# Known models: GoogleNet, VGG16, voc-fcn32s. Destinations are derived from
# the OPENCV_TEST_DATA_PATH environment variable, which the caller must set.
set(GoogleNet_url "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel")
set(GoogleNet_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/bvlc_googlenet.caffemodel")
set(GoogleNet_sha "405fc5acd08a3bb12de8ee5e23a96bec22f08204")
set(VGG16_url "http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel")
# Fixed: was mistakenly named GG16_dst, so a VGG16 download resolved to an
# empty destination path.
set(VGG16_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/VGG_ILSVRC_16_layers.caffemodel")
set(voc-fcn32s_url "http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel")
set(voc-fcn32s_dst "$ENV{OPENCV_TEST_DATA_PATH}/dnn/fcn32s-heavy-pascal.caffemodel")

# Default to GoogleNet when no -Dmodel=... was passed.
if(NOT model)
  set(model "GoogleNet")
endif()
if(NOT DEFINED ${model}_url)
  message(FATAL_ERROR "download_model.cmake: unknown model '${model}'")
endif()

if(NOT EXISTS "${${model}_dst}")
  message(STATUS "Downloading ${${model}_url} to ${${model}_dst}")
  # Verify the SHA1 only when a checksum is declared for this model.
  # NOTE: the original test `if(${${model}_sha})` dereferenced the checksum
  # *value*; CMake then treats the hex string as an (undefined) variable name,
  # so the branch was always false and the hash was never actually checked.
  if(DEFINED ${model}_sha)
    file(DOWNLOAD "${${model}_url}" "${${model}_dst}" SHOW_PROGRESS
         EXPECTED_HASH SHA1=${${model}_sha} STATUS status_vec)
  else()
    file(DOWNLOAD "${${model}_url}" "${${model}_dst}" SHOW_PROGRESS
         STATUS status_vec)
  endif()
  # status_vec is "<numeric code>;<human-readable message>"; 0 means success.
  list(GET status_vec 0 status)
  list(GET status_vec 1 status_msg)
  if(status EQUAL 0)
    message(STATUS "Ok! ${status_msg}")
  else()
    message(STATUS "Fail! ${status_msg}")
  endif()
endif()

@ -77,7 +77,7 @@ namespace dnn
{
public:
/** Creates instance of LSTM layer */
static Ptr<LSTMLayer> create();
static CV_WRAP Ptr<LSTMLayer> create();
/** Set trained weights for LSTM layer.
LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
@ -109,27 +109,27 @@ namespace dnn
@param Wx is matrix defining how current input is transformed to internal gates (i.e. according to abovementioned notation is @f$ W_x @f$)
@param b is bias vector (i.e. according to abovementioned notation is @f$ b @f$)
*/
virtual void setWeights(const Blob &Wh, const Blob &Wx, const Blob &b) = 0;
CV_WRAP virtual void setWeights(const Blob &Wh, const Blob &Wx, const Blob &b) = 0;
/** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
* @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
* where `Wh` is parameter from setWeights().
*/
virtual void setOutShape(const BlobShape &outTailShape = BlobShape::empty()) = 0;
CV_WRAP virtual void setOutShape(const BlobShape &outTailShape = BlobShape::empty()) = 0;
/** @brief Set @f$ h_{t-1} @f$ value that will be used in next forward() calls.
* @details By-default @f$ h_{t-1} @f$ is inited by zeros and updated after each forward() call.
*/
virtual void setH(const Blob &H) = 0;
CV_WRAP virtual void setH(const Blob &H) = 0;
/** @brief Returns current @f$ h_{t-1} @f$ value (deep copy). */
virtual Blob getH() const = 0;
CV_WRAP virtual Blob getH() const = 0;
/** @brief Set @f$ c_{t-1} @f$ value that will be used in next forward() calls.
* @details By-default @f$ c_{t-1} @f$ is inited by zeros and updated after each forward() call.
*/
virtual void setC(const Blob &C) = 0;
CV_WRAP virtual void setC(const Blob &C) = 0;
/** @brief Returns current @f$ c_{t-1} @f$ value (deep copy). */
virtual Blob getC() const = 0;
CV_WRAP virtual Blob getC() const = 0;
/** @brief Specifies whether to interpret the first dimension of the input blob as the timestamp dimension or as the sample dimension.
*
@ -139,12 +139,12 @@ namespace dnn
* If flag is set to false then shape of input blob will be interpeted as [`N`, `[data dims]`].
* In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
*/
virtual void setUseTimstampsDim(bool use = true) = 0;
CV_WRAP virtual void setUseTimstampsDim(bool use = true) = 0;
/** @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
* @details Shape of the second output is the same as first output.
*/
virtual void setProduceCellOutput(bool produce = false) = 0;
CV_WRAP virtual void setProduceCellOutput(bool produce = false) = 0;
/** In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
* @param input should contain packed values @f$x_t@f$
@ -168,7 +168,7 @@ namespace dnn
{
public:
/** Creates instance of RNNLayer */
static Ptr<RNNLayer> create();
static CV_WRAP Ptr<RNNLayer> create();
/** Setups learned weights.
@ -184,12 +184,12 @@ namespace dnn
@param Who is @f$ W_{xo} @f$ matrix
@param bo is @f$ b_{o} @f$ vector
*/
virtual void setWeights(const Blob &Wxh, const Blob &bh, const Blob &Whh, const Blob &Who, const Blob &bo) = 0;
CV_WRAP virtual void setWeights(const Blob &Wxh, const Blob &bh, const Blob &Whh, const Blob &Who, const Blob &bo) = 0;
/** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
* @details Shape of the second output is the same as first output.
*/
virtual void setProduceHiddenOutput(bool produce = false) = 0;
CV_WRAP virtual void setProduceHiddenOutput(bool produce = false) = 0;
/** Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
@ -209,21 +209,21 @@ namespace dnn
{
public:
Size kernel, stride, pad;
CV_PROP_RW Size kernel, stride, pad, dilation;
};
class CV_EXPORTS_W ConvolutionLayer : public BaseConvolutionLayer
{
public:
static Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
};
class CV_EXPORTS_W DeconvolutionLayer : public BaseConvolutionLayer
{
public:
static Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0), Size dilation = Size(1, 1));
};
class CV_EXPORTS_W LRNLayer : public Layer
@ -235,12 +235,12 @@ namespace dnn
CHANNEL_NRM,
SPATIAL_NRM
};
int type;
CV_PROP_RW int type;
int size;
double alpha, beta;
CV_PROP_RW int size;
CV_PROP_RW double alpha, beta;
static Ptr<LRNLayer> create(int type = CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75);
static CV_WRAP Ptr<LRNLayer> create(int type = LRNLayer::CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75);
};
class CV_EXPORTS_W PoolingLayer : public Layer
@ -254,34 +254,36 @@ namespace dnn
STOCHASTIC
};
int type;
Size kernel, stride, pad;
CV_PROP_RW int type;
CV_PROP_RW Size kernel, stride, pad;
CV_PROP_RW bool globalPooling;
static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<PoolingLayer> create(int type = PoolingLayer::MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
static CV_WRAP Ptr<PoolingLayer> createGlobal(int type = PoolingLayer::MAX);
};
class CV_EXPORTS_W SoftmaxLayer : public Layer
{
public:
static Ptr<SoftmaxLayer> create(int axis = 1);
static CV_WRAP Ptr<SoftmaxLayer> create(int axis = 1);
};
class CV_EXPORTS_W InnerProductLayer : public Layer
{
public:
int axis;
CV_PROP_RW int axis;
static Ptr<InnerProductLayer> create(int axis = 1);
static CV_WRAP Ptr<InnerProductLayer> create(int axis = 1);
};
class CV_EXPORTS_W MVNLayer : public Layer
{
public:
double eps;
bool normVariance, acrossChannels;
CV_PROP_RW double eps;
CV_PROP_RW bool normVariance, acrossChannels;
static Ptr<MVNLayer> create(bool normVariance = true, bool acrossChannels = false, double eps = 1e-9);
static CV_WRAP Ptr<MVNLayer> create(bool normVariance = true, bool acrossChannels = false, double eps = 1e-9);
};
/* Reshaping */
@ -289,10 +291,10 @@ namespace dnn
class CV_EXPORTS_W ReshapeLayer : public Layer
{
public:
BlobShape newShapeDesc;
Range newShapeRange;
CV_PROP_RW BlobShape newShapeDesc;
CV_PROP_RW Range newShapeRange;
static Ptr<ReshapeLayer> create(const BlobShape &newShape, Range applyingRange = Range::all());
static CV_WRAP Ptr<ReshapeLayer> create(const BlobShape &newShape, Range applyingRange = Range::all());
};
class CV_EXPORTS_W ConcatLayer : public Layer
@ -300,7 +302,7 @@ namespace dnn
public:
int axis;
static Ptr<ConcatLayer> create(int axis = 1);
static CV_WRAP Ptr<ConcatLayer> create(int axis = 1);
};
class CV_EXPORTS_W SplitLayer : public Layer
@ -308,17 +310,17 @@ namespace dnn
public:
int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
static Ptr<SplitLayer> create(int outputsCount = -1);
static CV_WRAP Ptr<SplitLayer> create(int outputsCount = -1);
};
class CV_EXPORTS_W SliceLayer : public Layer
{
public:
int axis;
std::vector<int> sliceIndices;
CV_PROP_RW int axis;
CV_PROP std::vector<int> sliceIndices;
static Ptr<SliceLayer> create(int axis);
static Ptr<SliceLayer> create(int axis, const std::vector<int> &sliceIndices);
static CV_WRAP Ptr<SliceLayer> create(int axis);
static CV_WRAP Ptr<SliceLayer> create(int axis, const std::vector<int> &sliceIndices);
};
/* Activations */
@ -326,41 +328,41 @@ namespace dnn
class CV_EXPORTS_W ReLULayer : public Layer
{
public:
double negativeSlope;
CV_PROP_RW double negativeSlope;
static Ptr<ReLULayer> create(double negativeSlope = 0);
static CV_WRAP Ptr<ReLULayer> create(double negativeSlope = 0);
};
class CV_EXPORTS_W TanHLayer : public Layer
{
public:
static Ptr<TanHLayer> create();
static CV_WRAP Ptr<TanHLayer> create();
};
class CV_EXPORTS_W SigmoidLayer : public Layer
{
public:
static Ptr<SigmoidLayer> create();
static CV_WRAP Ptr<SigmoidLayer> create();
};
class CV_EXPORTS_W BNLLLayer : public Layer
{
public:
static Ptr<BNLLLayer> create();
static CV_WRAP Ptr<BNLLLayer> create();
};
class CV_EXPORTS_W AbsLayer : public Layer
{
public:
static Ptr<AbsLayer> create();
static CV_WRAP Ptr<AbsLayer> create();
};
class CV_EXPORTS_W PowerLayer : public Layer
{
public:
double power, scale, shift;
CV_PROP_RW double power, scale, shift;
static Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
static CV_WRAP Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
};
/* Layers using in semantic segmentation */
@ -368,6 +370,9 @@ namespace dnn
class CV_EXPORTS_W CropLayer : public Layer
{
public:
CV_PROP int startAxis;
CV_PROP std::vector<int> offset;
static Ptr<CropLayer> create(int start_axis, const std::vector<int> &offset);
};

@ -54,7 +54,7 @@ namespace dnn
//! @{
/** @brief Lightweight class for storing and processing a shape of blob (or anything else). */
struct BlobShape
struct CV_EXPORTS_W BlobShape
{
BlobShape(); //!< Creates [1, 1, 1, 1] shape @todo Make more clearer behavior.
explicit BlobShape(int s0); //!< Creates 1-dim shape [@p s0]
@ -154,7 +154,7 @@ namespace dnn
/** @brief Constructs Blob from existing Mat or UMat. */
Blob(InputArray data);
/** @brief Constucts 4-dimensional blob (so-called batch) from image or array of images.
/** @brief Constructs 4-dimensional blob (so-called batch) from image or array of images.
* @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of such images)
* @param dstCn specifies size of second axis of output blob
*/
@ -229,6 +229,18 @@ namespace dnn
/** @brief Checks equality of two blobs shapes. */
bool equalShape(const Blob &other) const;
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
*/
Mat getPlane(int n, int cn);
/** @brief Returns slice of first dimension.
* @details The behaviour is similar to getPlane(), but returns all
* channels * rows * cols values, corresponding to the n-th value
* of the first dimension.
*/
Mat getPlanes(int n);
/* Shape getters of 4-dimensional blobs. */
int cols() const; //!< Returns size of the fourth axis blob.
int rows() const; //!< Returns size of the third axis blob.
@ -262,12 +274,6 @@ namespace dnn
float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
//TODO: add const ptr methods
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
* @todo Method will be removed. Use slice() from shape_utils.hpp.
*/
Mat getPlane(int n, int cn);
/** @brief Shares data from other @p blob.
* @returns *this
*/
@ -312,17 +318,17 @@ namespace dnn
public:
enum DataState
{
UNINITIALIZED,
HEAD_AT_MAT,
HEAD_AT_UMAT,
SYNCED
UNINITIALIZED = 0,
HEAD_AT_MAT = 1 << 0,
HEAD_AT_UMAT = 1 << 1,
SYNCED = HEAD_AT_MAT | HEAD_AT_UMAT
};
enum AllocFlag
{
ALLOC_MAT = 1,
ALLOC_UMAT = 2,
ALLOC_BOTH = 3
ALLOC_MAT = HEAD_AT_MAT,
ALLOC_UMAT = HEAD_AT_UMAT,
ALLOC_BOTH = SYNCED
};
};

@ -456,6 +456,12 @@ inline Mat Blob::getPlane(int n, int cn)
return Mat(dims() - 2, sizes() + 2, type(), ptr(n, cn));
}
inline Mat Blob::getPlanes(int n)
{
CV_Assert(dims() > 3);
return Mat(dims() - 1, sizes() + 1, type(), ptr(n));
}
inline int Blob::cols() const
{
return xsize(3);

@ -59,10 +59,12 @@ namespace dnn
struct DictValue
{
DictValue(const DictValue &r);
DictValue(int p = 0) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; } //!< Constructs integer scalar
DictValue(int64 i = 0) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; } //!< Constructs integer scalar
DictValue(int i) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = i; } //!< Constructs integer scalar
DictValue(unsigned p) : type(Param::INT), pi(new AutoBuffer<int64,1>) { (*pi)[0] = p; } //!< Constructs integer scalar
DictValue(double p) : type(Param::REAL), pd(new AutoBuffer<double,1>) { (*pd)[0] = p; } //!< Constructs floating point scalar
DictValue(const String &p) : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = p; } //!< Constructs string scalar
DictValue(const String &s) : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; } //!< Constructs string scalar
DictValue(const char *s) : type(Param::STRING), ps(new AutoBuffer<String,1>) { (*ps)[0] = s; } //!< @overload
template<typename TypeIter>
static DictValue arrayInt(TypeIter begin, int size); //!< Constructs integer array
@ -111,7 +113,7 @@ class CV_EXPORTS Dict
public:
//! Checks a presence of the @p key in the dictionary.
bool has(const String &key);
bool has(const String &key) const;
//! If the @p key in the dictionary then returns pointer to its value, else returns NULL.
DictValue *ptr(const String &key);

@ -86,7 +86,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
public:
//! List of learned parameters must be stored here to allow read them by using Net::getParam().
std::vector<Blob> blobs;
CV_PROP_RW std::vector<Blob> blobs;
/** @brief Allocates internal buffers and output blobs with respect to the shape of inputs.
* @param[in] input vector of already allocated input blobs
@ -104,6 +104,18 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
virtual void forward(std::vector<Blob*> &input, std::vector<Blob> &output) = 0;
/** @brief @overload */
CV_WRAP void allocate(const std::vector<Blob> &inputs, CV_OUT std::vector<Blob> &outputs);
/** @brief @overload */
CV_WRAP std::vector<Blob> allocate(const std::vector<Blob> &inputs);
/** @brief @overload */
CV_WRAP void forward(const std::vector<Blob> &inputs, CV_IN_OUT std::vector<Blob> &outputs);
/** @brief Allocates layer and computes output. */
CV_WRAP void run(const std::vector<Blob> &inputs, CV_OUT std::vector<Blob> &outputs);
/** @brief Returns index of input blob into the input array.
* @param inputName label of input blob
*
@ -116,8 +128,8 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
virtual int outputNameToIndex(String outputName);
String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
String type; //!< Type name which was used for creating layer by layer factory.
CV_PROP String name; //!< Name of the layer instance, can be used for logging or other internal purposes.
CV_PROP String type; //!< Type name which was used for creating layer by layer factory.
Layer();
explicit Layer(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
@ -135,12 +147,15 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*
* This class supports reference counting of its instances, i. e. copies point to the same instance.
*/
class CV_EXPORTS_W Net
class CV_EXPORTS_W_SIMPLE Net
{
public:
Net(); //!< Default constructor.
~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
CV_WRAP Net(); //!< Default constructor.
CV_WRAP ~Net(); //!< Destructor frees the net only if there aren't references to the net anymore.
/** Returns true if there are no layers in the network. */
CV_WRAP bool empty() const;
/** @brief Adds new layer to the net.
* @param name unique name of the adding layer.
@ -157,13 +172,18 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
/** @brief Converts string name of the layer to the integer identifier.
* @returns id of the layer, or -1 if the layer wasn't found.
*/
int getLayerId(const String &layer);
CV_WRAP int getLayerId(const String &layer);
CV_WRAP std::vector<String> getLayerNames() const;
/** @brief Container for strings and integers. */
typedef DictValue LayerId;
/** @brief Returns pointer to layer with specified name which the network use. */
CV_WRAP Ptr<Layer> getLayer(LayerId layerId);
/** @brief Delete layer for the network (not implemented yet) */
void deleteLayer(LayerId layer);
CV_WRAP void deleteLayer(LayerId layer);
/** @brief Connects output of the first layer to input of the second layer.
* @param outPin descriptor of the first layer output.
@ -178,7 +198,7 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*
* @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
*/
void connect(String outPin, String inpPin);
CV_WRAP void connect(String outPin, String inpPin);
/** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
* @param outLayerId identifier of the first layer
@ -188,19 +208,22 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*/
void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
/** @brief Sets ouputs names of the network input pseudo layer.
/** @brief Sets outputs names of the network input pseudo layer.
*
* Each net always has special own the network input pseudo layer with id=0.
* This layer stores the user blobs only and don't make any computations.
* In fact, this layer provides the only way to pass user data into the network.
* As any other layer, this layer can label its outputs and this function provides an easy way to do this.
*/
void setNetInputs(const std::vector<String> &inputBlobNames);
CV_WRAP void setNetInputs(const std::vector<String> &inputBlobNames);
/** @brief Initializes and allocates all layers. */
CV_WRAP void allocate();
/** @brief Runs forward pass for the whole network */
void forward();
/** @brief Runs forward pass to compute output of layer @p toLayer */
void forward(LayerId toLayer);
/** @brief Runs forward pass to compute output of layer @p toLayer.
* @details By default runs forward pass for the whole network.
*/
CV_WRAP void forward(LayerId toLayer = String());
/** @brief Runs forward pass to compute output of layer @p toLayer, but computations start from @p startLayer */
void forward(LayerId startLayer, LayerId toLayer);
/** @overload */
@ -222,12 +245,13 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @note If updating blob is not empty then @p blob must have the same shape,
* because network reshaping is not implemented yet.
*/
void setBlob(String outputName, const Blob &blob);
CV_WRAP void setBlob(String outputName, const Blob &blob);
/** @brief Returns the layer output blob.
* @param outputName the descriptor of the returning layer output blob.
* @see connect(String, String)
*/
Blob getBlob(String outputName);
CV_WRAP Blob getBlob(String outputName);
/** @brief Sets the new value for the learned param of the layer.
* @param layer name or id of the layer.
@ -237,13 +261,14 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @note If shape of the new blob differs from the previous shape,
* then the following forward pass may fail.
*/
void setParam(LayerId layer, int numParam, const Blob &blob);
CV_WRAP void setParam(LayerId layer, int numParam, const Blob &blob);
/** @brief Returns parameter blob of the layer.
* @param layer name or id of the layer.
* @param numParam index of the layer parameter in the Layer::blobs array.
* @see Layer::blobs
*/
Blob getParam(LayerId layer, int numParam = 0);
CV_WRAP Blob getParam(LayerId layer, int numParam = 0);
private:
@ -252,12 +277,12 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
};
/** @brief Small interface class for loading trained serialized models of different dnn-frameworks. */
class Importer
class CV_EXPORTS_W Importer
{
public:
/** @brief Adds loaded layers into the @p net and sets connetions between them. */
virtual void populateNet(Net net) = 0;
/** @brief Adds loaded layers into the @p net and sets connections between them. */
CV_WRAP virtual void populateNet(Net net) = 0;
virtual ~Importer();
};
@ -267,7 +292,12 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
* @param caffeModel path to the .caffemodel file with learned network.
* @returns Pointer to the created importer, NULL in failure cases.
*/
CV_EXPORTS Ptr<Importer> createCaffeImporter(const String &prototxt, const String &caffeModel = String());
CV_EXPORTS_W Ptr<Importer> createCaffeImporter(const String &prototxt, const String &caffeModel = String());
/** @brief Reads a network model stored in Caffe model files.
* @details This is shortcut consisting from createCaffeImporter and Net::populateNet calls.
*/
CV_EXPORTS_W Net readNetFromCaffe(const String &prototxt, const String &caffeModel = String());
/** @brief Creates the importer of <a href="http://torch.ch">Torch7</a> framework network.
* @param filename path to the file, dumped from Torch by using torch.save() function.
@ -294,12 +324,12 @@ namespace dnn //! This namespace is used for dnn module functionlaity.
*
* Also some equivalents of these classes from cunn, cudnn, and fbcunn may be successfully imported.
*/
CV_EXPORTS Ptr<Importer> createTorchImporter(const String &filename, bool isBinary = true);
CV_EXPORTS_W Ptr<Importer> createTorchImporter(const String &filename, bool isBinary = true);
/** @brief Loads blob which was serialized as torch.Tensor object of Torch7 framework.
* @warning This function has the same limitations as createTorchImporter().
*/
CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary = true);
CV_EXPORTS_W Blob readTorchBlob(const String &filename, bool isBinary = true);
//! @}
}

@ -287,7 +287,7 @@ inline std::ostream &operator<<(std::ostream &stream, const DictValue &dictv)
/////////////////////////////////////////////////////////////////
inline bool Dict::has(const String &key)
inline bool Dict::has(const String &key) const
{
return dict.count(key) != 0;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,108 @@
#ifdef HAVE_OPENCV_DNN
typedef dnn::DictValue LayerId;
typedef std::vector<cv::dnn::Blob> vector_Blob;
template<>
bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name);
template<> struct pyopencvVecConverter<dnn::Blob>
{
    // Converter for std::vector<dnn::Blob>. A bare numpy array is accepted
    // as a one-element vector; anything else goes through the generic path.
    static bool to(PyObject* obj, std::vector<dnn::Blob>& value, const ArgInfo info)
    {
        if (!PyArray_Check(obj))
            return pyopencv_to_generic_vec(obj, value, info);

        value.resize(1);
        return pyopencv_to(obj, value[0], info.name);
    }

    static PyObject* from(const std::vector<dnn::Blob>& value)
    {
        return pyopencv_from_generic_vec(value);
    }
};
template<>
bool pyopencv_to(PyObject *o, std::vector<dnn::Blob> &blobs, const char *name) //required for Layer::blobs RW
{
    const ArgInfo info(name, false);
    return pyopencvVecConverter<dnn::Blob>::to(o, blobs, info);
}
template<>
bool pyopencv_to(PyObject *o, dnn::Blob &blob, const char *name)
{
    // Convert a Python object (typically a numpy array) into a dnn::Blob,
    // filling the blob's Mat header in place.
    Mat &dst = blob.matRef();
    if (!pyopencv_to(o, dst, name))
        return false;

    if (PyArray_Check(o)) //try fix channels
    {
        // The generic Mat converter may fold the last numpy axis into Mat
        // channels. If the dimension counts disagree, reshape the Mat back
        // to the exact numpy shape with a single channel so Blob dims match
        // the original array rank.
        PyArrayObject* oarr = (PyArrayObject*) o;
        if (PyArray_NDIM(oarr) == dst.dims)
            return true;

        int ndims = PyArray_NDIM(oarr);
        std::vector<int> shape(ndims);
        const npy_intp* _sizes = PyArray_DIMS(oarr);
        for (int i = 0; i < ndims; i++)
            shape[i] = (int)_sizes[i];

        dst = dst.reshape(1, ndims, &shape[0]);
    }
    return true;
}
template<>
PyObject *pyopencv_from(const dnn::Blob &blob)
{
    // A Blob is exposed to Python through its read-only Mat view.
    const Mat &view = blob.matRefConst();
    return pyopencv_from(view);
}
template<>
bool pyopencv_to(PyObject *o, dnn::DictValue &dv, const char *name)
{
    // Convert a Python scalar (int / float / str) into a dnn::DictValue.
    // None (or a null pointer) is accepted and leaves dv untouched.
    (void)name;
    if (!o || o == Py_None)
        return true; //Current state will be used
    else if (PyLong_Check(o))
    {
        dv = dnn::DictValue((int64)PyLong_AsLongLong(o));
        return true;
    }
    else if (PyFloat_Check(o))
    {
        dv = dnn::DictValue(PyFloat_AS_DOUBLE(o));
        return true;
    }
    else if (PyString_Check(o))
    {
        // NOTE(review): PyString_* is the Python 2 C API; presumably the
        // binding headers map it appropriately for Python 3 — confirm
        // before reusing this converter elsewhere.
        dv = dnn::DictValue(String(PyString_AsString(o)));
        return true;
    }
    else
        return false;
}
template<>
bool pyopencv_to(PyObject *o, dnn::BlobShape &shape, const char *name)
{
    // A BlobShape arrives from Python as a plain sequence of ints.
    std::vector<int> dims;
    if (!pyopencv_to_generic_vec(o, dims, ArgInfo(name, false)))
        return false;

    if (dims.empty())
        shape = dnn::BlobShape::empty();
    else
        shape = dnn::BlobShape((int)dims.size(), &dims[0]);
    return true;
}
template<>
PyObject *pyopencv_from(const dnn::BlobShape &shape)
{
    // Expose a BlobShape to Python as a list of its dimensions.
    const int *dimsBegin = shape.ptr();
    std::vector<int> dims(dimsBegin, dimsBegin + shape.dims());
    return pyopencv_from_generic_vec(dims);
}
#endif

@ -84,23 +84,18 @@ std::vector<String> readClassNames(const char *filename = "synset_words.txt")
int main(int argc, char **argv)
{
cv::dnn::initModule(); //Required if OpenCV is built as static libs
String modelTxt = "bvlc_googlenet.prototxt";
String modelBin = "bvlc_googlenet.caffemodel";
String imageFile = (argc > 1) ? argv[1] : "space_shuttle.jpg";
//! [Create the importer of Caffe model]
Ptr<dnn::Importer> importer;
try //Try to import Caffe GoogleNet model
{
importer = dnn::createCaffeImporter(modelTxt, modelBin);
}
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
{
std::cerr << err.msg << std::endl;
}
//! [Create the importer of Caffe model]
//! [Read and initialize network]
Net net = dnn::readNetFromCaffe(modelTxt, modelBin);
//! [Read and initialize network]
if (!importer)
//! [Check that network was read successfully]
if (net.empty())
{
std::cerr << "Can't load network by using the following files: " << std::endl;
std::cerr << "prototxt: " << modelTxt << std::endl;
@ -109,12 +104,7 @@ int main(int argc, char **argv)
std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
exit(-1);
}
//! [Initialize network]
dnn::Net net;
importer->populateNet(net);
importer.release(); //We don't need importer anymore
//! [Initialize network]
//! [Check that network was read successfully]
//! [Prepare blob]
Mat img = imread(imageFile);

@ -1,6 +1,7 @@
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
using namespace cv;
using namespace cv::dnn;
@ -85,6 +86,9 @@ static void colorizeSegmentation(dnn::Blob &score, const vector<cv::Vec3b> &colo
int main(int argc, char **argv)
{
cv::dnn::initModule(); //Required if OpenCV is built as static libs
cv::ocl::setUseOpenCL(false); //OpenCL switcher
String modelTxt = fcnType + "-heavy-pascal.prototxt";
String modelBin = fcnType + "-heavy-pascal.caffemodel";
String imageFile = (argc > 1) ? argv[1] : "rgb.jpg";

@ -0,0 +1,34 @@
from __future__ import print_function
import numpy as np
import cv2
from cv2 import dnn
import timeit
def prepare_image(img):
    """Resize an image to 224x224 and pack it as a 1x3x224x224 float32 blob."""
    resized = cv2.resize(img, (224, 224))
    # interleaved (RGBRGB) -> planar (RRGGBB): bring the channel axis to the front
    planar = np.rollaxis(resized, 2)
    return planar.astype(np.float32).reshape((-1, 3, 224, 224))
def timeit_forward(net):
    """Print the OpenCL state and the total time of 10 forward passes."""
    def run_once():
        net.forward()
    print("OpenCL:", cv2.ocl.useOpenCL())
    print("Runtime:", timeit.timeit(run_once, number=10))
def get_class_list():
    """Return ImageNet class descriptions from synset_words.txt.

    Each line looks like "n01440764 tench"; everything after the first
    space is the human-readable class name.
    Fix: strip the trailing newline so printed class names do not carry
    a stray line break (the original kept it).
    """
    with open('synset_words.txt', 'rt') as f:
        return [x[x.find(" ") + 1:].rstrip('\n') for x in f]
# Load the test image and preprocess it into a 1x3x224x224 float32 blob.
blob = prepare_image(cv2.imread('space_shuttle.jpg'))
print("Input:", blob.shape, blob.dtype)

# Choose the OpenCL execution path before the network runs.
cv2.ocl.setUseOpenCL(True) #Disable OCL if you want

# Build the network from the Caffe description and weights, feed the blob
# into the ".data" input and run a forward pass.
net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel')
net.setBlob(".data", blob)
net.forward()
#timeit_forward(net) #Uncomment to check performance

# "prob" holds the softmax output; argmax is the best-scoring class index.
prob = net.getBlob("prob")
print("Output:", prob.shape, prob.dtype)

classes = get_class_list()
print("Best match", classes[prob.argmax()])

@ -0,0 +1,153 @@
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;
#include <fstream>
#include <iostream>
#include <cstdlib>
using namespace std;
const size_t width = 300;
const size_t height = 300;
Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
{
    // Per-channel mean values used by the SSD Caffe model.
    const int meanValues[3] = {104, 117, 123};

    // Build one constant single-channel plane per mean value, then merge
    // them into a 3-channel mean image of the requested size.
    vector<Mat> meanChannels;
    meanChannels.reserve(3);
    for (int c = 0; c < 3; c++)
        meanChannels.push_back(Mat((int)imageHeight, (int)imageWidth, CV_32F, Scalar(meanValues[c])));

    Mat mean;
    cv::merge(meanChannels, mean);
    return mean;
}
Mat preprocess(const Mat& frame)
{
    // Convert to float, resize to the SSD input size and subtract the
    // per-channel dataset mean.
    Mat preprocessed;
    frame.convertTo(preprocessed, CV_32FC3);
    resize(preprocessed, preprocessed, Size(width, height)); //SSD accepts 300x300 RGB-images

    // Fix: getMean's signature is (imageHeight, imageWidth) but the call
    // passed (width, height). Harmless while both are 300, wrong for any
    // non-square input size.
    Mat mean = getMean(height, width);
    cv::subtract(preprocessed, mean, preprocessed);

    return preprocessed;
}
// Fix: a space was missing between the concatenated literals, producing
// "...1512.02325)to detect..." in the printed help text.
const char* about = "This sample uses Single-Shot Detector "
                    "(https://arxiv.org/abs/1512.02325) "
                    "to detect objects on image\n"; // TODO: link

// Command-line schema for cv::CommandLineParser.
const char* params
    = "{ help | false | print usage }"
      "{ proto | | model configuration }"
      "{ model | | model weights }"
      "{ image | | image for detection }"
      "{ min_confidence | 0.5 | min confidence }";
int main(int argc, char** argv)
{
cv::CommandLineParser parser(argc, argv, params);
if (parser.get<bool>("help"))
{
std::cout << about << std::endl;
parser.printMessage();
return 0;
}
cv::dnn::initModule(); //Required if OpenCV is built as static libs
String modelConfiguration = parser.get<string>("proto");
String modelBinary = parser.get<string>("model");
//! [Create the importer of Caffe model]
Ptr<dnn::Importer> importer;
// Import Caffe SSD model
try
{
importer = dnn::createCaffeImporter(modelConfiguration, modelBinary);
}
catch (const cv::Exception &err) //Importer can throw errors, we will catch them
{
cerr << err.msg << endl;
}
//! [Create the importer of Caffe model]
if (!importer)
{
cerr << "Can't load network by using the following files: " << endl;
cerr << "prototxt: " << modelConfiguration << endl;
cerr << "caffemodel: " << modelBinary << endl;
cerr << "Models can be downloaded here:" << endl;
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl;
exit(-1);
}
//! [Initialize network]
dnn::Net net;
importer->populateNet(net);
importer.release(); //We don't need importer anymore
//! [Initialize network]
cv::Mat frame = cv::imread(parser.get<string>("image"), -1);
//! [Prepare blob]
Mat preprocessedFrame = preprocess(frame);
dnn::Blob inputBlob = dnn::Blob::fromImages(preprocessedFrame); //Convert Mat to dnn::Blob image
//! [Prepare blob]
//! [Set input blob]
net.setBlob(".data", inputBlob); //set the network input
//! [Set input blob]
//! [Make forward pass]
net.forward(); //compute output
//! [Make forward pass]
//! [Gather output]
dnn::Blob detection = net.getBlob("detection_out");
Mat detectionMat(detection.rows(), detection.cols(), CV_32F, detection.ptrf());
float confidenceThreshold = parser.get<float>("min_confidence");
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
size_t objectClass = detectionMat.at<float>(i, 1);
float xLeftBottom = detectionMat.at<float>(i, 3) * frame.cols;
float yLeftBottom = detectionMat.at<float>(i, 4) * frame.rows;
float xRightTop = detectionMat.at<float>(i, 5) * frame.cols;
float yRightTop = detectionMat.at<float>(i, 6) * frame.rows;
std::cout << "Class: " << objectClass << std::endl;
std::cout << "Confidence: " << confidence << std::endl;
std::cout << " " << xLeftBottom
<< " " << yLeftBottom
<< " " << xRightTop
<< " " << yRightTop << std::endl;
Rect object(xLeftBottom, yLeftBottom,
xRightTop - xLeftBottom,
yRightTop - yLeftBottom);
rectangle(frame, object, Scalar(0, 255, 0));
}
}
imshow("detections", frame);
waitKey();
return 0;
} // main

@ -1,79 +0,0 @@
#!/usr/bin/env python
import os
import sys
import time
import urllib
import hashlib
import argparse
import json
def reporthook(count, block_size, total_size):
    """
    Progress callback for urllib.urlretrieve; prints at most once per second.
    From http://blog.moleculea.com/2012/10/04/urlretrieve-progres-indicator/
    """
    global start_time
    global prev_duration
    if count == 0:
        # First call: remember when the download started.
        start_time = time.time()
        prev_duration = -1
        return
    duration = max(1, time.time() - start_time)
    if int(duration) == int(prev_duration):
        # Already reported during this second.
        return
    downloaded = int(count * block_size)
    sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
                     (int(downloaded * 100 / total_size),
                      downloaded / (1024 * 1024),
                      int(downloaded / (1024 * duration)),
                      duration))
    sys.stdout.flush()
    prev_duration = duration
# Function for checking SHA1.
def model_checks_out(filename, sha1):
    """Return True if the SHA1 digest of the file's contents equals `sha1`.

    Fix: the file is opened in binary mode ('rb'); text mode would corrupt
    the digest of a binary .caffemodel on Windows and raise under Python 3
    for non-decodable bytes.
    """
    with open(filename, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest() == sha1
def model_download(filename, url, sha1):
    """Download `url` to `filename` unless a SHA1-verified copy already exists.

    Exits the process with status 1 when the downloaded file fails the
    SHA1 check.
    """
    # Skip the download when a verified copy is already on disk.
    if os.path.exists(filename) and model_checks_out(filename, sha1):
        print("Model {} already exists.".format(filename))
        return

    # Download and verify model.
    urllib.urlretrieve(url, filename, reporthook)
    # Fix: removed a leftover debug `print model_checks_out(...)` statement;
    # the verification below already reports failures.
    if not model_checks_out(filename, sha1):
        print("ERROR: model {} did not download correctly!".format(url))
        sys.exit(1)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description="Downloading trained model binaries.")
parser.add_argument("download_list")
args = parser.parse_args()
test_dir = os.environ.get("OPENCV_TEST_DATA_PATH")
if not test_dir:
print "ERROR: OPENCV_TEST_DATA_PATH environment not specified"
sys.exit(1)
try:
with open(args.download_list, 'r') as f:
models_to_download = json.load(f)
except:
print "ERROR: Can't pasrse {}".format(args.download_list)
sys.exit(1)
for model_name in models_to_download:
model = models_to_download[model_name]
dst_dir = os.path.join(test_dir, os.path.dirname(model['file']))
dst_file = os.path.join(test_dir, model['file'])
if not os.path.exists(dst_dir):
print "ERROR: Can't find module testdata path '{}'".format(dst_dir)
sys.exit(1)
print "Downloading model '{}' to {} from {} ...".format(model_name, dst_file, model['url'])
model_download(dst_file, model['url'], model['sha1'])

@ -1,7 +0,0 @@
{
"googlenet": {
"file": "dnn/bvlc_googlenet.caffemodel",
"url": "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel",
"sha1": "405fc5acd08a3bb12de8ee5e23a96bec22f08204"
}
}

@ -63,16 +63,15 @@ Blob::Blob(InputArray data)
#ifndef CV_DNN_UMAT
m = data.getMat();
#else
CV_Assert(data.isMat() || data.isUMat());
if (data.isMat())
if (data.isUMat())
{
m = data.getMat();
state = HEAD_AT_MAT;
um = data.getUMat();
state = HEAD_AT_UMAT;
}
else
{
um = data.getUMat();
state = HEAD_AT_UMAT;
m = data.getMat();
state = HEAD_AT_MAT;
}
#endif
}

@ -91,6 +91,75 @@ message CropParameter {
repeated uint32 offset = 2;
}
message PermuteParameter {
// The new orders of the axes of data. Notice it should be with
// in the same range as the input data, and it starts from 0.
// Do not provide repeated order.
repeated uint32 order = 1;
}
// Message that stores parameters used by NormalizeBBoxLayer
message NormalizeBBoxParameter {
optional bool across_spatial = 1 [default = true];
// Initial value of scale. Default is 1.0 for all
optional FillerParameter scale_filler = 2;
// Whether or not scale parameters are shared across channels.
optional bool channel_shared = 3 [default = true];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 4 [default = 1e-10];
}
// Message that store parameters used by PriorBoxLayer
message PriorBoxParameter {
// Encode/decode type.
enum CodeType {
CORNER = 1;
CENTER_SIZE = 2;
}
// Minimum box size (in pixels). Required!
optional float min_size = 1;
// Maximum box size (in pixels). Required!
optional float max_size = 2;
// Various of aspect ratios. Duplicate ratios will be ignored.
// If none is provided, we use default ratio 1.
repeated float aspect_ratio = 3;
// If true, will flip each aspect ratio.
// For example, if there is aspect ratio "r",
// we will generate aspect ratio "1.0/r" as well.
optional bool flip = 4 [default = true];
// If true, will clip the prior so that it is within [0, 1]
optional bool clip = 5 [default = true];
// Variance for adjusting the prior bboxes.
repeated float variance = 6;
}
// Message that store parameters used by DetectionOutputLayer
message DetectionOutputParameter {
// Number of classes to be predicted. Required!
optional uint32 num_classes = 1;
// If true, bounding box are shared among different classes.
optional bool share_location = 2 [default = true];
// Background label id. If there is no background class,
// set it as -1.
optional int32 background_label_id = 3 [default = 0];
// Type of coding method for bbox.
optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
// If true, variance is encoded in target; otherwise we need to adjust the
// predicted offset accordingly.
optional bool variance_encoded_in_target = 8 [default = false];
// Number of total bboxes to be kept per image after nms step.
// -1 means keeping all bboxes after nms step.
optional int32 keep_top_k = 7 [default = -1];
// Only consider detections whose confidences are larger than a threshold.
// If not provided, consider all boxes.
optional float confidence_threshold = 9;
// Parameters used for non maximum suppression.
// Threshold to be used in nms.
optional float nms_threshold = 10 [default = 0.3];
// Maximum number of results to be kept.
optional int32 top_k = 11;
}
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
@ -335,7 +404,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 138 (last added: crop_param)
// LayerParameter next available layer-specific ID: 142 (last added: detection_output_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@ -389,6 +458,7 @@ message LayerParameter {
optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 137;
optional DataParameter data_param = 107;
optional DetectionOutputParameter detection_output_param = 141;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
@ -404,17 +474,20 @@ message LayerParameter {
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional NormalizeBBoxParameter normalize_bbox_param = 139;
optional PermuteParameter permute_param = 138;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PriorBoxParameter prior_box_param = 140;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
optional SliceParameter slice_param = 126;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional WindowDataParameter window_data_param = 129;
@ -524,6 +597,12 @@ message ConvolutionParameter {
CUDNN = 2;
}
optional Engine engine = 15 [default = DEFAULT];
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme a trous from Holschneider et al. 1987.)
optional uint32 dilation_h = 18; // The dilation height
optional uint32 dilation_w = 19; // The dilation width
optional uint32 dilation = 20; // The dilation; defaults to 1
}
message DataParameter {
@ -1174,3 +1253,15 @@ message PReLUParameter {
// Whether or not slope paramters are shared across channels.
optional bool channel_shared = 2 [default = false];
}
// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
  // Box corners, as fractions of the image width/height.
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  // Class label of the object inside the box.
  optional int32 label = 5;
  // Whether the annotation marks this instance as "difficult".
  optional bool difficult = 6;
  // Detection confidence score.
  optional float score = 7;
  // Cached box area (fraction of the image); presumably filled by the
  // detection-output code — confirm against the layer implementation.
  optional float size = 8;
}

@ -48,6 +48,7 @@ using namespace cv::dnn;
#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
@ -63,279 +64,297 @@ using ::google::protobuf::Reflection;
namespace
{
class CaffeImporter : public Importer
{
caffe::NetParameter net;
caffe::NetParameter netBinary;
public:
template<typename T>
static cv::String toString(const T &v)
{
std::ostringstream ss;
ss << v;
return ss.str();
}
CaffeImporter(const char *pototxt, const char *caffeModel)
{
ReadNetParamsFromTextFileOrDie(pototxt, &net);
class CaffeImporter : public Importer
{
caffe::NetParameter net;
caffe::NetParameter netBinary;
if (caffeModel && caffeModel[0])
ReadNetParamsFromBinaryFileOrDie(caffeModel, &netBinary);
}
public:
void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams &params)
{
const Reflection *refl = msg.GetReflection();
int type = field->cpp_type();
bool isRepeated = field->is_repeated();
const std::string &name = field->name();
#define SET_UP_FILED(getter, arrayConstr, gtype) \
if (isRepeated) { \
const RepeatedField<gtype> &v = refl->GetRepeatedField<gtype>(msg, field); \
params.set(name, DictValue::arrayConstr(v.begin(), (int)v.size())); \
} \
else { \
params.set(name, refl->getter(msg, field)); \
}
switch (type)
{
case FieldDescriptor::CPPTYPE_INT32:
SET_UP_FILED(GetInt32, arrayInt, ::google::protobuf::int32);
break;
case FieldDescriptor::CPPTYPE_UINT32:
SET_UP_FILED(GetUInt32, arrayInt, ::google::protobuf::uint32);
break;
case FieldDescriptor::CPPTYPE_INT64:
SET_UP_FILED(GetInt32, arrayInt, ::google::protobuf::int64);
break;
case FieldDescriptor::CPPTYPE_UINT64:
SET_UP_FILED(GetUInt32, arrayInt, ::google::protobuf::uint64);
break;
case FieldDescriptor::CPPTYPE_BOOL:
SET_UP_FILED(GetBool, arrayInt, bool);
break;
case FieldDescriptor::CPPTYPE_DOUBLE:
SET_UP_FILED(GetDouble, arrayReal, double);
break;
case FieldDescriptor::CPPTYPE_FLOAT:
SET_UP_FILED(GetFloat, arrayReal, float);
break;
case FieldDescriptor::CPPTYPE_STRING:
if (isRepeated) {
const RepeatedPtrField<std::string> &v = refl->GetRepeatedPtrField<std::string>(msg, field);
params.set(name, DictValue::arrayString(v.begin(), (int)v.size()));
}
else {
params.set(name, refl->GetString(msg, field));
}
break;
case FieldDescriptor::CPPTYPE_ENUM:
if (isRepeated) {
int size = refl->FieldSize(msg, field);
std::vector<cv::String> buf(size);
for (int i = 0; i < size; i++)
buf[i] = refl->GetRepeatedEnum(msg, field, i)->name();
params.set(name, DictValue::arrayString(buf.begin(), size));
}
else {
params.set(name, refl->GetEnum(msg, field)->name());
}
break;
default:
CV_Error(Error::StsError, "Unknown type \"" + String(field->type_name()) + "\" in prototxt");
break;
CaffeImporter(const char *pototxt, const char *caffeModel)
{
ReadNetParamsFromTextFileOrDie(pototxt, &net);
if (caffeModel && caffeModel[0])
ReadNetParamsFromBinaryFileOrDie(caffeModel, &netBinary);
}
void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams &params)
{
const Reflection *refl = msg.GetReflection();
int type = field->cpp_type();
bool isRepeated = field->is_repeated();
const std::string &name = field->name();
#define SET_UP_FILED(getter, arrayConstr, gtype) \
if (isRepeated) { \
const RepeatedField<gtype> &v = refl->GetRepeatedField<gtype>(msg, field); \
params.set(name, DictValue::arrayConstr(v.begin(), (int)v.size())); \
} \
else { \
params.set(name, refl->getter(msg, field)); \
}
}
inline static bool ends_with_param(const std::string &str)
switch (type)
{
static const std::string _param("_param");
return (str.size() >= _param.size()) && str.compare(str.size() - _param.size(), _param.size(), _param) == 0;
case FieldDescriptor::CPPTYPE_INT32:
SET_UP_FILED(GetInt32, arrayInt, ::google::protobuf::int32);
break;
case FieldDescriptor::CPPTYPE_UINT32:
SET_UP_FILED(GetUInt32, arrayInt, ::google::protobuf::uint32);
break;
case FieldDescriptor::CPPTYPE_INT64:
SET_UP_FILED(GetInt32, arrayInt, ::google::protobuf::int64);
break;
case FieldDescriptor::CPPTYPE_UINT64:
SET_UP_FILED(GetUInt32, arrayInt, ::google::protobuf::uint64);
break;
case FieldDescriptor::CPPTYPE_BOOL:
SET_UP_FILED(GetBool, arrayInt, bool);
break;
case FieldDescriptor::CPPTYPE_DOUBLE:
SET_UP_FILED(GetDouble, arrayReal, double);
break;
case FieldDescriptor::CPPTYPE_FLOAT:
SET_UP_FILED(GetFloat, arrayReal, float);
break;
case FieldDescriptor::CPPTYPE_STRING:
if (isRepeated) {
const RepeatedPtrField<std::string> &v = refl->GetRepeatedPtrField<std::string>(msg, field);
params.set(name, DictValue::arrayString(v.begin(), (int)v.size()));
}
else {
params.set(name, refl->GetString(msg, field));
}
break;
case FieldDescriptor::CPPTYPE_ENUM:
if (isRepeated) {
int size = refl->FieldSize(msg, field);
std::vector<cv::String> buf(size);
for (int i = 0; i < size; i++)
buf[i] = refl->GetRepeatedEnum(msg, field, i)->name();
params.set(name, DictValue::arrayString(buf.begin(), size));
}
else {
params.set(name, refl->GetEnum(msg, field)->name());
}
break;
default:
CV_Error(Error::StsError, "Unknown type \"" + String(field->type_name()) + "\" in prototxt");
break;
}
}
void extractLayerParams(const Message &msg, cv::dnn::LayerParams &params, bool isInternal = false)
{
const Descriptor *msgDesc = msg.GetDescriptor();
const Reflection *msgRefl = msg.GetReflection();
inline static bool ends_with_param(const std::string &str)
{
static const std::string _param("_param");
return (str.size() >= _param.size()) && str.compare(str.size() - _param.size(), _param.size(), _param) == 0;
}
for (int fieldId = 0; fieldId < msgDesc->field_count(); fieldId++)
{
const FieldDescriptor *fd = msgDesc->field(fieldId);
if (!isInternal && !ends_with_param(fd->name()))
continue;
bool hasData = fd->is_required() ||
(fd->is_optional() && msgRefl->HasField(msg, fd)) ||
(fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0);
if (!hasData)
continue;
if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE)
{
if (fd->is_repeated()) //Extract only first item!
extractLayerParams(msgRefl->GetRepeatedMessage(msg, fd, 0), params, true);
else
extractLayerParams(msgRefl->GetMessage(msg, fd), params, true);
}
else
{
addParam(msg, fd, params);
}
}
}
void extractLayerParams(const Message &msg, cv::dnn::LayerParams &params, bool isInternal = false)
{
const Descriptor *msgDesc = msg.GetDescriptor();
const Reflection *msgRefl = msg.GetReflection();
BlobShape blobShapeFromProto(const caffe::BlobProto &pbBlob)
for (int fieldId = 0; fieldId < msgDesc->field_count(); fieldId++)
{
if (pbBlob.has_num() || pbBlob.has_channels() || pbBlob.has_height() || pbBlob.has_width())
{
return BlobShape(pbBlob.num(), pbBlob.channels(), pbBlob.height(), pbBlob.width());
}
else if (pbBlob.has_shape())
{
const caffe::BlobShape &_shape = pbBlob.shape();
BlobShape shape = BlobShape::all(_shape.dim_size());
const FieldDescriptor *fd = msgDesc->field(fieldId);
for (int i = 0; i < _shape.dim_size(); i++)
shape[i] = (int)_shape.dim(i);
if (!isInternal && !ends_with_param(fd->name()))
continue;
return shape;
bool hasData = fd->is_required() ||
(fd->is_optional() && msgRefl->HasField(msg, fd)) ||
(fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0);
if (!hasData)
continue;
if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE)
{
if (fd->is_repeated()) //Extract only first item!
extractLayerParams(msgRefl->GetRepeatedMessage(msg, fd, 0), params, true);
else
extractLayerParams(msgRefl->GetMessage(msg, fd), params, true);
}
else
{
CV_Error(Error::StsError, "Unknown shape of input blob");
return BlobShape();
addParam(msg, fd, params);
}
}
}
void blobFromProto(const caffe::BlobProto &pbBlob, cv::dnn::Blob &dstBlob)
BlobShape blobShapeFromProto(const caffe::BlobProto &pbBlob)
{
if (pbBlob.has_num() || pbBlob.has_channels() || pbBlob.has_height() || pbBlob.has_width())
{
BlobShape shape = blobShapeFromProto(pbBlob);
dstBlob.create(shape, CV_32F);
CV_Assert(pbBlob.data_size() == (int)dstBlob.matRefConst().total());
return BlobShape(pbBlob.num(), pbBlob.channels(), pbBlob.height(), pbBlob.width());
}
else if (pbBlob.has_shape())
{
const caffe::BlobShape &_shape = pbBlob.shape();
BlobShape shape = BlobShape::all(_shape.dim_size());
CV_DbgAssert(pbBlob.GetDescriptor()->FindFieldByLowercaseName("data")->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT);
float *dstData = dstBlob.matRef().ptr<float>();
for (int i = 0; i < _shape.dim_size(); i++)
shape[i] = (int)_shape.dim(i);
for (int i = 0; i < pbBlob.data_size(); i++)
dstData[i] = pbBlob.data(i);
return shape;
}
void extractBinaryLayerParms(const caffe::LayerParameter& layer, LayerParams& layerParams)
else
{
const std::string &name = layer.name();
CV_Error(Error::StsError, "Unknown shape of input blob");
return BlobShape();
}
}
int li;
for (li = 0; li != netBinary.layer_size(); li++)
{
if (netBinary.layer(li).name() == name)
break;
}
void blobFromProto(const caffe::BlobProto &pbBlob, cv::dnn::Blob &dstBlob)
{
BlobShape shape = blobShapeFromProto(pbBlob);
if (li == netBinary.layer_size() || netBinary.layer(li).blobs_size() == 0)
return;
dstBlob.create(shape, CV_32F);
CV_Assert(pbBlob.data_size() == (int)dstBlob.matRefConst().total());
const caffe::LayerParameter &binLayer = netBinary.layer(li);
layerParams.blobs.resize(binLayer.blobs_size());
for (int bi = 0; bi < binLayer.blobs_size(); bi++)
{
blobFromProto(binLayer.blobs(bi), layerParams.blobs[bi]);
}
}
CV_DbgAssert(pbBlob.GetDescriptor()->FindFieldByLowercaseName("data")->cpp_type() == FieldDescriptor::CPPTYPE_FLOAT);
float *dstData = dstBlob.matRef().ptr<float>();
struct BlobNote
{
BlobNote(const std::string &_name, int _layerId, int _outNum) :
name(_name.c_str()), layerId(_layerId), outNum(_outNum) {}
for (int i = 0; i < pbBlob.data_size(); i++)
dstData[i] = pbBlob.data(i);
}
const char *name;
int layerId, outNum;
};
void extractBinaryLayerParms(const caffe::LayerParameter& layer, LayerParams& layerParams)
{
const std::string &name = layer.name();
void populateNet(Net dstNet)
int li;
for (li = 0; li != netBinary.layer_size(); li++)
{
int layersSize = net.layer_size();
std::vector<BlobNote> addedBlobs;
addedBlobs.reserve(layersSize + 1);
if (netBinary.layer(li).name() == name)
break;
}
//setup input layer names
{
std::vector<String> netInputs(net.input_size());
for (int inNum = 0; inNum < net.input_size(); inNum++)
{
addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
netInputs[inNum] = net.input(inNum);
}
dstNet.setNetInputs(netInputs);
}
if (li == netBinary.layer_size() || netBinary.layer(li).blobs_size() == 0)
return;
for (int li = 0; li < layersSize; li++)
{
const caffe::LayerParameter &layer = net.layer(li);
String name = layer.name();
String type = layer.type();
LayerParams layerParams;
const caffe::LayerParameter &binLayer = netBinary.layer(li);
layerParams.blobs.resize(binLayer.blobs_size());
for (int bi = 0; bi < binLayer.blobs_size(); bi++)
{
blobFromProto(binLayer.blobs(bi), layerParams.blobs[bi]);
}
}
extractLayerParams(layer, layerParams);
extractBinaryLayerParms(layer, layerParams);
struct BlobNote
{
BlobNote(const std::string &_name, int _layerId, int _outNum) :
name(_name.c_str()), layerId(_layerId), outNum(_outNum) {}
const char *name;
int layerId, outNum;
};
int id = dstNet.addLayer(name, type, layerParams);
std::vector<BlobNote> addedBlobs;
std::map<String, int> layerCounter;
for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
addInput(layer.bottom(inNum), id, inNum, dstNet, addedBlobs);
void populateNet(Net dstNet)
{
int layersSize = net.layer_size();
layerCounter.clear();
addedBlobs.clear();
addedBlobs.reserve(layersSize + 1);
for (int outNum = 0; outNum < layer.top_size(); outNum++)
addOutput(layer, id, outNum, addedBlobs);
//setup input layer names
{
std::vector<String> netInputs(net.input_size());
for (int inNum = 0; inNum < net.input_size(); inNum++)
{
addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
netInputs[inNum] = net.input(inNum);
}
dstNet.setNetInputs(netInputs);
}
void addOutput(const caffe::LayerParameter &layer, int layerId, int outNum, std::vector<BlobNote> &addedBlobs)
for (int li = 0; li < layersSize; li++)
{
const std::string &name = layer.top(outNum);
const caffe::LayerParameter &layer = net.layer(li);
String name = layer.name();
String type = layer.type();
LayerParams layerParams;
bool haveDups = false;
for (int idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
{
if (addedBlobs[idx].name == name)
{
haveDups = true;
break;
}
}
extractLayerParams(layer, layerParams);
extractBinaryLayerParms(layer, layerParams);
if (haveDups)
{
bool isInplace = layer.bottom_size() > outNum && layer.bottom(outNum) == name;
if (!isInplace)
CV_Error(Error::StsBadArg, "Duplicate blobs produced by multiple sources");
}
int repetitions = layerCounter[name]++;
if (repetitions)
name += String("_") + toString(repetitions);
int id = dstNet.addLayer(name, type, layerParams);
for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
addInput(layer.bottom(inNum), id, inNum, dstNet);
addedBlobs.push_back(BlobNote(name, layerId, outNum));
for (int outNum = 0; outNum < layer.top_size(); outNum++)
addOutput(layer, id, outNum);
}
void addInput(const std::string &name, int layerId, int inNum, Net &dstNet, std::vector<BlobNote> &addedBlobs)
{
int idx;
for (idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
{
if (addedBlobs[idx].name == name)
break;
}
addedBlobs.clear();
}
if (idx < 0)
void addOutput(const caffe::LayerParameter &layer, int layerId, int outNum)
{
const std::string &name = layer.top(outNum);
bool haveDups = false;
for (int idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
{
if (addedBlobs[idx].name == name)
{
CV_Error(Error::StsObjectNotFound, "Can't found output blob \"" + name + "\"");
return;
haveDups = true;
break;
}
}
dstNet.connect(addedBlobs[idx].layerId, addedBlobs[idx].outNum, layerId, inNum);
if (haveDups)
{
bool isInplace = layer.bottom_size() > outNum && layer.bottom(outNum) == name;
if (!isInplace)
CV_Error(Error::StsBadArg, "Duplicate blobs produced by multiple sources");
}
~CaffeImporter()
addedBlobs.push_back(BlobNote(name, layerId, outNum));
}
void addInput(const std::string &name, int layerId, int inNum, Net &dstNet)
{
int idx;
for (idx = (int)addedBlobs.size() - 1; idx >= 0; idx--)
{
if (addedBlobs[idx].name == name)
break;
}
if (idx < 0)
{
CV_Error(Error::StsObjectNotFound, "Can't find output blob \"" + name + "\"");
return;
}
dstNet.connect(addedBlobs[idx].layerId, addedBlobs[idx].outNum, layerId, inNum);
}
};
~CaffeImporter()
{
}
};
}
@ -353,3 +372,20 @@ Ptr<Importer> cv::dnn::createCaffeImporter(const String&, const String&)
}
#endif //HAVE_PROTOBUF
Net cv::dnn::readNetFromCaffe(const String &prototxt, const String &caffeModel /*= String()*/)
{
Ptr<Importer> caffeImporter;
try
{
caffeImporter = createCaffeImporter(prototxt, caffeModel);
}
catch(...)
{
}
Net net;
if (caffeImporter)
caffeImporter->populateNet(net);
return net;
}

@ -46,52 +46,59 @@
#include <sstream>
#include <opencv2/core.hpp>
#define CHECK(cond) cv::GLogWrapper(__FILE__, CV_Func, __LINE__, "CHECK", #cond, cond)
#define CHECK_EQ(a, b) cv::GLogWrapper(__FILE__, CV_Func, __LINE__, "CHECK", #a"="#b, ((a) == (b)))
#define LOG(TYPE) cv::GLogWrapper(__FILE__, CV_Func, __LINE__, #TYPE)
#define CHECK(cond) for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, "CHECK", #cond, cond); _logger.exit(); _logger.check()) _logger.stream()
#define CHECK_EQ(a, b) for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, "CHECK", #a"="#b, ((a) == (b))); _logger.exit(); _logger.check()) _logger.stream()
#define LOG(TYPE) for(cv::dnn::GLogWrapper _logger(__FILE__, CV_Func, __LINE__, #TYPE); _logger.exit(); _logger.check()) _logger.stream()
namespace cv
{
namespace dnn
{
class GLogWrapper
{
std::stringstream stream;
const char *file, *func, *type, *cond_str;
int line;
bool cond_staus;
bool cond_staus, exit_loop;
std::stringstream sstream;
public:
GLogWrapper(const char *_file, const char *_func, int _line,
const char *_type,
const char *_cond_str = NULL, bool _cond_status = true
) :
file(_file), func(_func), type(_type), cond_str(_cond_str),
line(_line), cond_staus(_cond_status) {}
const char *_type,
const char *_cond_str = NULL, bool _cond_status = true
) :
file(_file), func(_func), type(_type), cond_str(_cond_str),
line(_line), cond_staus(_cond_status), exit_loop(true) {}
std::iostream &stream()
{
return sstream;
}
template<typename T>
GLogWrapper &operator<<(const T &v)
bool exit()
{
if (!cond_str || cond_str && !cond_staus)
stream << v;
return *this;
return exit_loop;
}
~GLogWrapper()
void check()
{
exit_loop = false;
if (cond_str && !cond_staus)
{
cv::error(cv::Error::StsError, "FAILED: " + String(cond_str) + "." + stream.str(), func, file, line);
cv::error(cv::Error::StsError, "FAILED: " + String(cond_str) + ". " + sstream.str(), func, file, line);
}
else if (!cond_str && strcmp(type, "CHECK"))
{
if (!std::strcmp(type, "INFO"))
std::cout << stream.str() << std::endl;
std::cout << sstream.str() << std::endl;
else
std::cerr << stream.str() << std::endl;
std::cerr << sstream.str() << std::endl;
}
}
};
}
}
#endif

@ -2,62 +2,20 @@
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <climits>
#include "layers/layers_common.hpp"
namespace cv
{
namespace dnn
{
//Utils
//Extracts params used into Conv, Deconv and Pooling layers
static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernel.height = params.get<int>("kernel_h");
kernel.width = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernel.height = kernel.width = params.get<int>("kernel_size");
}
else
{
CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
CV_Assert(kernel.height > 0 && kernel.width > 0);
if (params.has("pad_h") && params.has("pad_w"))
{
pad.height = params.get<int>("pad_h");
pad.width = params.get<int>("pad_w");
}
else
{
pad.height = pad.width = params.get<int>("pad", 0);
}
CV_Assert(pad.height >= 0 && pad.width >= 0);
if (params.has("stride_h") && params.has("stride_w"))
{
stride.height = params.get<int>("stride_h");
stride.width = params.get<int>("stride_w");
}
else
{
stride.height = stride.width = params.get<int>("stride", 1);
}
CV_Assert(stride.height > 0 && stride.width > 0);
}
//Layers
//Convolution and Deconvolution
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
getConvolutionKernelParams(params, l->kernel.height, l->kernel.width, l->pad.height, l->pad.width, l->stride.height, l->stride.width, l->dilation.height, l->dilation.width);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
@ -88,6 +46,7 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
int type;
Size kernel, stride, pad;
bool globalPooling;
if (params.has("pool"))
{
@ -106,9 +65,13 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
type = PoolingLayer::MAX;
}
getCaffeConvParams(params, kernel, pad, stride);
getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, pad.height, pad.width, stride.height, stride.width);
//getCaffeConvParams(params, kernel, pad, stride);
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
if (!globalPooling)
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
else
return Ptr<Layer>(PoolingLayer::createGlobal(type));
}
template<>
@ -197,11 +160,6 @@ Ptr<Layer> createLayerFromCaffe<ReshapeLayer>(LayerParams &params)
return Ptr<Layer>(ReshapeLayer::create(newShape, applyingRange));
}
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ReshapeLayer::create(Shape(0, -1)));
}
template<>
Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params)
{
@ -274,30 +232,16 @@ Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
template<> //CropLayer specialization
Ptr<Layer> createLayerFromCaffe<CropLayer>(LayerParams& params)
{
int start_axis = params.get<int>("axis");
if (4 <= start_axis)
CV_Error(Error::StsBadArg, "crop axis bigger than input dim");
int start_axis = params.get<int>("axis", 2);
DictValue *paramOffset = params.ptr("offset");
DictValue paramOffset = params.get("offset");
std::vector<int> offset(4, 0);
if (1 < paramOffset.size())
std::vector<int> offset;
if (paramOffset)
{
if (4 - start_axis != paramOffset.size())
CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");
for (size_t i = start_axis; i < offset.size(); i++)
{
offset[i] = paramOffset.get<int>(i);
}
}
else
{
const int offset_val = paramOffset.get<int>(0);
for (size_t i = start_axis; i < offset.size(); i++)
{
offset[i] = offset_val;
}
for (int i = 0; i < paramOffset->size(); i++)
offset.push_back(paramOffset->get<int>(i));
}
return Ptr<Layer>(CropLayer::create(start_axis, offset));
}

@ -44,6 +44,7 @@
#include <algorithm>
#include <iostream>
#include <sstream>
#include <iterator>
using namespace cv;
using namespace cv::dnn;
@ -59,7 +60,7 @@ namespace dnn
{
template<typename T>
String toString(const T &v)
static String toString(const T &v)
{
std::ostringstream ss;
ss << v;
@ -127,7 +128,7 @@ struct LayerData
};
//fake layer containing network input blobs
struct NetInputLayer : public Layer
struct DataLayer : public Layer
{
void allocate(const std::vector<Blob*>&, std::vector<Blob>&) {}
void forward(std::vector<Blob*>&, std::vector<Blob>&) {}
@ -152,7 +153,7 @@ struct Net::Impl
Impl()
{
//allocate fake net input layer
netInputLayer = Ptr<NetInputLayer>(new NetInputLayer());
netInputLayer = Ptr<DataLayer>(new DataLayer());
LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
inpl.id = 0;
inpl.name = "_input";
@ -163,7 +164,7 @@ struct Net::Impl
netWasAllocated = false;
}
Ptr<NetInputLayer> netInputLayer;
Ptr<DataLayer> netInputLayer;
std::vector<int> netOutputs;
typedef std::map<int, LayerData> MapIdToLayerData;
@ -328,11 +329,16 @@ struct Net::Impl
netOutputs.push_back(lid);
}
#ifndef NDEBUG
std::cout << "\nNet Outputs(" << netOutputs.size() << "):\n";
for (size_t i = 0; i < netOutputs.size(); i++)
std::cout << layers[netOutputs[i]].name << std::endl;
std::cout << layers[netOutputs[i]].name << "\n";
#endif
}
#define CV_RETHROW_ERROR(err, newmsg)\
cv::error(err.code, newmsg, err.func.c_str(), err.file.c_str(), err.line)
void allocateLayer(int lid)
{
LayerData &ld = layers[lid];
@ -361,7 +367,15 @@ struct Net::Impl
//allocate layer
ld.outputBlobs.resize(std::max((size_t)1, ld.requiredOutputs.size())); //layer produce at least one output blob
ld.getLayerInstance()->allocate(ld.inputBlobs, ld.outputBlobs);
try
{
Ptr<Layer> layerPtr = ld.getLayerInstance();
layerPtr->allocate(ld.inputBlobs, ld.outputBlobs);
}
catch (const cv::Exception &err)
{
CV_RETHROW_ERROR(err, format("The following error occured while making allocate() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str()));
}
ld.flag = 1;
}
@ -399,7 +413,14 @@ struct Net::Impl
}
//forward itself
ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs);
try
{
ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs);
}
catch (const cv::Exception &err)
{
CV_RETHROW_ERROR(err, format("The following error occured while making forward() for layer \"%s\": %s", ld.name.c_str(), err.err.c_str()));
}
ld.flag = 1;
}
@ -417,12 +438,10 @@ struct Net::Impl
Net::Net() : impl(new Net::Impl)
{
}
Net::~Net()
{
}
int Net::addLayer(const String &name, const String &type, LayerParams &params)
@ -469,16 +488,19 @@ void Net::connect(String _outPin, String _inPin)
impl->connect(outPin.lid, outPin.oid, inpPin.lid, inpPin.oid);
}
void Net::forward()
void Net::allocate()
{
impl->setUpNet();
impl->forwardAll();
}
void Net::forward(LayerId toLayer)
{
impl->setUpNet();
impl->forwardLayer(impl->getLayerData(toLayer));
if (toLayer.isString() && toLayer.get<String>().empty())
impl->forwardAll();
else
impl->forwardLayer(impl->getLayerData(toLayer));
}
void Net::setNetInputs(const std::vector<String> &inputBlobNames)
@ -521,6 +543,16 @@ Blob Net::getParam(LayerId layer, int numParam)
return layerBlobs[numParam];
}
void Net::setParam(LayerId layer, int numParam, const Blob &blob)
{
LayerData &ld = impl->getLayerData(layer);
std::vector<Blob> &layerBlobs = ld.layerInstance->blobs;
CV_Assert(numParam < (int)layerBlobs.size());
//we don't make strong checks, use this function carefully
layerBlobs[numParam] = blob;
}
int Net::getLayerId(const String &layer)
{
return impl->getLayerId(layer);
@ -531,6 +563,34 @@ void Net::deleteLayer(LayerId)
CV_Error(Error::StsNotImplemented, "");
}
Ptr<Layer> Net::getLayer(LayerId layerId)
{
LayerData &ld = impl->getLayerData(layerId);
if (!ld.layerInstance)
CV_Error(Error::StsNullPtr, format("Requseted layer \"%s\" was not initialized", ld.name.c_str()));
return ld.layerInstance;
}
std::vector<String> Net::getLayerNames() const
{
std::vector<String> res;
res.reserve(impl->layers.size());
Impl::MapIdToLayerData::iterator it;
for (it = impl->layers.begin(); it != impl->layers.end(); it++)
{
if (it->second.id) //skip Data layer
res.push_back(it->second.name);
}
return res;
}
bool Net::empty() const
{
return impl->layers.size() <= 1; //first layer is default Data layer
}
//////////////////////////////////////////////////////////////////////////
Importer::~Importer() {}
@ -560,6 +620,43 @@ int Layer::outputNameToIndex(String)
return -1;
}
template <typename T>
static void vecToPVec(const std::vector<T> &v, std::vector<T*> &pv)
{
pv.resize(v.size());
for (size_t i = 0; i < v.size(); i++)
pv[i] = const_cast<T*>(&v[i]);
}
void Layer::allocate(const std::vector<Blob> &inputs, std::vector<Blob> &outputs)
{
std::vector<Blob*> inputsp;
vecToPVec(inputs, inputsp);
this->allocate(inputsp, outputs);
}
std::vector<Blob> Layer::allocate(const std::vector<Blob> &inputs)
{
std::vector<Blob> outputs;
this->allocate(inputs, outputs);
return outputs;
}
void Layer::forward(const std::vector<Blob> &inputs, std::vector<Blob> &outputs)
{
std::vector<Blob*> inputsp;
vecToPVec(inputs, inputsp);
this->forward(inputsp, outputs);
}
void Layer::run(const std::vector<Blob> &inputs, std::vector<Blob> &outputs)
{
std::vector<Blob*> inputsp;
vecToPVec(inputs, inputsp);
this->allocate(inputsp, outputs);
this->forward(inputsp, outputs);
}
Layer::~Layer() {}
//////////////////////////////////////////////////////////////////////////

@ -43,6 +43,14 @@
#include "caffe/layer_loaders.hpp"
#include "layers/blank_layer.hpp"
#include "layers/crop_layer.hpp"
#include "layers/eltwise_layer.hpp"
#include "layers/flatten_layer.hpp"
#include "layers/permute_layer.hpp"
#include "layers/prior_box_layer.hpp"
#include "layers/detection_output_layer.hpp"
#include "layers/normalize_bbox_layer.hpp"
namespace cv
{
namespace dnn
@ -69,7 +77,7 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>);
REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>);
REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe<ReshapeLayer>);
REG_RUNTIME_LAYER_FUNC(Flatten, createFlattenLayerFromCaffe);
REG_RUNTIME_LAYER_CLASS(Flatten, FlattenLayer);
REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>);
@ -89,6 +97,10 @@ void initModule()
REG_RUNTIME_LAYER_FUNC(Crop, createLayerFromCaffe<CropLayer>);
REG_RUNTIME_LAYER_FUNC(Eltwise, createLayerFromCaffe<EltwiseLayer>);
REG_RUNTIME_LAYER_CLASS(Permute, PermuteLayer)
REG_RUNTIME_LAYER_CLASS(PriorBox, PriorBoxLayer)
REG_RUNTIME_LAYER_CLASS(DetectionOutput, DetectionOutputLayer)
REG_RUNTIME_LAYER_CLASS(NormalizeBBox, NormalizeBBoxLayer)
init.status = true;
}

@ -77,7 +77,8 @@ void ConvolutionLayerImpl::init()
CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height);
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
useOpenCL = ocl::useOpenCL() && tryUseOpenCL;
//TODO: dilation in OCL mode
useOpenCL = ocl::useOpenCL() && tryUseOpenCL && dilation == Size(1, 1);
}
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
@ -127,14 +128,15 @@ void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vecto
bool ConvolutionLayerImpl::is1x1() const
{
return (kernel.height == 1 && kernel.width == 1) &&
(stride.height == 1 && stride.width == 1);
(stride.height == 1 && stride.width == 1) &&
(dilation.height == 1 && dilation.width == 1);
}
template<typename XMat>
void ConvolutionLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1));
XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++)
{
@ -182,7 +184,7 @@ void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol)
return;
}
#ifdef HAVE_OPENCL
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, this->colBlob.umatRef()));
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, this->colBlob.umatRef()));
dstCol = this->colBlob.umatRefConst();
#else
CV_Error(Error::StsInternal, "");
@ -200,9 +202,9 @@ void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
Mat &colMat = colBlob.matRef();
if (srcImg.type() == CV_32F)
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<float>());
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<float>());
if (srcImg.type() == CV_64F)
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<double>());
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dilation.height, dilation.width, colMat.ptr<double>());
dstCol = colMat;
}
@ -213,8 +215,8 @@ void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
inpW = input.cols();
inpCn = input.channels();
outH = (inpH + 2 * pad.height - kernel.height) / stride.height + 1;
outW = (inpW + 2 * pad.width - kernel.width) / stride.width + 1;
outH = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
outW = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
outCn = numOutput;
topH = outH; topW = outW; topCn = outCn;
@ -252,7 +254,7 @@ template<typename XMat>
void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1));
XMat biasesMat = (bias) ? reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1)) : XMat();
for (size_t ii = 0; ii < outputs.size(); ii++)
{
@ -315,21 +317,23 @@ void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg)
//Initializers
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad)
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{
ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l);
}
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad)
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad, Size dilation)
{
DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
l->dilation = dilation;
return Ptr<BaseConvolutionLayer>(l);
}

@ -47,57 +47,82 @@ namespace cv
{
namespace dnn
{
CropLayerImpl::CropLayerImpl(int start_axis_, const std::vector<int> &offset_)
CropLayerImpl::CropLayerImpl(int start_axis_, const std::vector<int> &offset_)
{
startAxis = start_axis_;
offset = offset_;
}
void CropLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(2 == inputs.size());
const Blob &inpBlob = *inputs[0];
const Blob &inpSzBlob = *inputs[1];
int start_axis = inpBlob.canonicalAxis(startAxis);
int dims = inpBlob.dims();
std::vector<int> offset_final(dims, 0);
if (offset.size() == 1)
{
start_axis = start_axis_;
offset = offset_;
for (int i = start_axis; i < dims; i++)
offset_final[i] = offset[0];
}
void CropLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
else if (offset.size() > 1)
{
CV_Assert(2 == inputs.size());
if ((int)offset.size() != dims - start_axis)
CV_Error(Error::StsBadArg, "number of offset values specified must be equal to the number of dimensions following axis.");
const Blob &inpBlob = *inputs[0];
CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);
for (int i = start_axis; i < dims; i++)
offset_final[i] = offset[i - start_axis];
}
const Blob &inpSzBlob = *inputs[1];
BlobShape dstShape = inpBlob.shape();
crop_ranges.resize(dims, Range::all());
for (int i = start_axis; i < dims; i++)
{
dstShape[i] = inpSzBlob.size(i);
outSizes.resize(4, 0);
for (int i = 0; i < 4; i++)
if (!offset.empty()) //normal case
{
if (i < start_axis)
outSizes[i] = inpBlob.size(i);
else
outSizes[i] = inpSzBlob.size(i);
if (offset[i] + outSizes[i] > inpBlob.size(i))
if (offset_final[i] < 0 || offset_final[i] + inpSzBlob.size(i) > inpBlob.size(i))
CV_Error(Error::StsBadArg, "invalid crop parameters");
}
outputs.resize(1);
outputs[0].create(BlobShape(outSizes));
}
void CropLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
Blob input = *inputs[0];
Blob output = outputs[0];
for (int num = 0; num < outSizes[0]; ++num)
crop_ranges[i] = Range(offset_final[i], offset_final[i] + inpSzBlob.size(i));
}
else //detect offset automatically so that cropped image is center of original one
{
for (int ch = 0; ch < outSizes[1]; ++ch)
{
for (int row = 0; row < outSizes[2]; ++row)
{
float *srcData = input.ptrf(num + offset[0], ch + offset[1], row + offset[2]);
float *dstData = output.ptrf(num, ch, row);
memcpy(dstData, srcData + offset[3], sizeof(float) * outSizes[3]);
}
}
if (inpSzBlob.size(i) > inpBlob.size(i))
CV_Error(Error::StsBadArg, "invalid output blob size");
int cur_crop = (inpBlob.size(i) - inpSzBlob.size(i)) / 2;
crop_ranges[i] = Range(cur_crop, cur_crop + inpSzBlob.size(i));
}
}
Ptr<CropLayer> CropLayer::create(int start_axis, const std::vector<int> &offset)
{
return Ptr<CropLayer>(new CropLayerImpl(start_axis, offset));
}
outputs.resize(1);
outputs[0].create(dstShape);
}
void CropLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
Blob &input = *inputs[0];
Blob &output = outputs[0];
#ifdef HAVE_OPENCL
if (input.getState() == Blob::HEAD_AT_UMAT)
input.umatRefConst()(&crop_ranges[0]).copyTo(output.umatRef());
else
#endif
input.matRefConst()(&crop_ranges[0]).copyTo(output.matRef());
}
Ptr<CropLayer> CropLayer::create(int start_axis, const std::vector<int> &offset)
{
return Ptr<CropLayer>(new CropLayerImpl(start_axis, offset));
}
}
}

@ -50,9 +50,7 @@ namespace dnn
{
class CropLayerImpl : public CropLayer
{
int start_axis;
std::vector<int> offset;
std::vector<int> outSizes;
std::vector<Range> crop_ranges;
public:
CropLayerImpl(int start_axis, const std::vector<int> &offset);

@ -0,0 +1,750 @@
/*M ///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "detection_output_layer.hpp"
#include <float.h>
#include <string>
namespace cv
{
namespace dnn
{
namespace util
{
template <typename T>
std::string to_string(T value)
{
std::ostringstream stream;
stream << value;
return stream.str();
}
template <typename T>
void make_error(const std::string& message1, const T& message2)
{
std::string error(message1);
error += std::string(util::to_string<int>(message2));
CV_Error(Error::StsBadArg, error.c_str());
}
template <typename T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2)
{
return pair1.first > pair2.first;
}
}
const std::string DetectionOutputLayer::_layerName = std::string("DetectionOutput");
bool DetectionOutputLayer::getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result)
{
if (!params.has(parameterName))
{
return false;
}
result = params.get(parameterName);
return true;
}
template<typename T>
T DetectionOutputLayer::getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx,
const bool required,
const T& defaultValue)
{
DictValue dictValue;
bool success = getParameterDict(params, parameterName, dictValue);
if(!success)
{
if(required)
{
std::string message = _layerName;
message += " layer parameter does not contain ";
message += parameterName;
message += " parameter.";
CV_Error(Error::StsBadArg, message);
}
else
{
return defaultValue;
}
}
return dictValue.get<T>(idx);
}
void DetectionOutputLayer::getCodeType(LayerParams &params)
{
String codeTypeString = params.get<String>("code_type").toLowerCase();
if (codeTypeString == "corner")
_codeType = caffe::PriorBoxParameter_CodeType_CORNER;
else if (codeTypeString == "center_size")
_codeType = caffe::PriorBoxParameter_CodeType_CENTER_SIZE;
else
_codeType = caffe::PriorBoxParameter_CodeType_CORNER;
}
DetectionOutputLayer::DetectionOutputLayer(LayerParams &params) : Layer(params)
{
_numClasses = getParameter<unsigned>(params, "num_classes");
_shareLocation = getParameter<bool>(params, "share_location");
_numLocClasses = _shareLocation ? 1 : _numClasses;
_backgroundLabelId = getParameter<int>(params, "background_label_id");
_varianceEncodedInTarget = getParameter<bool>(params, "variance_encoded_in_target", 0, false, false);
_keepTopK = getParameter<int>(params, "keep_top_k");
_confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, -FLT_MAX);
_topK = getParameter<int>(params, "top_k", 0, false, -1);
getCodeType(params);
// Parameters used in nms.
_nmsThreshold = getParameter<float>(params, "nms_threshold");
CV_Assert(_nmsThreshold > 0.);
}
void DetectionOutputLayer::checkInputs(const std::vector<Blob*> &inputs)
{
for (size_t i = 1; i < inputs.size(); i++)
{
for (size_t j = 0; j < _numAxes; j++)
{
CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);
}
}
}
void DetectionOutputLayer::allocate(const std::vector<Blob*> &inputs,
std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
CV_Assert(inputs[0]->num() == inputs[1]->num());
_num = inputs[0]->num();
_numPriors = inputs[2]->rows() / 4;
CV_Assert((_numPriors * _numLocClasses * 4) == inputs[0]->channels());
CV_Assert(int(_numPriors * _numClasses) == inputs[1]->channels());
// num() and channels() are 1.
// Since the number of bboxes to be kept is unknown before nms, we manually
// set it to (fake) 1.
// Each row is a 7 dimension std::vector, which stores
// [image_id, label, confidence, xmin, ymin, xmax, ymax]
BlobShape outputShape = BlobShape(1, 1, 1, 7);
outputs[0].create(BlobShape(outputShape));
}
void DetectionOutputLayer::forward(std::vector<Blob*> &inputs,
std::vector<Blob> &outputs)
{
const float* locationData = inputs[0]->ptrf();
const float* confidenceData = inputs[1]->ptrf();
const float* priorData = inputs[2]->ptrf();
// Retrieve all location predictions.
std::vector<LabelBBox> allLocationPredictions;
GetLocPredictions(locationData, _num, _numPriors, _numLocClasses,
_shareLocation, &allLocationPredictions);
// Retrieve all confidences.
std::vector<std::map<int, std::vector<float> > > allConfidenceScores;
GetConfidenceScores(confidenceData, _num, _numPriors, _numClasses,
&allConfidenceScores);
// Retrieve all prior bboxes. It is same within a batch since we assume all
// images in a batch are of same dimension.
std::vector<caffe::NormalizedBBox> priorBBoxes;
std::vector<std::vector<float> > priorVariances;
GetPriorBBoxes(priorData, _numPriors, &priorBBoxes, &priorVariances);
// Decode all loc predictions to bboxes.
std::vector<LabelBBox> allDecodedBBoxes;
DecodeBBoxesAll(allLocationPredictions, priorBBoxes, priorVariances, _num,
_shareLocation, _numLocClasses, _backgroundLabelId,
_codeType, _varianceEncodedInTarget, &allDecodedBBoxes);
int numKept = 0;
std::vector<std::map<int, std::vector<int> > > allIndices;
for (int i = 0; i < _num; ++i)
{
const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
const std::map<int, std::vector<float> >& confidenceScores =
allConfidenceScores[i];
std::map<int, std::vector<int> > indices;
int numDetections = 0;
for (int c = 0; c < (int)_numClasses; ++c)
{
if (c == _backgroundLabelId)
{
// Ignore background class.
continue;
}
if (confidenceScores.find(c) == confidenceScores.end())
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find confidence predictions for label ", c);
}
const std::vector<float>& scores = confidenceScores.find(c)->second;
int label = _shareLocation ? -1 : c;
if (decodeBBoxes.find(label) == decodeBBoxes.end())
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find location predictions for label ", label);
continue;
}
const std::vector<caffe::NormalizedBBox>& bboxes =
decodeBBoxes.find(label)->second;
ApplyNMSFast(bboxes, scores, _confidenceThreshold, _nmsThreshold,
_topK, &(indices[c]));
numDetections += indices[c].size();
}
if (_keepTopK > -1 && numDetections > _keepTopK)
{
std::vector<std::pair<float, std::pair<int, int> > > scoreIndexPairs;
for (std::map<int, std::vector<int> >::iterator it = indices.begin();
it != indices.end(); ++it)
{
int label = it->first;
const std::vector<int>& labelIndices = it->second;
if (confidenceScores.find(label) == confidenceScores.end())
{
// Something bad happened for current label.
util::make_error<int>("Could not find location predictions for label ", label);
continue;
}
const std::vector<float>& scores = confidenceScores.find(label)->second;
for (size_t j = 0; j < labelIndices.size(); ++j)
{
size_t idx = labelIndices[j];
CV_Assert(idx < scores.size());
scoreIndexPairs.push_back(
std::make_pair(scores[idx], std::make_pair(label, idx)));
}
}
// Keep outputs k results per image.
std::sort(scoreIndexPairs.begin(), scoreIndexPairs.end(),
util::SortScorePairDescend<std::pair<int, int> >);
scoreIndexPairs.resize(_keepTopK);
// Store the new indices.
std::map<int, std::vector<int> > newIndices;
for (size_t j = 0; j < scoreIndexPairs.size(); ++j)
{
int label = scoreIndexPairs[j].second.first;
int idx = scoreIndexPairs[j].second.second;
newIndices[label].push_back(idx);
}
allIndices.push_back(newIndices);
numKept += _keepTopK;
}
else
{
allIndices.push_back(indices);
numKept += numDetections;
}
}
if (numKept == 0)
{
CV_ErrorNoReturn(Error::StsError, "Couldn't find any detections");
return;
}
std::vector<int> outputsShape(2, 1);
outputsShape.push_back(numKept);
outputsShape.push_back(7);
outputs[0].create(outputsShape);
float* outputsData = outputs[0].ptrf();
int count = 0;
for (int i = 0; i < _num; ++i)
{
const std::map<int, std::vector<float> >& confidenceScores =
allConfidenceScores[i];
const LabelBBox& decodeBBoxes = allDecodedBBoxes[i];
for (std::map<int, std::vector<int> >::iterator it = allIndices[i].begin();
it != allIndices[i].end(); ++it)
{
int label = it->first;
if (confidenceScores.find(label) == confidenceScores.end())
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find confidence predictions for label ", label);
continue;
}
const std::vector<float>& scores = confidenceScores.find(label)->second;
int locLabel = _shareLocation ? -1 : label;
if (decodeBBoxes.find(locLabel) == decodeBBoxes.end())
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find location predictions for label ", locLabel);
continue;
}
const std::vector<caffe::NormalizedBBox>& bboxes =
decodeBBoxes.find(locLabel)->second;
std::vector<int>& indices = it->second;
for (size_t j = 0; j < indices.size(); ++j)
{
int idx = indices[j];
outputsData[count * 7] = i;
outputsData[count * 7 + 1] = label;
outputsData[count * 7 + 2] = scores[idx];
caffe::NormalizedBBox clipBBox;
ClipBBox(bboxes[idx], &clipBBox);
outputsData[count * 7 + 3] = clipBBox.xmin();
outputsData[count * 7 + 4] = clipBBox.ymin();
outputsData[count * 7 + 5] = clipBBox.xmax();
outputsData[count * 7 + 6] = clipBBox.ymax();
++count;
}
}
}
}
float DetectionOutputLayer::BBoxSize(const caffe::NormalizedBBox& bbox,
                                     const bool normalized)
{
    // Degenerate boxes (xmax < xmin or ymax < ymin) have zero size.
    if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin())
    {
        return 0;
    }
    // Prefer the size cached in the protobuf message when present.
    if (bbox.has_size())
    {
        return bbox.size();
    }
    float width = bbox.xmax() - bbox.xmin();
    float height = bbox.ymax() - bbox.ymin();
    // Normalized coordinates lie in [0, 1]; non-normalized (pixel)
    // coordinates are treated as inclusive, hence the +1 per dimension.
    return normalized ? width * height : (width + 1) * (height + 1);
}
void DetectionOutputLayer::ClipBBox(const caffe::NormalizedBBox& bbox,
                                    caffe::NormalizedBBox* clipBBox)
{
    // Clamp every corner into the normalized range [0, 1].
    const float xmin = std::max(std::min(bbox.xmin(), 1.f), 0.f);
    const float ymin = std::max(std::min(bbox.ymin(), 1.f), 0.f);
    const float xmax = std::max(std::min(bbox.xmax(), 1.f), 0.f);
    const float ymax = std::max(std::min(bbox.ymax(), 1.f), 0.f);
    clipBBox->set_xmin(xmin);
    clipBBox->set_ymin(ymin);
    clipBBox->set_xmax(xmax);
    clipBBox->set_ymax(ymax);
    // Refresh the cached size for the clipped corners and carry over the
    // "difficult" flag from the source box.
    clipBBox->clear_size();
    clipBBox->set_size(BBoxSize(*clipBBox));
    clipBBox->set_difficult(bbox.difficult());
}
// Decode one predicted offset (bbox) against its prior box into an absolute
// box, refreshing the decoded box's cached size.
//   priorBBox:  the anchor/prior box.
//   priorVariance: four per-coordinate variances, used only when
//                  varianceEncodedInTarget is false.
//   codeType:   CORNER  — offsets are added to the prior's corners;
//               CENTER_SIZE — offsets parametrize center shift and
//               exp-encoded width/height scale.
//   bbox:       the raw predicted offsets.
//   decodeBBox: output box.
void DetectionOutputLayer::DecodeBBox(
const caffe::NormalizedBBox& priorBBox, const std::vector<float>& priorVariance,
const CodeType codeType, const bool varianceEncodedInTarget,
const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* decodeBBox)
{
if (codeType == caffe::PriorBoxParameter_CodeType_CORNER)
{
if (varianceEncodedInTarget)
{
// variance is encoded in target, we simply need to add the offset
// predictions.
decodeBBox->set_xmin(priorBBox.xmin() + bbox.xmin());
decodeBBox->set_ymin(priorBBox.ymin() + bbox.ymin());
decodeBBox->set_xmax(priorBBox.xmax() + bbox.xmax());
decodeBBox->set_ymax(priorBBox.ymax() + bbox.ymax());
}
else
{
// variance is encoded in bbox, we need to scale the offset accordingly.
decodeBBox->set_xmin(
priorBBox.xmin() + priorVariance[0] * bbox.xmin());
decodeBBox->set_ymin(
priorBBox.ymin() + priorVariance[1] * bbox.ymin());
decodeBBox->set_xmax(
priorBBox.xmax() + priorVariance[2] * bbox.xmax());
decodeBBox->set_ymax(
priorBBox.ymax() + priorVariance[3] * bbox.ymax());
}
}
else
if (codeType == caffe::PriorBoxParameter_CodeType_CENTER_SIZE)
{
// Recover the prior's center and extents; a zero-sized prior is invalid.
float priorWidth = priorBBox.xmax() - priorBBox.xmin();
CV_Assert(priorWidth > 0);
float priorHeight = priorBBox.ymax() - priorBBox.ymin();
CV_Assert(priorHeight > 0);
float priorCenterX = (priorBBox.xmin() + priorBBox.xmax()) / 2.;
float priorCenterY = (priorBBox.ymin() + priorBBox.ymax()) / 2.;
float decodeBBoxCenterX, decodeBBoxCenterY;
float decodeBBoxWidth, decodeBBoxHeight;
if (varianceEncodedInTarget)
{
// variance is encoded in target, we simply need to retore the offset
// predictions.
decodeBBoxCenterX = bbox.xmin() * priorWidth + priorCenterX;
decodeBBoxCenterY = bbox.ymin() * priorHeight + priorCenterY;
decodeBBoxWidth = exp(bbox.xmax()) * priorWidth;
decodeBBoxHeight = exp(bbox.ymax()) * priorHeight;
}
else
{
// variance is encoded in bbox, we need to scale the offset accordingly.
decodeBBoxCenterX =
priorVariance[0] * bbox.xmin() * priorWidth + priorCenterX;
decodeBBoxCenterY =
priorVariance[1] * bbox.ymin() * priorHeight + priorCenterY;
decodeBBoxWidth =
exp(priorVariance[2] * bbox.xmax()) * priorWidth;
decodeBBoxHeight =
exp(priorVariance[3] * bbox.ymax()) * priorHeight;
}
// Convert the decoded center/size back to corner coordinates.
decodeBBox->set_xmin(decodeBBoxCenterX - decodeBBoxWidth / 2.);
decodeBBox->set_ymin(decodeBBoxCenterY - decodeBBoxHeight / 2.);
decodeBBox->set_xmax(decodeBBoxCenterX + decodeBBoxWidth / 2.);
decodeBBox->set_ymax(decodeBBoxCenterY + decodeBBoxHeight / 2.);
}
else
{
CV_Error(Error::StsBadArg, "Unknown LocLossType.");
}
// Keep the protobuf's cached size in sync with the new coordinates.
float bboxSize = BBoxSize(*decodeBBox);
decodeBBox->set_size(bboxSize);
}
void DetectionOutputLayer::DecodeBBoxes(
    const std::vector<caffe::NormalizedBBox>& priorBBoxes,
    const std::vector<std::vector<float> >& priorVariances,
    const CodeType codeType, const bool varianceEncodedInTarget,
    const std::vector<caffe::NormalizedBBox>& bboxes,
    std::vector<caffe::NormalizedBBox>* decodeBBoxes)
{
    // Priors, variances and predictions describe the same box set element-wise.
    CV_Assert(priorBBoxes.size() == priorVariances.size());
    CV_Assert(priorBBoxes.size() == bboxes.size());
    const int numBBoxes = (int)priorBBoxes.size();
    if (numBBoxes >= 1)
    {
        // Each prior must carry exactly four variance values.
        CV_Assert(priorVariances[0].size() == 4);
    }
    decodeBBoxes->clear();
    decodeBBoxes->reserve(numBBoxes);
    // Decode every prediction against its prior, one box at a time.
    for (int i = 0; i < numBBoxes; ++i)
    {
        decodeBBoxes->push_back(caffe::NormalizedBBox());
        DecodeBBox(priorBBoxes[i], priorVariances[i], codeType,
                   varianceEncodedInTarget, bboxes[i], &decodeBBoxes->back());
    }
}
// Decode the location predictions of a whole batch.
//   allLocPreds:   per-image map label -> predicted offsets.
//   priorBBoxes / priorVariances: shared across the batch (all images are
//                  assumed to have the same dimensions).
//   shareLocation: if true, a single box set (label -1) serves all classes.
//   allDecodeBBoxes: output, per-image map label -> decoded boxes.
void DetectionOutputLayer::DecodeBBoxesAll(
const std::vector<LabelBBox>& allLocPreds,
const std::vector<caffe::NormalizedBBox>& priorBBoxes,
const std::vector<std::vector<float> >& priorVariances,
const size_t num, const bool shareLocation,
const int numLocClasses, const int backgroundLabelId,
const CodeType codeType, const bool varianceEncodedInTarget,
std::vector<LabelBBox>* allDecodeBBoxes)
{
CV_Assert(allLocPreds.size() == num);
allDecodeBBoxes->clear();
allDecodeBBoxes->resize(num);
for (size_t i = 0; i < num; ++i)
{
// Decode predictions into bboxes.
LabelBBox& decodeBBoxes = (*allDecodeBBoxes)[i];
for (int c = 0; c < numLocClasses; ++c)
{
// With shared location there is a single entry stored under label -1.
int label = shareLocation ? -1 : c;
if (label == backgroundLabelId)
{
// Ignore background class.
continue;
}
if (allLocPreds[i].find(label) == allLocPreds[i].end())
{
// Something bad happened if there are no predictions for current label.
util::make_error<int>("Could not find location predictions for label ", label);
}
const std::vector<caffe::NormalizedBBox>& labelLocPreds =
allLocPreds[i].find(label)->second;
DecodeBBoxes(priorBBoxes, priorVariances,
codeType, varianceEncodedInTarget,
labelLocPreds, &(decodeBBoxes[label]));
}
}
}
void DetectionOutputLayer::GetPriorBBoxes(const float* priorData, const int& numPriors,
                                          std::vector<caffe::NormalizedBBox>* priorBBoxes,
                                          std::vector<std::vector<float> >* priorVariances)
{
    // priorData layout: the first numPriors * 4 floats are box corners
    // (xmin, ymin, xmax, ymax), the following numPriors * 4 floats are the
    // per-coordinate variances of the corresponding priors.
    priorBBoxes->clear();
    priorVariances->clear();
    const float* varData = priorData + numPriors * 4;
    for (int i = 0; i < numPriors; ++i)
    {
        const float* box = priorData + i * 4;
        caffe::NormalizedBBox bbox;
        bbox.set_xmin(box[0]);
        bbox.set_ymin(box[1]);
        bbox.set_xmax(box[2]);
        bbox.set_ymax(box[3]);
        // Cache the box area in the protobuf message.
        bbox.set_size(BBoxSize(bbox));
        priorBBoxes->push_back(bbox);

        const float* var = varData + i * 4;
        priorVariances->push_back(std::vector<float>(var, var + 4));
    }
}
void DetectionOutputLayer::ScaleBBox(const caffe::NormalizedBBox& bbox,
                                     const int height, const int width,
                                     caffe::NormalizedBBox* scaleBBox)
{
    // Scale normalized coordinates to the given pixel dimensions.
    const float xmin = bbox.xmin() * width;
    const float ymin = bbox.ymin() * height;
    const float xmax = bbox.xmax() * width;
    const float ymax = bbox.ymax() * height;
    scaleBBox->set_xmin(xmin);
    scaleBBox->set_ymin(ymin);
    scaleBBox->set_xmax(xmax);
    scaleBBox->set_ymax(ymax);
    scaleBBox->clear_size();
    // A 1x1 (or smaller) target means the coordinates stay normalized.
    bool normalized = !(width > 1 || height > 1);
    scaleBBox->set_size(BBoxSize(*scaleBBox, normalized));
    scaleBBox->set_difficult(bbox.difficult());
}
// Parse raw location predictions into per-image, per-label box lists.
//   locData layout: num x (numPredsPerClass * numLocClasses * 4), with the
//   four coordinates of each (prediction, class) pair stored contiguously.
//   When shareLocation is true all classes share one box set stored under
//   label -1 and numLocClasses must be 1.
void DetectionOutputLayer::GetLocPredictions(
const float* locData, const int num,
const int numPredsPerClass, const int numLocClasses,
const bool shareLocation, std::vector<LabelBBox>* locPreds)
{
locPreds->clear();
if (shareLocation)
{
CV_Assert(numLocClasses == 1);
}
locPreds->resize(num);
for (int i = 0; i < num; ++i)
{
LabelBBox& labelBBox = (*locPreds)[i];
for (int p = 0; p < numPredsPerClass; ++p)
{
// Offset of prediction p within the current image's data.
int startIdx = p * numLocClasses * 4;
for (int c = 0; c < numLocClasses; ++c)
{
int label = shareLocation ? -1 : c;
if (labelBBox.find(label) == labelBBox.end())
{
// First time we see this label: pre-size its box list.
labelBBox[label].resize(numPredsPerClass);
}
labelBBox[label][p].set_xmin(locData[startIdx + c * 4]);
labelBBox[label][p].set_ymin(locData[startIdx + c * 4 + 1]);
labelBBox[label][p].set_xmax(locData[startIdx + c * 4 + 2]);
labelBBox[label][p].set_ymax(locData[startIdx + c * 4 + 3]);
}
}
// Advance to the next image's block of predictions.
locData += numPredsPerClass * numLocClasses * 4;
}
}
void DetectionOutputLayer::GetConfidenceScores(
    const float* confData, const int num,
    const int numPredsPerClass, const int numClasses,
    std::vector<std::map<int, std::vector<float> > >* confPreds)
{
    // confData layout: num x (numPredsPerClass * numClasses), class-minor.
    // The result maps each class label to that class's score for every
    // prediction, in prediction order.
    confPreds->clear();
    confPreds->resize(num);
    const int imageStride = numPredsPerClass * numClasses;
    for (int i = 0; i < num; ++i, confData += imageStride)
    {
        std::map<int, std::vector<float> >& labelScores = (*confPreds)[i];
        for (int c = 0; c < numClasses; ++c)
        {
            // Gather the c-th class score of every prediction in this image.
            std::vector<float>& scores = labelScores[c];
            scores.reserve(numPredsPerClass);
            for (int p = 0; p < numPredsPerClass; ++p)
            {
                scores.push_back(confData[p * numClasses + c]);
            }
        }
    }
}
void DetectionOutputLayer::ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
const std::vector<float>& scores,
const float score_threshold,
const float nms_threshold, const int top_k,
std::vector<int>* indices)
{
// Sanity check.
CV_Assert(bboxes.size() == scores.size());
// Get top_k scores (with corresponding indices).
std::vector<std::pair<float, int> > score_index_vec;
GetMaxScoreIndex(scores, score_threshold, top_k, &score_index_vec);
// Do nms.
indices->clear();
while (score_index_vec.size() != 0)
{
const int idx = score_index_vec.front().second;
bool keep = true;
for (size_t k = 0; k < indices->size(); ++k)
{
if (keep)
{
const int kept_idx = (*indices)[k];
float overlap = JaccardOverlap(bboxes[idx], bboxes[kept_idx]);
keep = overlap <= nms_threshold;
}
else
{
break;
}
}
if (keep)
{
indices->push_back(idx);
}
score_index_vec.erase(score_index_vec.begin());
}
}
void DetectionOutputLayer::GetMaxScoreIndex(
    const std::vector<float>& scores, const float threshold,const int top_k,
    std::vector<std::pair<float, int> >* score_index_vec)
{
    // Collect a (score, index) pair for every score above the threshold.
    for (size_t i = 0; i < scores.size(); ++i)
    {
        const float s = scores[i];
        if (s > threshold)
        {
            score_index_vec->push_back(std::make_pair(s, i));
        }
    }
    // Stable descending sort keeps equal scores in their original index order.
    std::stable_sort(score_index_vec->begin(), score_index_vec->end(),
                     util::SortScorePairDescend<int>);
    // Truncate to the best top_k entries when a limit was requested.
    if (top_k > -1 && top_k < (int)score_index_vec->size())
    {
        score_index_vec->resize(top_k);
    }
}
void DetectionOutputLayer::IntersectBBox(const caffe::NormalizedBBox& bbox1,
                                         const caffe::NormalizedBBox& bbox2,
                                         caffe::NormalizedBBox* intersect_bbox) {
    const bool disjoint =
        bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
        bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin();
    if (disjoint)
    {
        // No overlap: report the degenerate box [0, 0, 0, 0].
        intersect_bbox->set_xmin(0);
        intersect_bbox->set_ymin(0);
        intersect_bbox->set_xmax(0);
        intersect_bbox->set_ymax(0);
        return;
    }
    // Overlap region: max of the mins and min of the maxes on each axis.
    intersect_bbox->set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
    intersect_bbox->set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
    intersect_bbox->set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
    intersect_bbox->set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
}
float DetectionOutputLayer::JaccardOverlap(const caffe::NormalizedBBox& bbox1,
                                           const caffe::NormalizedBBox& bbox2,
                                           const bool normalized) {
    caffe::NormalizedBBox inter;
    IntersectBBox(bbox1, bbox2, &inter);
    // Pixel (non-normalized) coordinates are inclusive, hence the extra +1.
    const float extra = normalized ? 0.f : 1.f;
    const float interWidth = inter.xmax() - inter.xmin() + extra;
    const float interHeight = inter.ymax() - inter.ymin() + extra;
    if (interWidth <= 0 || interHeight <= 0)
    {
        // Empty intersection: no overlap.
        return 0.;
    }
    // IoU: intersection area over union area.
    const float interSize = interWidth * interHeight;
    const float unionSize = BBoxSize(bbox1) + BBoxSize(bbox2) - interSize;
    return interSize / unionSize;
}
}
}

@ -0,0 +1,226 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_DETECTION_OUTPUT_LAYER_HPP__
#include "../precomp.hpp"
#include "caffe.pb.h"
namespace cv
{
namespace dnn
{
// SSD-style detection output layer: combines location predictions,
// per-class confidences and prior boxes into the final detection list,
// decoding boxes and applying per-class non-maximum suppression.
class DetectionOutputLayer : public Layer
{
unsigned _numClasses;           // number of classes (including background)
bool _shareLocation;            // true: one box set shared by all classes
int _numLocClasses;             // 1 if _shareLocation, else _numClasses
int _backgroundLabelId;         // class id ignored during decoding/NMS
typedef caffe::PriorBoxParameter_CodeType CodeType;
CodeType _codeType;             // bbox encoding (CORNER or CENTER_SIZE)
bool _varianceEncodedInTarget;  // true: offsets already include variance
int _keepTopK;                  // max detections kept per image (-1 = all)
float _confidenceThreshold;     // minimum score to consider a detection
int _num;                       // batch size (set in allocate)
int _numPriors;                 // number of prior boxes (set in allocate)
float _nmsThreshold;            // max IoU allowed between kept boxes
int _topK;                      // per-class NMS candidate limit (-1 = all)
static const size_t _numAxes = 4;         // expected input blob rank
static const std::string _layerName;      // layer type name for diagnostics
public:
// Reads all detection-output parameters from the layer definition.
DetectionOutputLayer(LayerParams &params);
// Validates inputs and caches batch size / prior count.
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Runs decoding + NMS; writes a [1 x 1 x numKept x 7] detections blob.
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Asserts the expected number/shape of input blobs.
void checkInputs(const std::vector<Blob*> &inputs);
// Parses the "code_type" parameter into _codeType.
void getCodeType(LayerParams &params);
// Fetches a typed parameter, optionally required or defaulted.
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx = 0,
const bool required = true,
const T& defaultValue = T());
// Looks up a raw DictValue parameter; returns false when absent.
bool getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result);
// Maps a class label (-1 when location is shared) to its box list.
typedef std::map<int, std::vector<caffe::NormalizedBBox> > LabelBBox;
// Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1].
void ClipBBox(const caffe::NormalizedBBox& bbox, caffe::NormalizedBBox* clip_bbox);
// Decode a bbox according to a prior bbox.
void DecodeBBox(const caffe::NormalizedBBox& prior_bbox,
const std::vector<float>& prior_variance, const CodeType code_type,
const bool variance_encoded_in_target, const caffe::NormalizedBBox& bbox,
caffe::NormalizedBBox* decode_bbox);
// Decode a set of bboxes according to a set of prior bboxes.
void DecodeBBoxes(const std::vector<caffe::NormalizedBBox>& prior_bboxes,
const std::vector<std::vector<float> >& prior_variances,
const CodeType code_type, const bool variance_encoded_in_target,
const std::vector<caffe::NormalizedBBox>& bboxes,
std::vector<caffe::NormalizedBBox>* decode_bboxes);
// Decode all bboxes in a batch.
void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_pred,
const std::vector<caffe::NormalizedBBox>& prior_bboxes,
const std::vector<std::vector<float> >& prior_variances,
const size_t num, const bool share_location,
const int num_loc_classes, const int background_label_id,
const CodeType code_type, const bool variance_encoded_in_target,
std::vector<LabelBBox>* all_decode_bboxes);
// Get prior bounding boxes from prior_data.
// prior_data: 1 x 2 x num_priors * 4 x 1 blob.
// num_priors: number of priors.
// prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
// prior_variances: stores all the variances needed by prior bboxes.
void GetPriorBBoxes(const float* priorData, const int& numPriors,
std::vector<caffe::NormalizedBBox>* priorBBoxes,
std::vector<std::vector<float> >* priorVariances);
// Scale the caffe::NormalizedBBox w.r.t. height and width.
void ScaleBBox(const caffe::NormalizedBBox& bbox, const int height, const int width,
caffe::NormalizedBBox* scale_bbox);
// Do non maximum suppression given bboxes and scores.
// Inspired by Piotr Dollar's NMS implementation in EdgeBox.
// https://goo.gl/jV3JYS
// bboxes: a set of bounding boxes.
// scores: a set of corresponding confidences.
// score_threshold: a threshold used to filter detection results.
// nms_threshold: a threshold used in non maximum suppression.
// top_k: if not -1, keep at most top_k picked indices.
// indices: the kept indices of bboxes after nms.
void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
const std::vector<float>& scores, const float score_threshold,
const float nms_threshold, const int top_k, std::vector<int>* indices);
// Do non maximum suppression given bboxes and scores.
// bboxes: a set of bounding boxes.
// scores: a set of corresponding confidences.
// threshold: the threshold used in non maximu suppression.
// top_k: if not -1, keep at most top_k picked indices.
// reuse_overlaps: if true, use and update overlaps; otherwise, always
// compute overlap.
// overlaps: a temp place to optionally store the overlaps between pairs of
// bboxes if reuse_overlaps is true.
// indices: the kept indices of bboxes after nms.
void ApplyNMS(const std::vector<caffe::NormalizedBBox>& bboxes,
const std::vector<float>& scores,
const float threshold, const int top_k, const bool reuse_overlaps,
std::map<int, std::map<int, float> >* overlaps, std::vector<int>* indices);
// Overload operating on a precomputed boolean overlap matrix.
void ApplyNMS(const bool* overlapped, const int num, std::vector<int>* indices);
// Get confidence predictions from conf_data.
// conf_data: num x num_preds_per_class * num_classes blob.
// num: the number of images.
// num_preds_per_class: number of predictions per class.
// num_classes: number of classes.
// conf_preds: stores the confidence prediction, where each item contains
// confidence prediction for an image.
void GetConfidenceScores(const float* conf_data, const int num,
const int num_preds_per_class, const int num_classes,
std::vector<std::map<int, std::vector<float> > >* conf_scores);
// Get confidence predictions from conf_data.
// conf_data: num x num_preds_per_class * num_classes blob.
// num: the number of images.
// num_preds_per_class: number of predictions per class.
// num_classes: number of classes.
// class_major: if true, data layout is
// num x num_classes x num_preds_per_class; otherwise, data layerout is
// num x num_preds_per_class * num_classes.
// conf_preds: stores the confidence prediction, where each item contains
// confidence prediction for an image.
void GetConfidenceScores(const float* conf_data, const int num,
const int num_preds_per_class, const int num_classes,
const bool class_major,
std::vector<std::map<int, std::vector<float> > >* conf_scores);
// Get location predictions from loc_data.
// loc_data: num x num_preds_per_class * num_loc_classes * 4 blob.
// num: the number of images.
// num_preds_per_class: number of predictions per class.
// num_loc_classes: number of location classes. It is 1 if share_location is
// true; and is equal to number of classes needed to predict otherwise.
// share_location: if true, all classes share the same location prediction.
// loc_preds: stores the location prediction, where each item contains
// location prediction for an image.
void GetLocPredictions(const float* loc_data, const int num,
const int num_preds_per_class, const int num_loc_classes,
const bool share_location, std::vector<LabelBBox>* loc_preds);
// Get max scores with corresponding indices.
// scores: a set of scores.
// threshold: only consider scores higher than the threshold.
// top_k: if -1, keep all; otherwise, keep at most top_k.
// score_index_vec: store the sorted (score, index) pair.
void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold,
const int top_k, std::vector<std::pair<float, int> >* score_index_vec);
// Compute the jaccard (intersection over union IoU) overlap between two bboxes.
float JaccardOverlap(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2,
const bool normalized = true);
// Compute the intersection between two bboxes.
void IntersectBBox(const caffe::NormalizedBBox& bbox1, const caffe::NormalizedBBox& bbox2,
caffe::NormalizedBBox* intersect_bbox);
// Compute bbox size.
float BBoxSize(const caffe::NormalizedBBox& bbox, const bool normalized = true);
};
}
}
#endif

@ -0,0 +1,117 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "flatten_layer.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
// Flattens the axes in [axis, end_axis] into a single dimension.
// Caffe defaults: start at axis 1 (preserve the batch dim) and end at the
// last axis (-1 is resolved in allocate()).
FlattenLayer::FlattenLayer(LayerParams &params) : Layer(params)
{
_startAxis = params.get<int>("axis", 1);
_endAxis = params.get<int>("end_axis", -1);
}
void FlattenLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    // At least one input is required; every input must match the first
    // blob's extent along each of the _numAxes axes.
    CV_Assert(inputs.size() > 0);
    Blob* first = inputs[0];
    for (size_t i = 1; i < inputs.size(); i++)
    {
        Blob* current = inputs[i];
        for (size_t j = 0; j < _numAxes; j++)
        {
            CV_Assert(current->shape()[j] == first->shape()[j]);
        }
    }
}
// Computes the flattened output shape and wires each output to share its
// input's data buffer (flatten is a pure reshape; no copy).
void FlattenLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
checkInputs(inputs);
_numAxes = inputs[0]->dims();
// end_axis may be negative (counted from the end); resolve it here.
_endAxis = inputs[0]->canonicalAxis(_endAxis);
// NOTE(review): _startAxis is never canonicalized, so a negative "axis"
// parameter trips this assert — confirm whether negative start axes
// should be resolved like end_axis.
CV_Assert(_startAxis >= 0);
CV_Assert(_endAxis >= _startAxis && _endAxis < (int)_numAxes);
// Product of all extents in the flattened range [_startAxis, _endAxis].
size_t flattenedDimensionSize = 1;
for (int i = _startAxis; i <= _endAxis; i++)
{
flattenedDimensionSize *= inputs[0]->size(i);
}
// Output shape: leading axes unchanged, one merged axis, trailing axes unchanged.
std::vector<int> outputShapeVec;
for (int i = 0; i < _startAxis; i++)
{
outputShapeVec.push_back(inputs[0]->size(i));
}
outputShapeVec.push_back(flattenedDimensionSize);
for (size_t i = _endAxis + 1; i < _numAxes; i++)
{
outputShapeVec.push_back(inputs[0]->size(i));
}
// BlobShape supports at most 4 dimensions here.
CV_Assert(outputShapeVec.size() <= 4);
resultShape = BlobShape(outputShapeVec);
for (size_t i = 0; i < inputs.size(); i++)
{
//in-place
outputs[i].shareFrom(*inputs[i]);
outputs[i].reshape(resultShape);
}
}
void FlattenLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    // Flattening never copies data: each output shares the input buffer and
    // reinterprets it with the shape computed in allocate().
    const size_t count = inputs.size();
    for (size_t i = 0; i < count; i++)
    {
        outputs[i].shareFrom(*inputs[i]);
        outputs[i].reshape(resultShape);
    }
}
}
}

@ -0,0 +1,67 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FLATTEN_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Reshape layer that collapses the axes in [_startAxis, _endAxis] of each
// input blob into a single dimension; data is shared, never copied.
class FlattenLayer : public Layer
{
int _startAxis;        // first axis to flatten ("axis" param, default 1)
int _endAxis;          // last axis to flatten ("end_axis" param, default -1)
size_t _numAxes;       // rank of the input blob (set in allocate())
BlobShape resultShape; // flattened output shape (computed in allocate())
public:
// Reads "axis" / "end_axis" from the layer parameters.
FlattenLayer(LayerParams &params);
// Validates inputs, computes resultShape and shares input data into outputs.
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Re-applies the shared reshape on every forward pass.
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Asserts that all inputs have identical shapes.
void checkInputs(const std::vector<Blob*> &inputs);
};
}
}
#endif

@ -46,5 +46,104 @@ namespace cv
namespace dnn
{
namespace util
{
std::string makeName(const std::string& str1, const std::string& str2)
{
    // Join a parameter base name with a suffix, e.g. "kernel" + "_h".
    std::string joined(str1);
    joined.append(str2);
    return joined;
}
bool getParameter(LayerParams &params, const std::string& nameBase, const std::string& nameAll, int &parameterH, int &parameterW, bool hasDefault = false, const int& defaultValue = 0)
{
    // Resolve a possibly-rectangular parameter in priority order:
    // 1) the explicit "<base>_h" / "<base>_w" pair,
    // 2) the combined name (nameAll, or nameBase if nameAll is empty),
    // 3) the caller-supplied default (only when hasDefault is true).
    // Returns false when nothing matched and no default was allowed.
    std::string nameH = makeName(nameBase, std::string("_h"));
    std::string nameW = makeName(nameBase, std::string("_w"));
    std::string nameAll_ = nameAll.empty() ? nameBase : nameAll;
    if (params.has(nameH) && params.has(nameW))
    {
        parameterH = params.get<int>(nameH);
        parameterW = params.get<int>(nameW);
        return true;
    }
    if (params.has(nameAll_))
    {
        parameterH = parameterW = params.get<int>(nameAll_);
        return true;
    }
    if (hasDefault)
    {
        parameterH = parameterW = defaultValue;
        return true;
    }
    return false;
}
void getKernelSize(LayerParams &params, int &kernelH, int &kernelW)
{
    // The kernel has no default: either "kernel_size" or the
    // "kernel_h"/"kernel_w" pair must be present, and both extents must be
    // strictly positive.
    bool found = util::getParameter(params, "kernel", "kernel_size", kernelH, kernelW);
    if (!found)
    {
        CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
    }
    CV_Assert(kernelH > 0 && kernelW > 0);
}
// Reads optional padding and stride parameters; padding defaults to 0 and
// stride to 1 when absent. Padding must be non-negative, stride positive.
void getStrideAndPadding(LayerParams &params, int &padH, int &padW, int &strideH, int &strideW)
{
util::getParameter(params, "pad", "pad", padH, padW, true, 0);
util::getParameter(params, "stride", "stride", strideH, strideW, true, 1);
CV_Assert(padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
}
}
// Extracts pooling geometry from layer params. In global-pooling mode the
// kernel covers the whole input, so an explicit kernel size is forbidden and
// pad/stride must keep their defaults (0 and 1 respectively).
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW)
{
    util::getStrideAndPadding(params, padH, padW, strideH, strideW);

    globalPooling = params.has("global_pooling");
    if (!globalPooling)
    {
        // Ordinary pooling: the kernel size is mandatory.
        util::getKernelSize(params, kernelH, kernelW);
        return;
    }

    if (params.has("kernel_h") || params.has("kernel_w") || params.has("kernel_size"))
    {
        CV_Error(cv::Error::StsBadArg, "In global_pooling mode, kernel_size (or kernel_h and kernel_w) cannot be specified");
    }
    if (padH != 0 || padW != 0 || strideH != 1 || strideW != 1)
    {
        CV_Error(cv::Error::StsBadArg, "In global_pooling mode, pad_h and pad_w must be = 0, and stride_h and stride_w must be = 1");
    }
}
// Extracts convolution geometry: mandatory kernel size, defaulted pad/stride,
// and dilation (default 1, accepted as "dilation" or "dilation_h"/"dilation_w").
void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW)
{
util::getKernelSize(params, kernelH, kernelW);
util::getStrideAndPadding(params, padH, padW, strideH, strideW);
util::getParameter(params, "dilation", "dilation", dilationH, dilationW, true, 1);
CV_Assert(dilationH > 0 && dilationW > 0);
}
}
}

@ -50,6 +50,10 @@ namespace cv
namespace dnn
{
// Parses convolution geometry from Caffe-style layer params (kernel required;
// pad/stride defaulted; dilation defaults to 1).
void getConvolutionKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW, int &dilationH, int &dilationW);
// Parses pooling geometry; globalPooling is set when "global_pooling" is present.
void getPoolingKernelParams(LayerParams &params, int &kernelH, int &kernelW, bool &globalPooling, int &padH, int &padW, int &strideH, int &strideW);
}
}

@ -208,9 +208,8 @@ void LRNLayerImpl::spatialNormalization(Blob &src, Blob &dst)
template<>
void LRNLayerImpl::sqrBoxFilter_<Mat>(const Mat &src, Mat &dst)
{
    // Wrap the raw source buffer in a Mat header (no copy) so sqrBoxFilter can
    // read it directly; the previous implementation copied src into an
    // intermediate buffer and ran the filter twice (merge residue) — the copy
    // and the duplicate filter call are removed here.
    Mat srcRawWrapper(src.rows, src.cols, src.type(), src.data, src.step[0]);
    cv::sqrBoxFilter(srcRawWrapper, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
}
template<>

@ -0,0 +1,201 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "normalize_bbox_layer.hpp"
#include "op_blas.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
const std::string NormalizeBBoxLayer::_layerName = std::string("NormalizeBBox");
// Fetches a named parameter into `result`.
// Returns false (leaving `result` untouched) when the key is absent.
bool NormalizeBBoxLayer::getParameterDict(const LayerParams &params,
                                          const std::string &parameterName,
                                          DictValue& result)
{
    const bool present = params.has(parameterName);
    if (present)
    {
        result = params.get(parameterName);
    }
    return present;
}
// Typed parameter lookup helper.
// Returns element `idx` of the parameter when present; otherwise returns
// `defaultValue` for optional parameters or raises StsBadArg for required ones.
template<typename T>
T NormalizeBBoxLayer::getParameter(const LayerParams &params,
                                   const std::string &parameterName,
                                   const size_t &idx,
                                   const bool required,
                                   const T& defaultValue)
{
    DictValue dictValue;
    if (getParameterDict(params, parameterName, dictValue))
    {
        return dictValue.get<T>(idx);
    }

    if (!required)
    {
        return defaultValue;
    }

    std::string message = _layerName;
    message += " layer parameter does not contain ";
    message += parameterName;
    message += " parameter.";
    CV_Error(Error::StsBadArg, message);
    return defaultValue; // not reached: CV_Error throws
}
// Reads layer configuration: eps is optional (default 1e-10);
// across_spatial and channel_shared are required and raise StsBadArg if absent.
NormalizeBBoxLayer::NormalizeBBoxLayer(LayerParams &params) : Layer(params)
{
_eps = getParameter<float>(params, "eps", 0, false, 1e-10f);
_across_spatial = getParameter<bool>(params, "across_spatial");
_channel_shared = getParameter<bool>(params, "channel_shared");
}
// Verifies that at least one input is given, that all inputs share the same
// shape along the first _numAxes axes, and that the first input has > 2 dims.
void NormalizeBBoxLayer::checkInputs(const std::vector<Blob*> &inputs)
{
    CV_Assert(inputs.size() > 0);

    // Every input must agree with input 0 on each of the first _numAxes axes.
    for (size_t i = 1; i < inputs.size(); i++)
        for (size_t j = 0; j < _numAxes; j++)
            CV_Assert(inputs[i]->shape()[j] == inputs[0]->shape()[j]);

    CV_Assert(inputs[0]->dims() > 2);
}
// Caches input geometry, allocates the working buffers used by forward(),
// grabs the learned scale blob, and sizes every output like input 0.
void NormalizeBBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    checkInputs(inputs);

    _num = inputs[0]->num();
    _channels = inputs[0]->shape()[1];
    _rows = inputs[0]->shape()[2];
    _cols = inputs[0]->shape()[3];

    _channelSize = _rows * _cols;
    _imageSize = _channelSize * _channels;

    _buffer = Mat(_channels, _channelSize, CV_32F);

    // Ones-vectors used as gemm operands to sum across channels / positions.
    _sumChannelMultiplier = Mat(_channels, 1, CV_32F, Scalar(1.0));
    _sumSpatialMultiplier = Mat(1, _channelSize, CV_32F, Scalar(1.0));

    _scale = blobs[0];

    // Fix: the original indexed outputs[i] for every input without ensuring
    // the vector is large enough; resize first so the loop is always safe.
    outputs.resize(inputs.size());
    for(size_t i = 0; i < inputs.size(); i++)
    {
        outputs[i].create(BlobShape(inputs[0]->shape()));
    }
}
// L2-normalizes each image of each input blob, either over the whole image
// (_across_spatial) or per spatial position, then multiplies by the learned
// scale (one scalar if _channel_shared, else one factor per channel).
void NormalizeBBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Mat zeroBuffer(_channels, _channelSize, CV_32F, Scalar(0));
Mat absDiff;
for (size_t j = 0; j < inputs.size(); j++)
{
for (size_t n = 0; n < _num; ++n)
{
// View image n as a _channels x _channelSize matrix (no copy).
Mat src = Mat(_channels, _channelSize, CV_32F, inputs[j]->ptrf(n));
Mat dst = Mat(_channels, _channelSize, CV_32F, outputs[j].ptrf(n));
// _buffer holds element-wise squares of the input.
_buffer = src.mul(src);
if (_across_spatial)
{
// abs(x^2 - 0) == x^2; absdiff is used here to get an absolute sum.
absdiff(_buffer, zeroBuffer, absDiff);
// add eps to avoid overflow
double absSum = sum(absDiff)[0] + _eps;
float norm = sqrt(absSum);
dst = src / norm;
}
else
{
Mat norm(_channelSize, 1, _buffer.type()); // 1 x _channelSize
// (_channels x_channelSize)T * _channels x 1 -> _channelSize x 1
gemmCPU(_buffer, _sumChannelMultiplier, 1, norm, 0, GEMM_1_T);
// compute norm
pow(norm, 0.5f, norm);
// scale the layer
// _channels x 1 * (_channelSize x 1)T -> _channels x _channelSize
gemmCPU(_sumChannelMultiplier, norm, 1, _buffer, 0, GEMM_2_T);
dst = src / _buffer;
}
// scale the output
if (_channel_shared)
{
// _scale: 1 x 1
dst *= _scale.matRefConst().at<float>(0, 0);
}
else
{
// _scale: _channels x 1
// _channels x 1 * 1 x _channelSize -> _channels x _channelSize
gemmCPU(_scale.matRefConst(), _sumSpatialMultiplier, 1, _buffer, 0);
dst = dst.mul(_buffer);
}
}
}
}
}
}

@ -0,0 +1,94 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_NORMALIZEBBOX_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// SSD NormalizeBBox layer: L2-normalizes activations (globally per image or
// per spatial position) and rescales them with a learned scale blob.
class NormalizeBBoxLayer : public Layer
{
Mat _buffer;               // scratch matrix reused by forward()
Mat _sumChannelMultiplier; // ones (_channels x 1), gemm operand for channel sums
Mat _sumSpatialMultiplier; // ones (1 x _channelSize), gemm operand for spatial broadcast
Blob _scale;               // learned scale: 1x1 if channel_shared, else _channels x 1
float _eps;                // added to the squared sum to avoid division issues
bool _across_spatial;      // true: one norm per image; false: one norm per position
bool _channel_shared;      // true: a single scalar scale for all channels
size_t _num;               // batch size
size_t _channels;
size_t _rows;
size_t _cols;
size_t _channelSize;       // _rows * _cols
size_t _imageSize;         // _channelSize * _channels
static const size_t _numAxes = 4;
static const std::string _layerName; // used in error messages
public:
NormalizeBBoxLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void checkInputs(const std::vector<Blob*> &inputs);
// Typed lookup: element idx of a parameter, with optional default for
// non-required keys; raises StsBadArg when a required key is missing.
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx = 0,
const bool required = true,
const T& defaultValue = T());
// Raw lookup; returns false when the key is absent.
bool getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result);
};
}
}
#endif

@ -56,8 +56,12 @@ bool im2col_ocl(const UMat &img,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
UMat &col)
{
//TODO
CV_Assert(dilation_h == 1 && dilation_w == 1);
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;

@ -41,8 +41,8 @@
#ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
#define __OPENCV_DNN_LAYERS_IM2COL_HPP__
#include "../precomp.hpp"
#include <iostream>
#include <opencv2/core.hpp>
#include <cstdlib>
namespace cv
{
@ -57,6 +57,7 @@ class im2col_CpuPBody : public cv::ParallelLoopBody
int kernel_h, kernel_w;
int pad_h, pad_w;
int stride_h, stride_w;
int dilation_h, dilation_w;
Dtype* data_col;
int height_col, width_col, channels_col;
@ -68,17 +69,21 @@ public:
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
Dtype* data_col)
{
im2col_CpuPBody<Dtype> t;
t.data_im = data_im;
t.data_col = data_col;
t.channels = channels; t.height = height; t.width = width;
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
t.dilation_h = dilation_h; t.dilation_w = dilation_w;
t.height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
t.width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
t.channels_col = channels * kernel_h * kernel_w;
cv::parallel_for_(Range(0, t.channels_col), t);
@ -86,17 +91,20 @@ public:
virtual void operator ()(const Range &r) const
{
for (int c = r.start; c < r.end; ++c) {
for (int c = r.start; c < r.end; ++c)
{
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
for (int h = 0; h < height_col; ++h)
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(c * height_col + h) * width_col + w] =
data_im[(c_im * height + h_pad) * width + w_pad];
data_im[(c_im * height + h_pad) * width + w_pad];
else
data_col[(c * height_col + h) * width_col + w] = 0;
}
@ -180,10 +188,11 @@ void col2im_cpu(const Dtype* data_col,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
Dtype* data_im)
{
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
@ -198,12 +207,12 @@ void col2im_cpu(const Dtype* data_col,
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(c_im * height + h_pad) * width + w_pad] +=
data_col[(c * height_col + h) * width_col + w];
data_col[(c * height_col + h) * width_col + w];
}
}
}
@ -215,6 +224,7 @@ bool im2col_ocl(const UMat &img,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int dilation_h, int dilation_w,
UMat &col);
bool col2im_ocl(const UMat &col,

@ -0,0 +1,185 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "permute_layer.hpp"
#include <float.h>
#include <algorithm>
namespace cv
{
namespace dnn
{
// Validates one axis index from the "order" parameter: it must lie in [0, 3]
// and must not repeat an index already accepted into _order.
void PermuteLayer::checkCurrentOrder(int currentOrder)
{
    if(currentOrder < 0 || currentOrder > 3)
    {
        // Fix: the two adjacent literals previously concatenated without a
        // space, producing "...parametermust be...".
        CV_Error(
                 Error::StsBadArg,
                 "Orders of dimensions in Permute layer parameter "
                 "must be in [0...3] interval");
    }

    if(std::find(_order.begin(), _order.end(), currentOrder) != _order.end())
    {
        CV_Error(Error::StsBadArg,
                 "Permute layer parameter contains duplicated orders.");
    }
}
void PermuteLayer::checkNeedForPermutation()
{
_needsPermute = false;
for (size_t i = 0; i < _numAxes; ++i)
{
if (_order[i] != i)
{
_needsPermute = true;
break;
}
}
}
// Parses the "order" parameter (at most 4 axis indices, each unique and in
// [0, 3]). Without it the layer is a plain pass-through.
PermuteLayer::PermuteLayer(LayerParams &params) : Layer(params)
{
    if (!params.has("order"))
    {
        _needsPermute = false;
        return;
    }

    DictValue paramOrder = params.get("order");
    if(paramOrder.size() > 4)
    {
        CV_Error(
            Error::StsBadArg,
            "Too many (> 4) orders of dimensions in Permute layer");
    }

    _numAxes = paramOrder.size();
    size_t axis = 0;
    while (axis < _numAxes)
    {
        const int axisOrder = paramOrder.get<int>(axis);
        checkCurrentOrder(axisOrder);
        _order.push_back(axisOrder);
        ++axis;
    }

    checkNeedForPermutation();
}
// Precomputes row-major strides for the source and destination shapes
// (innermost axis has stride 1) and the total element count.
void PermuteLayer::computeStrides()
{
    _oldStride.assign(_numAxes, 1);
    _newStride.assign(_numAxes, 1);

    for (int axis = (int)_numAxes - 2; axis >= 0; --axis)
    {
        _oldStride[axis] = _oldStride[axis + 1] * _oldDimensionSize[axis + 1];
        _newStride[axis] = _newStride[axis + 1] * _newDimensionSize[axis + 1];
    }

    // stride of axis 0 times its extent == total number of elements.
    _count = _oldStride[0] * _oldDimensionSize[0];
}
// Computes the permuted output shape and allocates one output per input.
// A no-op when the order is the identity (forward will alias the inputs).
void PermuteLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
if(!_needsPermute)
{
return;
}
CV_Assert(inputs.size() > 0);
CV_Assert((int)_numAxes == inputs[0]->shape().dims());
outputs.resize(inputs.size());
_oldDimensionSize = inputs[0]->shape();
// NOTE(review): _newDimensionSize is written by index without being sized to
// _numAxes first — presumably BlobShape default-constructs with enough axes;
// TODO confirm against BlobShape's default constructor.
for (size_t i = 0; i < _numAxes; i++)
{
_newDimensionSize[i] = _oldDimensionSize[_order[i]];
}
// The hard-coded axes 2/3 check assumes 4-D (NCHW) inputs.
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->rows() == _oldDimensionSize[2] && inputs[i]->cols() == _oldDimensionSize[3]);
outputs[i].create(BlobShape(_newDimensionSize));
}
computeStrides();
}
// Copies each element to its permuted position using the precomputed strides.
// Identity order short-circuits to a cheap pass-through.
void PermuteLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
if(!_needsPermute)
{
for (size_t j = 0; j < inputs.size(); j++)
{
// NOTE(review): Mat assignment shares the underlying buffer rather than
// copying — output aliases the input; presumably intentional, verify.
outputs[j].matRef() = inputs[j]->matRef();
}
return;
}
for (size_t k = 0; k < inputs.size(); k++)
{
float *srcData = inputs[k]->ptrf();
float *dstData = outputs[k].ptrf();
for (size_t i = 0; i < _count; ++i)
{
// Decompose destination index i into per-axis coordinates via the new
// strides, then rebuild the source offset with the old strides.
int oldPosition = 0;
int newPosition = i;
for (size_t j = 0; j < _numAxes; ++j)
{
oldPosition += (newPosition / _newStride[j]) * _oldStride[_order[j]];
newPosition %= _newStride[j];
}
dstData[i] = srcData[oldPosition];
}
}
}
}
}

@ -0,0 +1,75 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_PERMUTE_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Permutes the axes of up-to-4-D blobs according to the "order" parameter
// (e.g. NCHW -> NHWC); a pass-through when the order is the identity.
class PermuteLayer : public Layer
{
size_t _count;                    // total elements per blob
std::vector<size_t> _order;       // requested axis order
BlobShape _oldDimensionSize;      // input shape
BlobShape _newDimensionSize;      // permuted output shape
std::vector<size_t> _oldStride;   // row-major strides of the input shape
std::vector<size_t> _newStride;   // row-major strides of the output shape
bool _needsPermute;               // false when order is identity or absent
size_t _numAxes;                  // number of axes in "order"
void checkCurrentOrder(int currentOrder);
void checkNeedForPermutation();
void computeStrides();
public:
PermuteLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif

@ -57,11 +57,12 @@ namespace dnn
// Default constructor: geometry is read later from params; plain (non-global)
// pooling by default.
PoolingLayerImpl::PoolingLayerImpl()
{
globalPooling = false;
}
PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size pad_)
{
globalPooling = false;
type = type_;
kernel = kernel_;
pad = pad_;
@ -73,6 +74,12 @@ void PoolingLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Bl
CV_Assert(inputs.size() > 0);
inp = inputs[0]->size2();
if(globalPooling)
{
kernel = inp;
}
computeOutputShape(inp);
useOpenCL = ocl::useOpenCL();
@ -266,5 +273,12 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
return Ptr<PoolingLayer>(new PoolingLayerImpl(type, kernel, stride, pad));
}
// Factory for a global-pooling layer: builds a default pooling layer of the
// requested type and switches it into global mode (kernel == whole input).
Ptr<PoolingLayer> PoolingLayer::createGlobal(int type)
{
    Ptr<PoolingLayer> layer = PoolingLayer::create(type);
    layer->globalPooling = true;
    return layer;
}
}
}

@ -0,0 +1,307 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "prior_box_layer.hpp"
#include <float.h>
#include <algorithm>
#include <cmath>
namespace cv
{
namespace dnn
{
const std::string PriorBoxLayer::_layerName = std::string("PriorBox");
// Fetches a named parameter into `result`.
// Returns false (leaving `result` untouched) when the key is absent.
bool PriorBoxLayer::getParameterDict(const LayerParams &params,
                                     const std::string &parameterName,
                                     DictValue& result)
{
    const bool present = params.has(parameterName);
    if (present)
    {
        result = params.get(parameterName);
    }
    return present;
}
// Typed parameter lookup helper.
// Returns element `idx` of the parameter when present; otherwise returns
// `defaultValue` for optional parameters or raises StsBadArg for required ones.
template<typename T>
T PriorBoxLayer::getParameter(const LayerParams &params,
                              const std::string &parameterName,
                              const size_t &idx,
                              const bool required,
                              const T& defaultValue)
{
    DictValue dictValue;
    if (getParameterDict(params, parameterName, dictValue))
    {
        return dictValue.get<T>(idx);
    }

    if (!required)
    {
        return defaultValue;
    }

    std::string message = _layerName;
    message += " layer parameter does not contain ";
    message += parameterName;
    message += " parameter.";
    CV_Error(Error::StsBadArg, message);
    return defaultValue; // not reached: CV_Error throws
}
// Collects the "aspect_ratio" list into _aspectRatios, skipping values already
// present (within 1e-6) and, when _flip is set, also adding each reciprocal.
void PriorBoxLayer::getAspectRatios(const LayerParams &params)
{
    DictValue aspectRatioParameter;
    bool aspectRatioRetieved = getParameterDict(params, "aspect_ratio", aspectRatioParameter);
    CV_Assert(aspectRatioRetieved);

    for (int i = 0; i < aspectRatioParameter.size(); ++i)
    {
        float aspectRatio = aspectRatioParameter.get<float>(i);

        // Tolerance-based duplicate scan over ratios accepted so far.
        bool alreadyExists = false;
        for (size_t j = 0; j < _aspectRatios.size() && !alreadyExists; ++j)
        {
            alreadyExists = fabs(aspectRatio - _aspectRatios[j]) < 1e-6;
        }
        if (alreadyExists)
        {
            continue;
        }

        _aspectRatios.push_back(aspectRatio);
        if (_flip)
        {
            _aspectRatios.push_back(1./aspectRatio);
        }
    }
}
// Reads the "variance" parameter: either exactly four positive values, a
// single positive value, or nothing (which falls back to the default 0.1).
void PriorBoxLayer::getVariance(const LayerParams &params)
{
    DictValue varianceParameter;
    bool varianceParameterRetrieved = getParameterDict(params, "variance", varianceParameter);
    CV_Assert(varianceParameterRetrieved);

    int varianceSize = varianceParameter.size();
    if (varianceSize > 1)
    {
        // Must and only provide 4 variance.
        CV_Assert(varianceSize == 4);
        for (int i = 0; i < varianceSize; ++i)
        {
            float variance = varianceParameter.get<float>(i);
            CV_Assert(variance > 0);
            _variance.push_back(variance);
        }
    }
    else if (varianceSize == 1)
    {
        float variance = varianceParameter.get<float>(0);
        CV_Assert(variance > 0);
        _variance.push_back(variance);
    }
    else
    {
        // Set default to 0.1.
        _variance.push_back(0.1f);
    }
}
// Parses SSD PriorBox configuration: required min_size/flip/clip, the
// aspect-ratio list (always containing 1), variances, and optional max_size
// (which adds one extra prior per location).
PriorBoxLayer::PriorBoxLayer(LayerParams &params) : Layer(params)
{
// NOTE(review): min_size is fetched as unsigned and stored into a float
// member — presumably intentional truncation of fractional sizes; confirm.
_minSize = getParameter<unsigned>(params, "min_size");
CV_Assert(_minSize > 0);
_flip = getParameter<bool>(params, "flip");
_clip = getParameter<bool>(params, "clip");
_aspectRatios.clear();
// Aspect ratio 1 is always generated as the first prior.
_aspectRatios.push_back(1.);
getAspectRatios(params);
getVariance(params);
_numPriors = _aspectRatios.size();
// max_size < 0 marks "not configured".
_maxSize = -1;
if (params.has("max_size"))
{
_maxSize = params.get("max_size").get<float>(0);
CV_Assert(_maxSize > _minSize);
_numPriors += 1;
}
}
// Derives layer/image geometry from the two inputs (feature map and image
// blob) and allocates the single 2-channel output: channel 0 holds prior box
// coordinates, channel 1 holds the matching variances.
void PriorBoxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
    CV_Assert(inputs.size() == 2);

    _layerWidth = inputs[0]->cols();
    _layerHeight = inputs[0]->rows();

    _imageWidth = inputs[1]->cols();
    _imageHeight = inputs[1]->rows();

    // Distance (in image pixels) between adjacent feature-map cells.
    _stepX = static_cast<float>(_imageWidth) / _layerWidth;
    _stepY = static_cast<float>(_imageHeight) / _layerHeight;

    // Since all images in a batch has same height and width, we only need to
    // generate one set of priors which can be shared across all images.
    size_t outNum = 1;
    // 2 channels. First channel stores the mean of each prior coordinate.
    // Second channel stores the variance of each prior coordinate.
    size_t outChannels = 2;
    _outChannelSize = _layerHeight * _layerWidth * _numPriors * 4;

    // Fix: the original wrote to outputs[0] without ensuring the vector holds
    // at least one element; resize first so the access is always valid.
    outputs.resize(1);
    outputs[0].create(BlobShape(outNum, outChannels, _outChannelSize));
    outputs[0].matRef() = 0;
}
// Fills channel 0 of the output with prior boxes (xmin, ymin, xmax, ymax,
// normalized to [0, 1] by image size) for every feature-map cell, and
// channel 1 with the per-coordinate variances.
void PriorBoxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
(void)inputs; // to suppress unused parameter warning
float* outputPtr = outputs[0].ptrf();
// first prior: aspect_ratio = 1, size = min_size
int idx = 0;
for (size_t h = 0; h < _layerHeight; ++h)
{
for (size_t w = 0; w < _layerWidth; ++w)
{
_boxWidth = _boxHeight = _minSize;
// Box center in image pixels: the middle of feature-map cell (w, h).
float center_x = (w + 0.5) * _stepX;
float center_y = (h + 0.5) * _stepY;
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
// xmax
outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
// ymax
outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
if (_maxSize > 0)
{
// second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
_boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
// xmax
outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
// ymax
outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
}
// rest of priors
for (size_t r = 0; r < _aspectRatios.size(); ++r)
{
float ar = _aspectRatios[r];
// Ratio 1 was already emitted as the first prior above.
if (fabs(ar - 1.) < 1e-6)
{
continue;
}
_boxWidth = _minSize * sqrt(ar);
_boxHeight = _minSize / sqrt(ar);
// xmin
outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
// ymin
outputPtr[idx++] = (center_y - _boxHeight / 2.) / _imageHeight;
// xmax
outputPtr[idx++] = (center_x + _boxWidth / 2.) / _imageWidth;
// ymax
outputPtr[idx++] = (center_y + _boxHeight / 2.) / _imageHeight;
}
}
}
// clip the prior's coordidate such that it is within [0, 1]
if (_clip)
{
for (size_t d = 0; d < _outChannelSize; ++d)
{
outputPtr[d] = std::min<float>(std::max<float>(outputPtr[d], 0.), 1.);
}
}
// set the variance.
outputPtr = outputs[0].ptrf(0, 1);
if(_variance.size() == 1)
{
// A single variance value is broadcast over the whole second channel.
Mat secondChannel(outputs[0].rows(), outputs[0].cols(), CV_32F, outputPtr);
secondChannel.setTo(Scalar(_variance[0]));
}
else
{
// Four variances: repeat the (x, y, w, h) quadruple for every prior.
int count = 0;
for (size_t h = 0; h < _layerHeight; ++h)
{
for (size_t w = 0; w < _layerWidth; ++w)
{
for (size_t i = 0; i < _numPriors; ++i)
{
for (int j = 0; j < 4; ++j)
{
outputPtr[count] = _variance[j];
++count;
}
}
}
}
}
}
}
}

@ -0,0 +1,101 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_PRIOR_BOX_LAYER_HPP__
#include "../precomp.hpp"
namespace cv
{
namespace dnn
{
// Layer that generates "prior" (default/anchor) boxes over a feature map.
// NOTE(review): name and parameters (_minSize/_maxSize/_aspectRatios/_flip)
// mirror the Caffe SSD PriorBox layer -- confirm against the Caffe importer.
class PriorBoxLayer : public Layer
{
size_t _layerWidth;   // presumably the input feature-map width  -- TODO confirm in allocate()
size_t _layerHeight;  // presumably the input feature-map height -- TODO confirm in allocate()
size_t _imageWidth;   // box x-coordinates are divided by this in forward(), i.e. output is normalized to the image
size_t _imageHeight;  // box y-coordinates are divided by this in forward()
size_t _outChannelSize; // number of floats per output channel; forward() clips exactly this many values when _clip is set
float _stepX; // presumably horizontal stride between box centers -- TODO confirm
float _stepY; // presumably vertical stride between box centers -- TODO confirm
float _minSize; // presumably smallest box size (pixels), per the SSD PriorBox contract -- TODO confirm
float _maxSize; // presumably largest box size (pixels) -- TODO confirm
float _boxWidth;  // width of the box currently being emitted (scratch used while filling the output)
float _boxHeight; // height of the box currently being emitted
std::vector<float> _aspectRatios; // aspect ratios of the generated boxes
std::vector<float> _variance; // written to the second output channel: one value broadcast, or 4 values repeated per prior
bool _flip; // presumably also emit the reciprocal of each aspect ratio -- TODO confirm
bool _clip; // when true, forward() clamps every output coordinate into [0, 1]
size_t _numPriors; // number of prior boxes emitted per feature-map cell (inner loop bound in forward())
static const size_t _numAxes = 4; // 4 coordinates per box: xmin, ymin, xmax, ymax
static const std::string _layerName;
public:
PriorBoxLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
// Reads a single typed parameter from params; when `required` is true a
// missing parameter is presumably an error, otherwise `defaultValue` is used
// -- TODO confirm against the .cpp implementation.
template<typename T>
T getParameter(const LayerParams &params,
const std::string &parameterName,
const size_t &idx = 0,
const bool required = true,
const T& defaultValue = T());
// Fetches the raw DictValue for parameterName; returns false when absent.
bool getParameterDict(const LayerParams &params,
const std::string &parameterName,
DictValue& result);
// Fill _aspectRatios from params (handles _flip -- TODO confirm).
void getAspectRatios(const LayerParams &params);
// Fill _variance from params.
void getVariance(const LayerParams &params);
};
}
}
#endif

@ -44,6 +44,7 @@
#include "op_blas.hpp"
#include <iostream>
#include <cmath>
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
@ -60,6 +61,7 @@ static void tanh(const Mat &src, Mat &dst)
*itDst = std::tanh(*itSrc);
}
//TODO: make utils method
static void tanh(const Mat &src, Mat &dst)
{
dst.create(src.dims, (const int*)src.size, src.type());
@ -86,9 +88,9 @@ class LSTMLayerImpl : public LSTMLayer
int dtype;
bool allocated;
BlobShape outTailShape; //shape of single output sample
BlobShape outTsMatShape, outTsShape; //shape of N output samples
BlobShape outResShape; //shape of T timestamps and N output samples
Shape outTailShape; //shape of single output sample
Shape outTsMatShape, outTsShape; //shape of N output samples
Shape outResShape; //shape of T timestamps and N output samples
bool useTimestampDim;
bool produceCellOutput;
@ -101,7 +103,7 @@ public:
useTimestampDim = true;
produceCellOutput = false;
allocated = false;
outTailShape = BlobShape::empty();
outTailShape = Shape::empty();
}
void setUseTimstampsDim(bool use)
@ -120,7 +122,7 @@ public:
{
CV_Assert(cInternal.empty() || C.total() == cInternal.total());
if (!cInternal.empty())
C.reshaped(BlobShape::like(cInternal)).matRefConst().copyTo(cInternal);
C.reshaped(Shape::like(cInternal)).matRefConst().copyTo(cInternal);
else
C.matRefConst().copyTo(cInternal);
}
@ -129,7 +131,7 @@ public:
{
CV_Assert(hInternal.empty() || H.total() == hInternal.total());
if (!hInternal.empty())
H.reshaped(BlobShape::like(hInternal)).matRefConst().copyTo(hInternal);
H.reshaped(Shape::like(hInternal)).matRefConst().copyTo(hInternal);
else
H.matRefConst().copyTo(hInternal);
}
@ -153,7 +155,7 @@ public:
return res;
}
void setOutShape(const BlobShape &outTailShape_)
void setOutShape(const Shape &outTailShape_)
{
CV_Assert(!allocated || outTailShape_.total() == outTailShape.total());
outTailShape = outTailShape_;
@ -171,7 +173,7 @@ public:
blobs[0] = Wh;
blobs[1] = Wx;
blobs[2] = bias;
blobs[2].reshape(BlobShape(1, (int)bias.total()));
blobs[2].reshape(Shape(1, (int)bias.total()));
}
void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
@ -186,24 +188,24 @@ public:
if (!outTailShape.isEmpty())
CV_Assert(outTailShape.total() == numOut);
else
outTailShape = BlobShape(numOut);
outTailShape = Shape(numOut);
if (useTimestampDim)
{
CV_Assert(input[0]->dims() >= 2 && (int)input[0]->total(2) == numInp);
numTimeStamps = input[0]->size(0);
numSamples = input[0]->size(1);
outResShape = BlobShape(numTimeStamps, numSamples) + outTailShape;
outResShape = Shape(numTimeStamps, numSamples) + outTailShape;
}
else
{
CV_Assert(input[0]->dims() >= 1 && (int)input[0]->total(1) == numInp);
numTimeStamps = 1;
numSamples = input[0]->size(0);
outResShape = BlobShape(numSamples) + outTailShape;
outResShape = Shape(numSamples) + outTailShape;
}
outTsMatShape = BlobShape(numSamples, numOut);
outTsShape = BlobShape(numSamples) + outTailShape;
outTsMatShape = Shape(numSamples, numOut);
outTsShape = Shape(numSamples) + outTailShape;
dtype = input[0]->type();
CV_Assert(dtype == CV_32F || dtype == CV_64F);
@ -246,25 +248,25 @@ public:
void forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
const Mat &Wh = blobs[0].matRefConst();
const Mat &Wx = blobs[1].matRefConst();
const Mat &bias = blobs[2].matRefConst();
const Mat &Wh = blobs[0].getRefConst<Mat>();
const Mat &Wx = blobs[1].getRefConst<Mat>();
const Mat &bias = blobs[2].getRefConst<Mat>();
int numSamplesTotal = numTimeStamps*numSamples;
Mat xTs = input[0]->reshaped(BlobShape(numSamplesTotal, numInp)).matRefConst();
Mat xTs = reshaped(input[0]->getRefConst<Mat>(), Shape(numSamplesTotal, numInp));
BlobShape outMatShape(numSamplesTotal, numOut);
Mat hOutTs = output[0].reshaped(outMatShape).matRef();
Mat cOutTs = (produceCellOutput) ? output[1].reshaped(outMatShape).matRef() : Mat();
Shape outMatShape(numSamplesTotal, numOut);
Mat hOutTs = reshaped(output[0].getRef<Mat>(), outMatShape);
Mat cOutTs = (produceCellOutput) ? reshaped(output[1].getRef<Mat>(), outMatShape) : Mat();
for (int ts = 0; ts < numTimeStamps; ts++)
{
Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
gemmCPU(xCurr, Wx, 1, gates, 0, GEMM_2_T); // Wx * x_t
gemmCPU(hInternal, Wh, 1, gates, 1, GEMM_2_T); //+Wh * h_{t-1}
gemmCPU(dummyOnes, bias, 1, gates, 1); //+b
dnn::gemm(xCurr, Wx, 1, gates, 0, GEMM_2_T); // Wx * x_t
dnn::gemm(hInternal, Wh, 1, gates, 1, GEMM_2_T); //+Wh * h_{t-1}
dnn::gemm(dummyOnes, bias, 1, gates, 1); //+b
Mat getesIFO = gates.colRange(0, 3*numOut);
Mat gateI = gates.colRange(0*numOut, 1*numOut);
@ -394,30 +396,30 @@ public:
void reshapeOutput(std::vector<Blob> &output)
{
output.resize((produceH) ? 2 : 1);
output[0].create(BlobShape(numTimestamps, numSamples, numO), dtype);
output[0].create(Shape(numTimestamps, numSamples, numO), dtype);
if (produceH)
output[1].create(BlobShape(numTimestamps, numSamples, numH), dtype);
output[1].create(Shape(numTimestamps, numSamples, numH), dtype);
}
void forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
Mat xTs = input[0]->reshaped(BlobShape(numSamplesTotal, numX)).matRefConst();
Mat oTs = output[0].reshaped(BlobShape(numSamplesTotal, numO)).matRef();
Mat hTs = (produceH) ? output[1].reshaped(BlobShape(numSamplesTotal, numH)).matRef() : Mat();
Mat xTs = reshaped(input[0]->getRefConst<Mat>(), Shape(numSamplesTotal, numX));
Mat oTs = reshaped(output[0].getRef<Mat>(), Shape(numSamplesTotal, numO));
Mat hTs = (produceH) ? reshaped(output[1].getRef<Mat>(), Shape(numSamplesTotal, numH)) : Mat();
for (int ts = 0; ts < numTimestamps; ts++)
{
Range curRowRange = Range(ts * numSamples, (ts + 1) * numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
gemmCPU(hPrev, Whh, 1, hCurr, 0, GEMM_2_T); // W_{hh} * h_{prev}
gemmCPU(xCurr, Wxh, 1, hCurr, 1, GEMM_2_T); //+W_{xh} * x_{curr}
gemmCPU(dummyBiasOnes, bh, 1, hCurr, 1); //+bh
dnn::gemm(hPrev, Whh, 1, hCurr, 0, GEMM_2_T); // W_{hh} * h_{prev}
dnn::gemm(xCurr, Wxh, 1, hCurr, 1, GEMM_2_T); //+W_{xh} * x_{curr}
dnn::gemm(dummyBiasOnes, bh, 1, hCurr, 1); //+bh
tanh(hCurr, hPrev);
Mat oCurr = oTs.rowRange(curRowRange);
gemmCPU(hPrev, Who, 1, oCurr, 0, GEMM_2_T); // W_{ho} * h_{prev}
gemmCPU(dummyBiasOnes, bo, 1, oCurr, 1); //+b_o
dnn::gemm(hPrev, Who, 1, oCurr, 0, GEMM_2_T); // W_{ho} * h_{prev}
dnn::gemm(dummyBiasOnes, bo, 1, oCurr, 1); //+b_o
tanh(oCurr, oCurr);
if (produceH)

@ -692,13 +692,13 @@ struct TorchImporter : public ::cv::dnn::Importer
}
};
CV_EXPORTS Ptr<Importer> createTorchImporter(const String &filename, bool isBinary)
// Factory: builds an Importer that reads a serialized Torch object from
// `filename`; `isBinary` selects the binary vs. text serialization format.
Ptr<Importer> createTorchImporter(const String &filename, bool isBinary)
{
    TorchImporter *importer = new TorchImporter(filename, isBinary);
    return Ptr<Importer>(importer);
}
CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary)
Blob readTorchBlob(const String &filename, bool isBinary)
{
Ptr<TorchImporter> importer(new TorchImporter(filename, isBinary));
importer->readObject();
@ -709,13 +709,13 @@ CV_EXPORTS Blob readTorchBlob(const String &filename, bool isBinary)
#else //ENABLE_TORCH_IMPORTER
CV_EXPORTS Ptr<Importer> createTorchImporter(const String&, bool)
// Stub used when the module is compiled without ENABLE_TORCH_IMPORTER:
// raises a clear not-implemented error instead of returning a null importer.
// Fix: error message read "was build" -- corrected to "was built".
Ptr<Importer> createTorchImporter(const String&, bool)
{
    CV_Error(Error::StsNotImplemented, "Module was built without Torch importer");
    return Ptr<Importer>(); // unreachable: CV_Error throws, but keeps compilers satisfied
}
CV_EXPORTS Blob readTorchMat(const String&, bool)
Blob readTorchBlob(const String&, bool)
{
CV_Error(Error::StsNotImplemented, "Module was build without Torch importer");
return Blob();

@ -29,11 +29,11 @@ Explanation
Put these files into working dir of this program example.
-# Create the importer of Caffe models
@snippet dnn/samples/caffe_googlenet.cpp Create the importer of Caffe model
-# Read and initialize network using path to .prototxt and .caffemodel files
@snippet dnn/samples/caffe_googlenet.cpp Read and initialize network
-# Create the network and initialize it by using the created importer
@snippet dnn/samples/caffe_googlenet.cpp Initialize network
-# Check that network was read successfully
@snippet dnn/samples/caffe_googlenet.cpp Check that network was read successfully
-# Read input image and convert to the blob, acceptable by GoogleNet
@snippet dnn/samples/caffe_googlenet.cpp Prepare blob
@ -41,7 +41,7 @@ Explanation
Now image is actually a 3-dimensional array with 224x224x3 shape.
Next, we convert the image to 4-dimensional blob (so-called batch) with 1x2x224x224 shape by using special @ref cv::dnn::Blob constructor.
Next, we convert the image to 4-dimensional blob (so-called batch) with 1x3x224x224 shape by using special cv::dnn::Blob::fromImages constructor.
-# Pass the blob to the network
@snippet dnn/samples/caffe_googlenet.cpp Set input blob

Loading…
Cancel
Save