Merge pull request #707 from ludv1x:dnn

pull/746/head
Vadim Pisarevsky 9 years ago
commit 9a342b5187
  1. 25    modules/dnn/CMakeLists.txt
  2. 97    modules/dnn/cmake/FindAtlas.cmake
  3. 106   modules/dnn/cmake/FindOpenBLAS.cmake
  4. 60    modules/dnn/cmake/OpenCVFindCBLAS.cmake
  5. 123   modules/dnn/cmake/OpenCVFindMKL.cmake
  6. 371   modules/dnn/include/opencv2/dnn/all_layers.hpp
  7. 155   modules/dnn/include/opencv2/dnn/blob.hpp
  8. 281   modules/dnn/include/opencv2/dnn/blob.inl.hpp
  9. 4     modules/dnn/include/opencv2/dnn/dict.hpp
  10. 25   modules/dnn/include/opencv2/dnn/dnn.hpp
  11. 6    modules/dnn/include/opencv2/dnn/dnn.inl.hpp
  12. 17   modules/dnn/include/opencv2/dnn/layer.hpp
  13. 137  modules/dnn/include/opencv2/dnn/shape_utils.hpp
  14. 80   modules/dnn/perf/perf_convolution.cpp
  15. 3    modules/dnn/perf/perf_main.cpp
  16. 17   modules/dnn/perf/perf_precomp.hpp
  17. 1    modules/dnn/samples/.gitignore
  18. 4    modules/dnn/samples/caffe_googlenet.cpp
  19. 432  modules/dnn/src/blob.cpp
  20. 4    modules/dnn/src/caffe/caffe_importer.cpp
  21. 294  modules/dnn/src/caffe/layer_loaders.cpp
  22. 60   modules/dnn/src/caffe/layer_loaders.hpp
  23. 7    modules/dnn/src/dnn.cpp
  24. 51   modules/dnn/src/init.cpp
  25. 96   modules/dnn/src/layers/concat_layer.cpp
  26. 27   modules/dnn/src/layers/concat_layer.hpp
  27. 366  modules/dnn/src/layers/convolution_layer.cpp
  28. 82   modules/dnn/src/layers/convolution_layer.hpp
  29. 46   modules/dnn/src/layers/elementwise_layers.cpp
  30. 314  modules/dnn/src/layers/elementwise_layers.hpp
  31. 115  modules/dnn/src/layers/fully_connected_layer.cpp
  32. 28   modules/dnn/src/layers/fully_connected_layer.hpp
  33. 39   modules/dnn/src/layers/layers_common.cpp
  34. 4    modules/dnn/src/layers/layers_common.hpp
  35. 266  modules/dnn/src/layers/lrn_layer.cpp
  36. 38   modules/dnn/src/layers/lrn_layer.hpp
  37. 32   modules/dnn/src/layers/mvn_layer.cpp
  38. 8    modules/dnn/src/layers/mvn_layer.hpp
  39. 95   modules/dnn/src/layers/op_blas.cpp
  40. 40   modules/dnn/src/layers/op_blas.hpp
  41. 116  modules/dnn/src/layers/op_im2col.cpp
  42. 231  modules/dnn/src/layers/op_im2col.hpp
  43. 301  modules/dnn/src/layers/pooling_layer.cpp
  44. 48   modules/dnn/src/layers/pooling_layer.hpp
  45. 440  modules/dnn/src/layers/recurrent_layers.cpp
  46. 54   modules/dnn/src/layers/recurrent_layers.hpp
  47. 111  modules/dnn/src/layers/reshape_layer.cpp
  48. 15   modules/dnn/src/layers/reshape_layer.hpp
  49. 91   modules/dnn/src/layers/slice_layer.cpp
  50. 16   modules/dnn/src/layers/slice_layer.hpp
  51. 212  modules/dnn/src/layers/softmax_layer.cpp
  52. 30   modules/dnn/src/layers/softmax_layer.hpp
  53. 39   modules/dnn/src/layers/split_layer.cpp
  54. 10   modules/dnn/src/layers/split_layer.hpp
  55. 44   modules/dnn/src/opencl/activations.cl
  56. 62   modules/dnn/src/opencl/col2im.cl
  57. 10   modules/dnn/src/opencl/im2col.cl
  58. 76   modules/dnn/src/opencl/lrn.cl
  59. 94   modules/dnn/src/opencl/pooling.cl
  60. 75   modules/dnn/src/opencl/softmax.cl
  61. 1    modules/dnn/src/precomp.hpp
  62. 28   modules/dnn/src/torch/torch_importer.cpp
  63. 17   modules/dnn/test/test_googlenet.cpp
  64. 267  modules/dnn/test/test_layers.cpp
  65. 28   modules/dnn/test/test_main.cpp
  66. 1    modules/dnn/testdata/dnn/.gitignore

@@ -17,15 +17,38 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4701)
# Resolve libprotobuf dependency
# ----------------------------------------------------------------------------
include(cmake/OpenCVFindLibProtobuf.cmake)
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS})
ocv_source_group("Src\\protobuf" FILES ${PROTOBUF_SRCS} ${PROTOBUF_HDRS})
ocv_module_include_directories(include ${PROTOBUF_INCLUDE_DIR})
# ----------------------------------------------------------------------------
# Try to find BLAS libraries
# ----------------------------------------------------------------------------
OCV_OPTION(${the_module}_WITH_BLAS "Use external BLAS library to speedup processing" OFF)
include(cmake/OpenCVFindCBLAS.cmake)
ocv_glob_module_sources(${PROTOBUF_SRCS} ${PROTOBUF_HDRS} ${CBLAS_H_PROXY_PATH})
ocv_create_module(${PROTOBUF_LIBRARIES})
ocv_add_samples()
ocv_add_accuracy_tests()
ocv_add_perf_tests()
# ----------------------------------------------------------------------------
# Link BLAS
# ----------------------------------------------------------------------------
if(${the_module}_WITH_BLAS AND HAVE_BLAS)
add_definitions(-DHAVE_CBLAS=1)
ocv_module_include_directories(${${the_module}_BLAS_INCLUDE_DIR})
ocv_add_dependencies(${the_module} ${${the_module}_BLAS_LIBRARIES})
target_link_libraries(${the_module} ${${the_module}_BLAS_LIBRARIES})
if(${the_module}_BLAS_BINARIES)
ocv_install_target(${the_module} EXPORT ${the_module}_BLAS_BINARIES
RUNTIME DESTINATION ${OPENCV_BIN_INSTALL_PATH} COMPONENT libs)
endif()
else()
add_definitions(-DHAVE_CBLAS=0)
endif()
# ----------------------------------------------------------------------------
# Download pre-trained models for complex testing on GoogLeNet and AlexNet
# ----------------------------------------------------------------------------

@@ -0,0 +1,97 @@
#COPYRIGHT
#
#All contributions by the University of California:
#Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
#All rights reserved.
#
#All other contributions:
#Copyright (c) 2014, 2015, the respective contributors
#All rights reserved.
#
#Caffe uses a shared copyright model: each contributor holds copyright over
#their contributions to Caffe. The project versioning records all such
#contribution and copyright details. If a contributor wants to further mark
#their specific copyright on a particular contribution, they should indicate
#their copyright solely in the commit message of the change when it is
#committed.
#
#LICENSE
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#CONTRIBUTION AGREEMENT
#
#By contributing to the BVLC/caffe repository through pull-request, comment,
#or otherwise, the contributor releases their content to the
#license and copyright terms herein.
# Find the Atlas (and Lapack) libraries
#
# The following variables are optionally searched for defaults
# Atlas_ROOT_DIR: Base directory where all Atlas components are found
#
# The following are set after configuration is done:
# Atlas_FOUND
# Atlas_INCLUDE_DIRS
# Atlas_LIBRARIES
# Atlas_LIBRARY_DIRS
set(Atlas_INCLUDE_SEARCH_PATHS
/usr/include/atlas
/usr/include/atlas-base
$ENV{Atlas_ROOT_DIR}
$ENV{Atlas_ROOT_DIR}/include
)
set(Atlas_LIB_SEARCH_PATHS
/usr/lib/atlas
/usr/lib/atlas-base
$ENV{Atlas_ROOT_DIR}
$ENV{Atlas_ROOT_DIR}/lib
)
find_path(Atlas_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS})
find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS})
find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS})
find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS})
find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS})
set(LOOKED_FOR
Atlas_CBLAS_INCLUDE_DIR
Atlas_CLAPACK_INCLUDE_DIR
Atlas_CBLAS_LIBRARY
Atlas_BLAS_LIBRARY
Atlas_LAPACK_LIBRARY
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Atlas DEFAULT_MSG ${LOOKED_FOR})
if(ATLAS_FOUND)
set(Atlas_INCLUDE_DIR ${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR})
set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY})
mark_as_advanced(${LOOKED_FOR})
message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})")
endif(ATLAS_FOUND)

@@ -0,0 +1,106 @@
#COPYRIGHT
#
#All contributions by the University of California:
#Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
#All rights reserved.
#
#All other contributions:
#Copyright (c) 2014, 2015, the respective contributors
#All rights reserved.
#
#Caffe uses a shared copyright model: each contributor holds copyright over
#their contributions to Caffe. The project versioning records all such
#contribution and copyright details. If a contributor wants to further mark
#their specific copyright on a particular contribution, they should indicate
#their copyright solely in the commit message of the change when it is
#committed.
#
#LICENSE
#
#Redistribution and use in source and binary forms, with or without
#modification, are permitted provided that the following conditions are met:
#
#1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#CONTRIBUTION AGREEMENT
#
#By contributing to the BVLC/caffe repository through pull-request, comment,
#or otherwise, the contributor releases their content to the
#license and copyright terms herein.
SET(Open_BLAS_INCLUDE_SEARCH_PATHS
/usr/include
/usr/include/openblas
/usr/include/openblas-base
/usr/local/include
/usr/local/include/openblas
/usr/local/include/openblas-base
/opt/OpenBLAS/include
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/include
)
SET(Open_BLAS_LIB_SEARCH_PATHS
/lib/
/lib/openblas-base
/lib64/
/usr/lib
/usr/lib/openblas-base
/usr/lib64
/usr/local/lib
/usr/local/lib64
/opt/OpenBLAS/lib
$ENV{OpenBLAS}
$ENV{OpenBLAS}/lib
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/lib
)
FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS})
FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS})
SET(OpenBLAS_FOUND ON)
# Check include files
IF(NOT OpenBLAS_INCLUDE_DIR)
SET(OpenBLAS_FOUND OFF)
MESSAGE(STATUS "Could not find OpenBLAS include. Turning OpenBLAS_FOUND off")
ENDIF()
# Check libraries
IF(NOT OpenBLAS_LIB)
SET(OpenBLAS_FOUND OFF)
MESSAGE(STATUS "Could not find OpenBLAS lib. Turning OpenBLAS_FOUND off")
ENDIF()
IF (OpenBLAS_FOUND)
IF (NOT OpenBLAS_FIND_QUIETLY)
MESSAGE(STATUS "Found OpenBLAS libraries: ${OpenBLAS_LIB}")
MESSAGE(STATUS "Found OpenBLAS include: ${OpenBLAS_INCLUDE_DIR}")
ENDIF (NOT OpenBLAS_FIND_QUIETLY)
ELSE (OpenBLAS_FOUND)
IF (OpenBLAS_FIND_REQUIRED)
MESSAGE(FATAL_ERROR "Could not find OpenBLAS")
ENDIF (OpenBLAS_FIND_REQUIRED)
ENDIF (OpenBLAS_FOUND)
MARK_AS_ADVANCED(
OpenBLAS_INCLUDE_DIR
OpenBLAS_LIB
OpenBLAS
)

@@ -0,0 +1,60 @@
macro(_find_file_in_dirs VAR NAME DIRS)
find_path(${VAR} ${NAME} ${DIRS} NO_DEFAULT_PATH)
set(${VAR} ${${VAR}}/${NAME})
unset(${VAR} CACHE)
endmacro()
if(${the_module}_WITH_BLAS)
set(_bp ${the_module}_BLAS) #prefix for blas variables
set(BLAS_CBLAS_H "cblas.h")
set(HAVE_BLAS "")
if(NOT HAVE_BLAS) #check custom BLAS from user input
if(${_bp}_INCLUDE_DIR AND ${_bp}_LIBRARIES AND ${_bp}_CBLAS_H)
set(HAVE_BLAS "Custom")
endif()
endif()
if(NOT HAVE_BLAS)
include(cmake/OpenCVFindMKL.cmake)
if(MKL_FOUND)
set(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIRS})
set(BLAS_LIBRARIES ${MKL_LIBRARIES} )
set(BLAS_CBLAS_H "mkl_cblas.h" )
set(HAVE_BLAS "MKL")
endif()
endif()
if(NOT HAVE_BLAS)
include(cmake/FindOpenBLAS.cmake)
if(OpenBLAS_FOUND)
set(BLAS_INCLUDE_DIR ${OpenBLAS_INCLUDE_DIR} )
set(BLAS_LIBRARIES ${OpenBLAS_LIB} )
set(HAVE_BLAS "OpenBLAS")
endif()
endif()
if(NOT HAVE_BLAS AND UNIX)
include(cmake/FindAtlas.cmake)
if(ATLAS_FOUND)
set(BLAS_INCLUDE_DIR ${Atlas_INCLUDE_DIR})
set(BLAS_LIBRARIES ${Atlas_LIBRARIES} )
set(HAVE_BLAS "Atlas")
endif()
endif()
if(NOT HAVE_BLAS OR NOT (HAVE_BLAS STREQUAL "Custom"))
set(${_bp}_INCLUDE_DIR ${BLAS_INCLUDE_DIR} CACHE PATH "Path to BLAS include dir" FORCE)
set(${_bp}_CBLAS_H ${BLAS_CBLAS_H} CACHE STRING "Alternative name of cblas.h" FORCE)
set(${_bp}_LIBRARIES ${BLAS_LIBRARIES} CACHE FILEPATH "Path to BLAS libraries that will be linked with ${the_module} module" FORCE)
set(${_bp}_BINARIES ${BLAS_BINARIES} CACHE FILEPATH "Path to BLAS binaries (.so, .dll) that will be installed with ${the_module} module" FORCE)
endif()
if(HAVE_BLAS) #adding proxy cblas.h header
_find_file_in_dirs(CBLAS_H_PATH ${${_bp}_CBLAS_H} ${${_bp}_INCLUDE_DIR})
if(NOT CBLAS_H_PATH)
message(WARNING "CBLAS header '${${_bp}_CBLAS_H}' not found in '${${_bp}_INCLUDE_DIR}'")
endif()
set(CBLAS_H_PROXY_PATH ${CMAKE_CURRENT_BINARY_DIR}/opencv_cblas.hpp)
set(_include_str "\#include \"${CBLAS_H_PATH}\"")
file(WRITE ${CBLAS_H_PROXY_PATH} ${_include_str})
endif()
endif()

@@ -0,0 +1,123 @@
#
# The script to detect Intel(R) Math Kernel Library (MKL)
# installation/package
#
# Parameters:
# MKL_WITH_TBB
#
# On return this will define:
#
# HAVE_MKL - True if Intel MKL is found
# MKL_ROOT_DIR - root of MKL installation
# MKL_INCLUDE_DIRS - MKL include folder
# MKL_LIBRARIES - MKL libraries that are used by OpenCV
#
macro(mkl_fail)
set(HAVE_MKL OFF CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
unset(MKL_INCLUDE_DIRS CACHE)
unset(MKL_LIBRARIES CACHE)
endmacro()
macro(get_mkl_version VERSION_FILE)
# read MKL version info from file
file(STRINGS ${VERSION_FILE} STR1 REGEX "__INTEL_MKL__")
file(STRINGS ${VERSION_FILE} STR2 REGEX "__INTEL_MKL_MINOR__")
file(STRINGS ${VERSION_FILE} STR3 REGEX "__INTEL_MKL_UPDATE__")
#file(STRINGS ${VERSION_FILE} STR4 REGEX "INTEL_MKL_VERSION")
# extract info and assign to variables
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MAJOR ${STR1})
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_MINOR ${STR2})
string(REGEX MATCHALL "[0-9]+" MKL_VERSION_UPDATE ${STR3})
set(MKL_VERSION_STR "${MKL_VERSION_MAJOR}.${MKL_VERSION_MINOR}.${MKL_VERSION_UPDATE}" CACHE STRING "MKL version" FORCE)
endmacro()
if(NOT DEFINED MKL_USE_MULTITHREAD)
OCV_OPTION(MKL_WITH_TBB "Use MKL with TBB multithreading" OFF)#ON IF WITH_TBB)
OCV_OPTION(MKL_WITH_OPENMP "Use MKL with OpenMP multithreading" OFF)#ON IF WITH_OPENMP)
endif()
#check current MKL_ROOT_DIR
if(NOT MKL_ROOT_DIR OR NOT EXISTS ${MKL_ROOT_DIR}/include/mkl.h)
set(MKLROOT_PATHS ${MKL_ROOT_DIR})
if(DEFINED ENV{MKLROOT})
list(APPEND MKLROOT_PATHS $ENV{MKLROOT})
endif()
if(WIN32)
set(ProgramFilesx86 "ProgramFiles(x86)")
list(APPEND MKLROOT_PATHS $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows/mkl)
endif()
if(UNIX)
list(APPEND MKLROOT_PATHS "/opt/intel/mkl")
endif()
find_path(MKL_ROOT_DIR include/mkl.h PATHS ${MKLROOT_PATHS})
endif()
if(NOT MKL_ROOT_DIR)
mkl_fail()
return()
endif()
set(MKL_INCLUDE_DIRS ${MKL_ROOT_DIR}/include)
set(MKL_INCLUDE_HEADERS ${MKL_INCLUDE_DIRS}/mkl.h ${MKL_INCLUDE_DIRS}/mkl_version.h)
#determine arch
if(CMAKE_CXX_SIZEOF_DATA_PTR EQUAL 8)
set(MKL_X64 1)
set(MKL_ARCH "intel64")
include(CheckTypeSize)
CHECK_TYPE_SIZE(int _sizeof_int)
if (_sizeof_int EQUAL 4)
set(MKL_LP64 "lp64")
else()
set(MKL_LP64 "ilp64")
endif()
else()
set(MKL_ARCH "ia32")
endif()
if(MSVC)
set(MKL_EXT ".lib")
set(MKL_PRE "")
else()
set(MKL_EXT ".a")
set(MKL_PRE "lib")
endif()
set(MKL_LIB_DIR ${MKL_ROOT_DIR}/lib/${MKL_ARCH})
set(MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_core${MKL_EXT} ${MKL_LIB_DIR}/${MKL_PRE}mkl_intel_${MKL_LP64}${MKL_EXT})
if(MKL_WITH_TBB)
list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_tbb_thread${MKL_EXT})
list(APPEND MKL_LIBRARIES ${MKL_ROOT_DIR}/../tbb/lib/${MKL_ARCH}/tbb${MKL_EXT})
elseif(MKL_WITH_OPENMP)
message(FATAL_ERROR "Multithreaded MKL is not supported yet")
else()
list(APPEND MKL_LIBRARIES ${MKL_LIB_DIR}/${MKL_PRE}mkl_sequential${MKL_EXT})
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL MKL_INCLUDE_HEADERS MKL_LIBRARIES)
if(MKL_FOUND)
get_mkl_version(${MKL_INCLUDE_DIRS}/mkl_version.h)
message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
set(HAVE_MKL ON CACHE BOOL "True if MKL found")
set(MKL_ROOT_DIR ${MKL_ROOT_DIR} CACHE PATH "Path to MKL directory")
set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIRS} CACHE PATH "Path to MKL include directory")
if(NOT UNIX)
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE FILEPATH "MKL libraries")
else()
#it's ugly but helps to avoid cyclic lib problem
set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")
set(MKL_LIBRARIES ${MKL_LIBRARIES} CACHE STRING "MKL libraries")
endif()
else()
endif()

@@ -0,0 +1,371 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_DNN_ALL_LAYERS_HPP__
#define __OPENCV_DNN_DNN_ALL_LAYERS_HPP__
#include <opencv2/dnn.hpp>
namespace cv
{
namespace dnn
{
//! @addtogroup dnn
//! @{
/** @defgroup dnnLayerList Partial List of Implemented Layers
@{
This subsection of the dnn module contains information about built-in layers and their descriptions.
Classes listed here, in fact, provide C++ API for creating instances of built-in layers.
In addition to this way of layer instantiation, there is a more common factory API (see @ref dnnLayerFactory); it allows creating layers dynamically (by name) and registering new ones.
You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref Importer, @ref createCaffeImporter(), @ref createTorchImporter()).
Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
In particular, the following layers and Caffe @ref Importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
- Convolution
- Deconvolution
- Pooling
- InnerProduct
- TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
- Softmax
- Reshape, Flatten, Slice, Split
- LRN
- MVN
- Dropout (since it does nothing on the forward pass)
*/
//! LSTM recurrent layer
class CV_EXPORTS_W LSTMLayer : public Layer
{
public:
/** Creates instance of LSTM layer */
static Ptr<LSTMLayer> create();
/** Set trained weights for LSTM layer.
LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
Then the current output and the current cell state are computed as follows:
@f{eqnarray*}{
h_t &= o_t \odot tanh(c_t), \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
@f}
where @f$\odot@f$ is the per-element multiplication operation and @f$i_t, f_t, o_t, g_t@f$ are internal gates that are computed using learned weights.
Gates are computed as follows:
@f{eqnarray*}{
i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
@f}
where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
@f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}; W_{xg}] @f$
(i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}; W_{hg}] @f$, @f$ W_h \in R^{4N_h \times N_h} @f$
and for @f$ b = [b_i; b_f; b_o; b_g] @f$, @f$ b \in R^{4N_h} @f$.
@param Wh is the matrix defining how the previous output is transformed to internal gates (i.e. @f$ W_h @f$ in the notation above)
@param Wx is the matrix defining how the current input is transformed to internal gates (i.e. @f$ W_x @f$ in the notation above)
@param b is the bias vector (i.e. @f$ b @f$ in the notation above)
*/
virtual void setWeights(const Blob &Wh, const Blob &Wx, const Blob &b) = 0;
/** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
* @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
* where `Wh` is parameter from setWeights().
*/
virtual void setOutShape(const BlobShape &outTailShape = BlobShape::empty()) = 0;
/** @brief Set @f$ h_{t-1} @f$ value that will be used in next forward() calls.
* @details By default @f$ h_{t-1} @f$ is initialized with zeros and updated after each forward() call.
*/
virtual void setH(const Blob &H) = 0;
/** @brief Returns current @f$ h_{t-1} @f$ value (deep copy). */
virtual Blob getH() const = 0;
/** @brief Set @f$ c_{t-1} @f$ value that will be used in next forward() calls.
* @details By default @f$ c_{t-1} @f$ is initialized with zeros and updated after each forward() call.
*/
virtual void setC(const Blob &C) = 0;
/** @brief Returns current @f$ c_{t-1} @f$ value (deep copy). */
virtual Blob getC() const = 0;
/** @brief Specifies whether the first dimension of the input blob is interpreted as the timestamp dimension or as the sample dimension.
*
* If the flag is set to true then the shape of the input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies the number of timestamps and `N` is the number of independent streams.
* In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
*
* If the flag is set to false then the shape of the input blob will be interpreted as [`N`, `[data dims]`].
* In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
*/
virtual void setUseTimstampsDim(bool use = true) = 0;
/** @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
* @details Shape of the second output is the same as first output.
*/
virtual void setProduceCellOutput(bool produce = false) = 0;
/** In the common case this layer uses a single input with @f$x_t@f$ values to compute the output(s) @f$h_t@f$ (and @f$c_t@f$).
* @param input should contain packed values @f$x_t@f$
* @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
*
* If setUseTimstampsDim() is set to true then @p input[0] should have at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
* where `T` specifies the number of timestamps and `N` is the number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
*
* If setUseTimstampsDim() is set to false then @p input[0] should contain a single timestamp; its shape should have the form [`N`, `[data dims]`] with at least one dimension
* (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
*/
void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
int inputNameToIndex(String inputName);
int outputNameToIndex(String outputName);
};
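To make the weight packing above concrete, here is a minimal usage sketch based on the declarations in this header (sizes are illustrative; weights are left uninitialized, and the allocate()/forward() calling convention is assumed from the Layer interface):

#include <opencv2/dnn/all_layers.hpp>
void lstmSketch()
{
    using namespace cv;
    using namespace cv::dnn;
    const int Nx = 8, Nh = 16, T = 5, N = 2;
    // Packed weights: Wx is [4*Nh x Nx], Wh is [4*Nh x Nh], b is [4*Nh],
    // with the i, f, o, g gate rows stacked as described above.
    Blob Wx(BlobShape(4 * Nh, Nx)), Wh(BlobShape(4 * Nh, Nh)), b(BlobShape(4 * Nh));
    Ptr<LSTMLayer> lstm = LSTMLayer::create();
    lstm->setWeights(Wh, Wx, b);
    lstm->setUseTimstampsDim(true);      // input will be interpreted as [T, N, Nx]
    Blob x(BlobShape(T, N, Nx));
    std::vector<Blob*> inputs(1, &x);
    std::vector<Blob> outputs;
    lstm->allocate(inputs, outputs);     // assumed Layer::allocate() signature
    lstm->forward(inputs, outputs);      // outputs[0] holds h_t with shape [T, N, Nh]
}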
//! Classical recurrent layer
class CV_EXPORTS_W RNNLayer : public Layer
{
public:
/** Creates instance of RNNLayer */
static Ptr<RNNLayer> create();
/** Setups learned weights.
Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
@f{eqnarray*}{
h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\
o_t &= tanh&(W_{ho} h_t + b_o),
@f}
@param Wxh is @f$ W_{xh} @f$ matrix
@param bh is @f$ b_{h} @f$ vector
@param Whh is @f$ W_{hh} @f$ matrix
@param Who is @f$ W_{ho} @f$ matrix
@param bo is @f$ b_{o} @f$ vector
*/
virtual void setWeights(const Blob &Wxh, const Blob &bh, const Blob &Whh, const Blob &Who, const Blob &bo) = 0;
/** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
* @details Shape of the second output is the same as first output.
*/
virtual void setProduceHiddenOutput(bool produce = false) = 0;
/** Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and computes two outputs @f$o_t@f$ and @f$h_t@f$.
@param input should contain packed input @f$x_t@f$.
@param output should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
@p input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` are the number of timestamps and the number of independent samples of @f$x_t@f$ respectively.
@p output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is the number of rows in the @f$ W_{ho} @f$ matrix.
If setProduceHiddenOutput() is set to true then @p output[1] will contain a Blob with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
*/
void forward(std::vector<Blob*> &input, std::vector<Blob> &output);
};
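In the same spirit, a hedged sketch of the weight shapes RNNLayer expects (Nx, Nh and No are the input, hidden and output sizes; values illustrative, weights uninitialized):

void rnnSketch()
{
    using namespace cv;
    using namespace cv::dnn;
    const int Nx = 8, Nh = 16, No = 4;
    Ptr<RNNLayer> rnn = RNNLayer::create();
    rnn->setWeights(Blob(BlobShape(Nh, Nx)),   // Wxh
                    Blob(BlobShape(Nh)),       // bh
                    Blob(BlobShape(Nh, Nh)),   // Whh
                    Blob(BlobShape(No, Nh)),   // Who
                    Blob(BlobShape(No)));      // bo
    // input[0] of shape [T, N, Nx] then yields output[0] of shape [T, N, No].
}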
class CV_EXPORTS_W BaseConvolutionLayer : public Layer
{
public:
Size kernel, stride, pad;
};
class CV_EXPORTS_W ConvolutionLayer : public BaseConvolutionLayer
{
public:
static Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
};
class CV_EXPORTS_W DeconvolutionLayer : public BaseConvolutionLayer
{
public:
static Ptr<BaseConvolutionLayer> create(Size kernel = Size(3, 3), Size stride = Size(1, 1), Size pad = Size(0, 0));
};
class CV_EXPORTS_W LRNLayer : public Layer
{
public:
enum Type
{
CHANNEL_NRM,
SPATIAL_NRM
};
int type;
int size;
double alpha, beta;
static Ptr<LRNLayer> create(int type = CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75);
};
class CV_EXPORTS_W PoolingLayer : public Layer
{
public:
enum Type
{
MAX,
AVE,
STOCHASTIC
};
int type;
Size kernel, stride, pad;
static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
};
class CV_EXPORTS_W SoftmaxLayer : public Layer
{
public:
static Ptr<SoftmaxLayer> create(int axis = 1);
};
class CV_EXPORTS_W InnerProductLayer : public Layer
{
public:
int axis;
static Ptr<InnerProductLayer> create(int axis = 1);
};
class CV_EXPORTS_W MVNLayer : public Layer
{
public:
double eps;
bool normVariance, acrossChannels;
static Ptr<MVNLayer> create(bool normVariance = true, bool acrossChannels = false, double eps = 1e-9);
};
/* Reshaping */
class CV_EXPORTS_W ReshapeLayer : public Layer
{
public:
BlobShape newShapeDesc;
Range newShapeRange;
static Ptr<ReshapeLayer> create(const BlobShape &newShape, Range applyingRange = Range::all());
};
class CV_EXPORTS_W ConcatLayer : public Layer
{
public:
int axis;
static Ptr<ConcatLayer> create(int axis = 1);
};
class CV_EXPORTS_W SplitLayer : public Layer
{
public:
int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
static Ptr<SplitLayer> create(int outputsCount = -1);
};
class CV_EXPORTS_W SliceLayer : public Layer
{
public:
int axis;
std::vector<int> sliceIndices;
static Ptr<SliceLayer> create(int axis);
static Ptr<SliceLayer> create(int axis, const std::vector<int> &sliceIndices);
};
/* Activations */
class CV_EXPORTS_W ReLULayer : public Layer
{
public:
double negativeSlope;
static Ptr<ReLULayer> create(double negativeSlope = 0);
};
class CV_EXPORTS_W TanHLayer : public Layer
{
public:
static Ptr<TanHLayer> create();
};
class CV_EXPORTS_W SigmoidLayer : public Layer
{
public:
static Ptr<SigmoidLayer> create();
};
class CV_EXPORTS_W BNLLLayer : public Layer
{
public:
static Ptr<BNLLLayer> create();
};
class CV_EXPORTS_W AbsLayer : public Layer
{
public:
static Ptr<AbsLayer> create();
};
class CV_EXPORTS_W PowerLayer : public Layer
{
public:
double power, scale, shift;
static Ptr<PowerLayer> create(double power = 1, double scale = 1, double shift = 0);
};
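These activation classes mirror their Caffe counterparts; in particular, PowerLayer is assumed to compute @f$ f(x) = (shift + scale \cdot x)^{power} @f$, following the Caffe convention. A one-line sketch:

Ptr<PowerLayer> sq = PowerLayer::create(/*power*/ 2.0, /*scale*/ 1.0, /*shift*/ 0.0); // f(x) = x^2 under the assumed formula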
//! @}
//! @}
}
}
#endif

@@ -44,6 +44,7 @@
#include <opencv2/core.hpp>
#include <vector>
#include <ostream>
#include <iostream>
namespace cv
{
@@ -55,13 +56,21 @@ namespace dnn
/** @brief Lightweight class for storing and processing a shape of blob (or anything else). */
struct BlobShape
{
explicit BlobShape(int ndims = 4, int fill = 1); //!< Creates n-dim shape and fill its by @p fill
BlobShape(); //!< Creates [1, 1, 1, 1] shape. @todo Make the behavior clearer.
explicit BlobShape(int s0); //!< Creates 1-dim shape [@p s0]
BlobShape(int s0, int s1); //!< @overload
BlobShape(int s0, int s1, int s2); //!< @overload
BlobShape(int num, int cn, int rows, int cols); //!< Creates 4-dim shape [@p num, @p cn, @p rows, @p cols]
BlobShape(int ndims, const int *sizes); //!< Creates n-dim shape from the @p sizes array
//! Creates n-dim shape from the @p sizes array; if @p sizes is NULL then shape will contain unspecified data
BlobShape(int ndims, const int *sizes);
BlobShape(const std::vector<int> &sizes); //!< Creates n-dim shape from the @p sizes vector
template<int n>
BlobShape(const Vec<int, n> &shape); //!< Creates n-dim shape from @ref cv::Vec
//! Creates n-dim shape and fills it with @p fill
static BlobShape all(int ndims, int fill = 1);
/** @brief Returns number of dimensions. */
int dims() const;
@@ -88,16 +97,41 @@ namespace dnn
*/
int xsize(int axis) const;
/** @brief Converts @p axis index to canonical format (where 0 <= @p axis < dims()). */
int canonicalAxis(int axis) const;
/** @brief Returns the product of all sizes of axes. */
ptrdiff_t total();
ptrdiff_t total() const;
/** @brief Computes the product of sizes of axes among the specified axes range [@p startAxis; @p endAxis).
* @details Negative axis indexing can be used. @sa Blob::total(int,int)
*/
ptrdiff_t total(int startAxis, int endAxis = INT_MAX) const;
/** @brief Constructs new shape from axes in range [@p startAxis; @p endAxis).
* @details Negative axis indexing can be used. @sa Blob::total(int,int)
*/
BlobShape slice(int startAxis, int endAxis = INT_MAX) const;
/** @brief Returns pointer to the first element of continuous size array. */
const int *ptr() const;
/** @overload */
int *ptr();
bool equal(const BlobShape &other) const; //!< Checks equality of two shapes.
bool operator== (const BlobShape &r) const; //!< @sa equal()
/** @brief Checks equality of two shapes. */
bool equal(const BlobShape &other) const;
BlobShape operator+ (const BlobShape &r) const; //!< Concatenates two shapes.
bool operator== (const BlobShape &r) const;
static BlobShape like(const Mat &m); //!< Returns shape of passed Mat.
static BlobShape like(const UMat &m); //!< Returns shape of passed UMat.
static BlobShape empty(); //!< Returns empty shape [].
bool isEmpty() const; //!< Returns true if shape is empty (i.e []).
#ifdef CV_CXX_MOVE_SEMANTICS
//TBD
#endif
private:
cv::AutoBuffer<int,4> sz;
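A small sketch of the shape arithmetic declared above (values illustrative):

void shapeSketch()
{
    using namespace cv::dnn;
    BlobShape s(2, 3, 4, 5);               // 4-dim shape [2, 3, 4, 5]
    CV_Assert(s.total() == 120);           // 2*3*4*5
    CV_Assert(s.total(1, 3) == 12);        // axes 1 and 2: 3*4
    BlobShape tail = s.slice(2);           // [4, 5]
    BlobShape cat = BlobShape(7) + tail;   // concatenation: [7, 4, 5]
    CV_Assert(cat == BlobShape(7, 4, 5));
}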
@@ -109,34 +143,57 @@ namespace dnn
* The class is realized as a wrapper over @ref cv::Mat and @ref cv::UMat.
* It will support methods for switching and logical synchronization between CPU and GPU.
*/
class CV_EXPORTS Blob
class CV_EXPORTS_W Blob
{
public:
explicit Blob();
Blob();
/** @brief Constructs blob with specified @p shape and @p type. */
explicit Blob(const BlobShape &shape, int type = CV_32F);
explicit Blob(const BlobShape &shape, int type = CV_32F, int allocFlags = ALLOC_MAT);
/** @brief Constructs Blob from existing Mat or UMat. */
Blob(InputArray data);
/** @brief Constructs 4-dimensional blob (so-called batch) from image or array of images.
* @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of images)
* @param dstCn specify size of second axis of ouptut blob
*/
explicit Blob(InputArray image, int dstCn = -1);
* @param image 2-dimensional multi-channel or 3-dimensional single-channel image (or array of such images)
* @param dstCn specifies size of the second axis of the output blob
*/
static Blob fromImages(InputArray image, int dstCn = -1);
/** @brief Works like Blob::fromImages() but in-place. */
void batchFromImages(InputArray image, int dstCn = -1);
/** @brief Creates blob with specified @p shape and @p type. */
void create(const BlobShape &shape, int type = CV_32F);
void create(const BlobShape &shape, int type = CV_32F, int allocFlags = ALLOC_MAT);
/** @brief Creates blob from cv::Mat or cv::UMat without copying the data */
/** @brief Creates blob from Mat or UMat without copying the data.
* @details If @p in is a Mat then the Mat data is populated, otherwise the UMat data is.
*/
void fill(InputArray in);
/** @brief Creates blob from user data.
* @details If @p deepCopy is false then CPU data will not be allocated.
*/
void fill(const BlobShape &shape, int type, void *data, bool deepCopy = true);
Mat& matRef(); //!< Returns reference to cv::Mat, containing blob data.
const Mat& matRefConst() const; //!< Returns reference to cv::Mat, containing blob data, for read-only purposes.
UMat &umatRef(); //!< Returns reference to cv::UMat, containing blob data (not implemented yet).
const UMat &umatRefConst() const; //!< Returns reference to cv::UMat, containing blob data, for read-only purposes (not implemented yet).
/** @brief Sets @p value to the last used data (if @p allocFlags = -1).
* @details If @p allocFlags != -1 then destination data (Mat or UMat) is determined by flags from AllocFlag enum like in create().
*/
void setTo(InputArray value, int allocFlags = -1);
Mat& matRef(bool writeOnly = true); //!< Returns reference to cv::Mat, containing blob data.
const Mat& matRefConst() const; //!< Returns reference to cv::Mat, containing blob data, for read-only purposes.
UMat &umatRef(bool writeOnly = true); //!< Returns reference to cv::UMat, containing blob data.
const UMat &umatRefConst() const; //!< Returns reference to cv::UMat, containing blob data, for read-only purposes.
template<typename XMat>
XMat &getRef(bool writeOnly = true);
template<typename XMat>
const XMat &getRefConst() const;
void updateMat(bool syncData = true) const; //!< Actualizes data stored inside Mat of Blob; if @p syncData is false then only shape will be actualized.
void updateUMat(bool syncData = true) const; //!< Actualizes data stored inside UMat of Blob; if @p syncData is false then only shape will be actualized.
void sync() const; //!< Updates Mat and UMat of Blob.
/** @brief Returns number of blob dimensions. */
int dims() const;
@@ -163,7 +220,7 @@ namespace dnn
*/
size_t total(int startAxis = 0, int endAxis = INT_MAX) const;
/** @brief Converts @p axis index to canonical format (where 0 <= axis < dims()). */
/** @brief Converts @p axis index to canonical format (where 0 <= @p axis < dims()). */
int canonicalAxis(int axis) const;
/** @brief Returns shape of the blob. */
@@ -172,11 +229,6 @@ namespace dnn
/** @brief Checks equality of two blobs shapes. */
bool equalShape(const Blob &other) const;
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
*/
Mat getPlane(int n, int cn);
/* Shape getters of 4-dimensional blobs. */
int cols() const; //!< Returns size of the fourth axis of the blob.
int rows() const; //!< Returns size of the third axis of the blob.
@@ -204,12 +256,18 @@ namespace dnn
*/
uchar *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
/** @overload */
template<typename TFloat>
TFloat *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
template<typename Type>
Type *ptr(int n = 0, int cn = 0, int row = 0, int col = 0);
/** @overload ptr<float>() */
float *ptrf(int n = 0, int cn = 0, int row = 0, int col = 0);
//TODO: add const ptr methods
/** @brief Returns slice of first two dimensions.
* @details The behaviour is similar to the following numpy code: blob[n, cn, ...]
* @todo Method will be removed. Use slice() from shape_utils.hpp.
*/
Mat getPlane(int n, int cn);
/** @brief Shares data from other @p blob.
* @returns *this
*/
@@ -220,13 +278,52 @@ namespace dnn
*/
Blob &reshape(const BlobShape &shape);
/** @brief Returns type of the blob. */
int type() const;
/** @brief Changes shape of the blob without copying the data.
* @returns shallow copy of original blob with new shape.
*/
Blob reshaped(const BlobShape &newShape) const;
int type() const; //!< Returns type of the blob.
int elemSize() const; //!< Returns size of single element in bytes.
int getState() const; //!< Returns current state of the blob, @see DataState.
private:
const int *sizes() const;
# define CV_DNN_UMAT //DBG
#ifdef HAVE_OPENCL
# define CV_DNN_UMAT
#endif
#ifdef CV_DNN_UMAT
# define CV_DNN_UMAT_ONLY(expr) (expr)
#else
# define CV_DNN_UMAT_ONLY(expr)
#endif
#ifndef CV_DNN_UMAT
Mat m;
#else
mutable Mat m;
mutable UMat um;
mutable uchar state;
#endif
public:
enum DataState
{
UNINITIALIZED,
HEAD_AT_MAT,
HEAD_AT_UMAT,
SYNCED
};
enum AllocFlag
{
ALLOC_MAT = 1,
ALLOC_UMAT = 2,
ALLOC_BOTH = 3
};
};
//! @}
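A short sketch of typical Blob usage based on the interface above (shape and values illustrative):

void blobSketch()
{
    using namespace cv;
    using namespace cv::dnn;
    Blob b(BlobShape(1, 3, 224, 224), CV_32F);   // NCHW-style batch in CPU (Mat) storage
    b.matRef().setTo(0);                          // write through the Mat head
    float *px = b.ptrf(0, 2, 10, 10);             // element at (n=0, cn=2, row=10, col=10)
    Mat plane = b.getPlane(0, 1);                 // 2-d view of sample 0, channel 1
    Blob flat = b.reshaped(BlobShape(1, 3 * 224 * 224)); // shallow copy with new shape
    (void)px; (void)plane; (void)flat;
}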

@@ -48,20 +48,50 @@ namespace cv
namespace dnn
{
inline BlobShape::BlobShape(int ndims, int fill) : sz( (size_t)std::max(ndims, 0) )
inline BlobShape::BlobShape()
{
sz.allocate(4);
for (size_t i = 0; i < sz.size(); i++)
sz[i] = 1;
}
inline BlobShape BlobShape::all(int ndims, int fill)
{
CV_Assert(ndims >= 0);
BlobShape res;
res.sz.allocate(ndims);
for (int i = 0; i < ndims; i++)
sz[i] = fill;
res.sz[i] = fill;
return res;
}
inline BlobShape::BlobShape(int ndims, const int *sizes) : sz( (size_t)std::max(ndims, 0) )
{
CV_Assert(ndims >= 0);
if (!sizes)
return;
for (int i = 0; i < ndims; i++)
sz[i] = sizes[i];
}
inline BlobShape::BlobShape(int s0) : sz(1)
{
sz[0] = s0;
}
inline BlobShape::BlobShape(int s0, int s1) : sz(2)
{
sz[0] = s0;
sz[1] = s1;
}
inline BlobShape::BlobShape(int s0, int s1, int s2) : sz(3)
{
sz[0] = s0;
sz[1] = s1;
sz[2] = s2;
}
inline BlobShape::BlobShape(int num, int cn, int rows, int cols) : sz(4)
{
sz[0] = num;
@@ -120,7 +150,13 @@ inline int &BlobShape::operator[] (int axis)
return sz[(axis < 0) ? axis + dims() : axis];
}
inline ptrdiff_t BlobShape::total()
inline int BlobShape::canonicalAxis(int axis) const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
}
inline ptrdiff_t BlobShape::total() const
{
if (dims() == 0)
return 0;
@@ -131,11 +167,52 @@ inline ptrdiff_t BlobShape::total()
return res;
}
inline ptrdiff_t BlobShape::total(int startAxis, int endAxis) const
{
if (isEmpty())
return 0;
if (endAxis == INT_MAX)
endAxis = dims();
else if (endAxis < 0)
endAxis += dims();
startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
ptrdiff_t res = 1;
for (int i = startAxis; i < endAxis; i++)
res *= sz[i];
return res;
}
inline BlobShape BlobShape::slice(int startAxis, int endAxis) const
{
if (isEmpty())
return BlobShape::empty();
if (endAxis == INT_MAX)
endAxis = dims();
else if (endAxis < 0)
endAxis += dims();
startAxis = (startAxis < 0) ? startAxis + dims() : startAxis;
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
BlobShape res(endAxis - startAxis, (const int*)NULL);
for (int i = startAxis; i < endAxis; i++)
res[i - startAxis] = sz[i];
return res;
}
inline const int *BlobShape::ptr() const
{
return sz;
}
inline int *BlobShape::ptr()
{
return sz;
}
inline bool BlobShape::equal(const BlobShape &other) const
{
if (this->dims() != other.dims())
@@ -155,19 +232,83 @@ inline bool BlobShape::operator==(const BlobShape &r) const
return this->equal(r);
}
inline BlobShape BlobShape::like(const Mat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::like(const UMat &m)
{
return BlobShape(m.dims, (const int*)m.size);
}
inline BlobShape BlobShape::empty()
{
return BlobShape(0, (const int*)NULL);
}
inline bool BlobShape::isEmpty() const
{
return dims() == 0;
}
inline BlobShape BlobShape::operator+(const BlobShape &r) const
{
BlobShape newShape(this->dims() + r.dims(), (int*)NULL);
for (int i = 0; i < this->dims(); i++)
newShape[i] = (*this)[i];
for (int i = 0; i < r.dims(); i++)
newShape[this->dims() + i] = r[i];
return newShape;
}
CV_EXPORTS std::ostream &operator<< (std::ostream &stream, const BlobShape &shape);
/////////////////////////////////////////////////////////////////////
inline int Blob::canonicalAxis(int axis) const
#ifndef CV_DNN_UMAT
# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) (cpu_expr)
#else
# define CV_DNN_SWITCH_MU(cpu_expr, gpu_expr) ((state == HEAD_AT_UMAT) ? (gpu_expr) : (cpu_expr))
#endif
inline int Blob::dims() const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
return CV_DNN_SWITCH_MU(m.dims, um.dims);
}
inline int Blob::dims() const
inline const int * Blob::sizes() const
{
return m.dims;
return CV_DNN_SWITCH_MU((const int*)m.size, (const int*)um.size);
}
inline int Blob::type() const
{
return CV_DNN_SWITCH_MU(m.type(), um.type());
}
template<int n>
inline size_t Blob::offset(const Vec<int, n> &pos) const
{
const MatStep &step = CV_DNN_SWITCH_MU(m.step, um.step);
size_t ofs = 0;
int i;
for (i = 0; i < std::min(n, dims()); i++)
{
CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
ofs += step[i] * pos[i];
}
for (; i < dims(); i++)
CV_DbgAssert(pos[i] == 0);
CV_DbgAssert(ofs % elemSize() == 0);
return ofs / elemSize();
}
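// Worked example: for a dense CV_32F blob of shape [2, 3, 4, 4] the per-axis
// steps are [48, 16, 4, 1] elements, so offset(Vec<int,4>(1, 2, 3, 0))
// = 1*48 + 2*16 + 3*4 + 0*1 = 92 elements from the start of the data.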
inline int Blob::canonicalAxis(int axis) const
{
CV_Assert(-dims() <= axis && axis < dims());
return (axis < 0) ? axis + dims() : axis;
}
inline int Blob::xsize(int axis) const
@@ -196,27 +337,11 @@ inline size_t Blob::total(int startAxis, int endAxis) const
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
size_t size = 1; //fix: assume that slice isn't empty
size_t cnt = 1; //fix: assume that slice isn't empty
for (int i = startAxis; i < endAxis; i++)
size *= (size_t)sizes()[i];
cnt *= (size_t)sizes()[i];
return size;
}
template<int n>
inline size_t Blob::offset(const Vec<int, n> &pos) const
{
size_t ofs = 0;
int i;
for (i = 0; i < std::min(n, dims()); i++)
{
CV_DbgAssert(pos[i] >= 0 && pos[i] < size(i));
ofs = ofs * (size_t)size(i) + pos[i];
}
for (; i < dims(); i++)
ofs *= (size_t)size(i);
return ofs;
return cnt;
}
inline size_t Blob::offset(int n, int cn, int row, int col) const
@@ -226,20 +351,20 @@ inline size_t Blob::offset(int n, int cn, int row, int col) const
inline float *Blob::ptrf(int n, int cn, int row, int col)
{
CV_Assert(type() == CV_32F);
return (float*)m.data + offset(n, cn, row, col);
return matRef(false).ptr<float>() + offset(n, cn, row, col);
}
inline uchar *Blob::ptr(int n, int cn, int row, int col)
{
return m.data + m.elemSize() * offset(n, cn, row, col);
Mat &mat = matRef(false);
return mat.ptr() + mat.elemSize() * offset(n, cn, row, col);
}
template<typename TFloat>
inline TFloat* Blob::ptr(int n, int cn, int row, int col)
template<typename Dtype>
inline Dtype* Blob::ptr(int n, int cn, int row, int col)
{
CV_Assert(type() == cv::DataDepth<TFloat>::value);
return (TFloat*) ptr(n, cn, row, col);
CV_Assert(type() == cv::DataDepth<Dtype>::value);
return (Dtype*) ptr(n, cn, row, col);
}
inline BlobShape Blob::shape() const
@@ -260,26 +385,69 @@ inline bool Blob::equalShape(const Blob &other) const
return true;
}
inline Mat& Blob::matRef()
inline Mat& Blob::matRef(bool writeOnly)
{
#ifdef CV_DNN_UMAT
updateMat(!writeOnly);
state = HEAD_AT_MAT;
#else
(void)writeOnly;
#endif
return m;
}
inline const Mat& Blob::matRefConst() const
{
CV_DNN_UMAT_ONLY( updateMat() );
return m;
}
inline UMat &Blob::umatRef()
inline UMat &Blob::umatRef(bool writeOnly)
{
CV_Error(Error::StsNotImplemented, "");
#ifndef CV_DNN_UMAT
CV_Error(Error::GpuNotSupported, "");
(void)writeOnly;
return *(new UMat());
#else
updateUMat(!writeOnly);
state = HEAD_AT_UMAT;
return um;
#endif
}
inline const UMat &Blob::umatRefConst() const
{
CV_Error(Error::StsNotImplemented, "");
#ifndef CV_DNN_UMAT
CV_Error(Error::GpuNotSupported, "");
return *(new UMat());
#else
updateUMat();
return um;
#endif
}
template<>
inline Mat &Blob::getRef<Mat>(bool writeOnly)
{
return matRef(writeOnly);
}
template<>
inline UMat &Blob::getRef<UMat>(bool writeOnly)
{
return umatRef(writeOnly);
}
template<>
inline const Mat &Blob::getRefConst<Mat>() const
{
return matRefConst();
}
template<>
inline const UMat &Blob::getRefConst<UMat>() const
{
return umatRefConst();
}
inline Mat Blob::getPlane(int n, int cn)
@@ -313,27 +481,44 @@ inline Size Blob::size2() const
return Size(cols(), rows());
}
inline int Blob::type() const
inline Blob &Blob::shareFrom(const Blob &blob)
{
return m.depth();
this->m = blob.m;
#ifdef CV_DNN_UMAT
this->um = blob.um;
this->state = blob.state;
#endif
return *this;
}
inline const int * Blob::sizes() const
inline Blob &Blob::reshape(const BlobShape &newShape)
{
return &m.size[0];
if (!m.empty()) m = m.reshape(1, newShape.dims(), newShape.ptr());
#ifdef CV_DNN_UMAT
if (!um.empty()) um = um.reshape(1, newShape.dims(), newShape.ptr());
#endif
return *this;
}
inline Blob Blob::reshaped(const BlobShape &newShape) const
{
Blob res(*this); //also, res.shareFrom(*this) could be used
res.reshape(newShape);
return res;
}
inline Blob &Blob::shareFrom(const Blob &blob)
inline int Blob::elemSize() const
{
this->m = blob.m;
return *this;
return CV_ELEM_SIZE(type());
}
inline Blob &Blob::reshape(const BlobShape &shape)
inline int Blob::getState() const
{
m = m.reshape(1, shape.dims(), shape.ptr());
return *this;
#ifdef CV_DNN_UMAT
return this->state;
#else
return m.empty() ? UNINITIALIZED : HEAD_AT_MAT;
#endif
}
}
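To illustrate the Mat/UMat head switching implemented above, a hedged sketch (that the state ends up SYNCED after the read-only access is an assumption about updateUMat()):

void blobSyncSketch()
{
    using namespace cv;
    using namespace cv::dnn;
    Blob b(BlobShape(1, 3, 4, 4), CV_32F, Blob::ALLOC_BOTH);
    b.matRef().setTo(1);               // head moves to the Mat copy (HEAD_AT_MAT)
    const UMat &u = b.umatRefConst();  // updateUMat() copies the data into the UMat copy
    (void)u;                           // both copies now agree; SYNCED state is assumed
}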

@@ -95,10 +95,10 @@ private:
AutoBuffer<int64, 1> *pi;
AutoBuffer<double, 1> *pd;
AutoBuffer<String, 1> *ps;
void *p;
void *pv;
};
DictValue(int _type, void *_p) : type(_type), p(_p) {}
DictValue(int _type, void *_p) : type(_type), pv(_p) {}
void release();
};

@@ -59,15 +59,17 @@ namespace dnn //! This namespace is used for dnn module functionality.
* This function is automatically called on most OpenCV builds,
* but you need to call it manually in some specific configurations (iOS for example).
*/
CV_EXPORTS void initModule();
CV_EXPORTS_W void initModule();
/** @brief This class provides all data needed to initialize layer.
*
* It includes a dictionary with scalar params (which can be read by using the Dict interface),
* blob params #blobs and optional meta information: #name and #type of layer instance.
*/
struct CV_EXPORTS LayerParams : public Dict
class CV_EXPORTS LayerParams : public Dict
{
public:
//TODO: Add ability to name blob params
std::vector<Blob> blobs; //!< List of learned parameters stored as blobs.
String name; //!< Name of the layer instance (optional, can be used for internal purposes).
@@ -77,10 +79,12 @@ namespace dnn //! This namespace is used for dnn module functionality.
/** @brief This interface class allows building new Layers, which are the building blocks of networks.
*
* Each class, derived from Layer, must implement the allocate() method to declare its own outputs and forward() to compute outputs.
* Also before using the new layer into networks you must register your layer by using one of @ref LayerFactoryModule "LayerFactory" macros.
* Also, before using the new layer in networks you must register your layer by using one of the @ref dnnLayerFactory "LayerFactory" macros.
*/
struct CV_EXPORTS Layer
class CV_EXPORTS_W Layer
{
public:
//! List of learned parameters must be stored here to allow reading them by using Net::getParam().
std::vector<Blob> blobs;
@@ -116,7 +120,8 @@ namespace dnn //! This namespace is used for dnn module functionality.
String type; //!< Type name which was used for creating layer by layer factory.
Layer();
explicit Layer(const LayerParams &params); //!< Initialize only #name, #type and #blobs fields.
explicit Layer(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
void setParamsFrom(const LayerParams &params); //!< Initializes only #name, #type and #blobs fields.
virtual ~Layer();
};
@@ -130,7 +135,7 @@ namespace dnn //! This namespace is used for dnn module functionality.
*
* This class supports reference counting of its instances, i.e. copies point to the same instance.
*/
class CV_EXPORTS Net
class CV_EXPORTS_W Net
{
public:
@@ -174,6 +179,7 @@ namespace dnn //! This namespace is used for dnn module functionality.
* @see setNetInputs(), Layer::inputNameToIndex(), Layer::outputNameToIndex()
*/
void connect(String outPin, String inpPin);
/** @brief Connects #@p outNum output of the first layer to #@p inNum input of the second layer.
* @param outLayerId identifier of the first layer
* @param inpLayerId identifier of the second layer
@@ -181,6 +187,7 @@ namespace dnn //! This namespace is used for dnn module functionality.
* @param inpNum number of the second layer input
*/
void connect(int outLayerId, int outNum, int inpLayerId, int inpNum);
/** @brief Sets output names of the network input pseudo layer.
*
* Each net always has its own special network input pseudo layer with id=0.
@@ -267,10 +274,10 @@ namespace dnn //! This namespace is used for dnn module functionality.
* @param isBinary specifies whether the network was serialized in ascii mode or binary.
* @returns Pointer to the created importer, NULL in failure cases.
*
* @warning Torch7 importer is experimental now, you need explicitly set CMake opencv_dnn_BUILD_TORCH_IMPORTER flag to compile its.
* @warning Torch7 importer is experimental now; you need to explicitly set the CMake `opencv_dnn_BUILD_TORCH_IMPORTER` flag to compile it.
*
* @note Ascii mode of Torch serializer is more preferable, because binary mode extensively use long type of C language,
* which has different bit-length on different systems.
* @note ASCII mode of the Torch serializer is preferable, because binary mode extensively uses the `long` type of the C language,
* which has various bit-lengths on different systems.
*
* The loading file must contain a serialized <a href="https://github.com/torch/nn/blob/master/doc/module.md">nn.Module</a> object
* with the network being imported. Try to eliminate custom objects from the serialized data to avoid import errors.
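A hedged end-to-end sketch of the importer workflow (file names are hypothetical; a default-constructed Net plus populateNet(), setBlob(), forward() and getBlob() are assumed from the Importer/Net interface as used in the caffe_googlenet.cpp sample):

void importSketch(const cv::Mat &img)
{
    using namespace cv;
    using namespace cv::dnn;
    Ptr<Importer> importer = createCaffeImporter("net.prototxt", "net.caffemodel");
    Net net;
    importer->populateNet(net);                  // populateNet() assumed from the Importer interface
    importer.release();                          // the importer is no longer needed
    net.setBlob(".data", Blob::fromImages(img)); // ".data" pin name follows the sample
    net.forward();
    Blob prob = net.getBlob("prob");
    (void)prob;
}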

@@ -86,7 +86,7 @@ inline DictValue DictValue::get<DictValue>(int idx) const
template<>
inline int64 DictValue::get<int64>(int idx) const
{
CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size());
CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
idx = (idx == -1) ? 0 : idx;
if (type == Param::INT)
@@ -131,7 +131,7 @@ inline bool DictValue::get<bool>(int idx) const
template<>
inline double DictValue::get<double>(int idx) const
{
CV_Assert(idx == -1 && size() == 1 || idx >= 0 && idx < size());
CV_Assert((idx == -1 && size() == 1) || (idx >= 0 && idx < size()));
idx = (idx == -1) ? 0 : idx;
if (type == Param::REAL)
@@ -159,7 +159,7 @@ template<>
inline String DictValue::get<String>(int idx) const
{
CV_Assert(isString());
CV_Assert(idx == -1 && ps->size() == 1 || idx >= 0 && idx < (int)ps->size());
CV_Assert((idx == -1 && ps->size() == 1) || (idx >= 0 && idx < (int)ps->size()));
return (*ps)[(idx == -1) ? 0 : idx];
}

@@ -50,7 +50,7 @@ namespace dnn
//! @addtogroup dnn
//! @{
//!
//! @defgroup LayerFactoryModule Utilities for new layers registration
//! @defgroup dnnLayerFactory Utilities for New Layers Registration
//! @{
/** @brief %Layer factory allows creating instances of registered layers. */
@@ -86,7 +86,7 @@ private:
* @details This macro must be placed inside the function code.
*/
#define REG_RUNTIME_LAYER_FUNC(type, constuctorFunc) \
LayerFactory::registerLayer(#type, constuctorFunc);
cv::dnn::LayerFactory::registerLayer(#type, constuctorFunc);
/** @brief Registers layer class in runtime.
* @param type string, containing type name of the layer.
@@ -94,7 +94,7 @@ private:
* @details This macro must be placed inside the function code.
*/
#define REG_RUNTIME_LAYER_CLASS(type, class) \
LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
cv::dnn::LayerFactory::registerLayer(#type, _layerDynamicRegisterer<class>);
/** @brief Registers layer constructor on module load time.
* @param type string, containing type name of the layer.
@@ -102,7 +102,7 @@ private:
* @details This macro must be placed outside the function code.
*/
#define REG_STATIC_LAYER_FUNC(type, constuctorFunc) \
static _LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
static cv::dnn::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc);
/** @brief Registers layer class on module load time.
* @param type string, containing type name of the layer.
@@ -126,14 +126,15 @@ Ptr<Layer> _layerDynamicRegisterer(LayerParams &params)
}
//allows automatic registration of created layers at module load time
struct _LayerStaticRegisterer
class _LayerStaticRegisterer
{
String type;
public:
_LayerStaticRegisterer(const String &type, LayerFactory::Constuctor constuctor)
_LayerStaticRegisterer(const String &layerType, LayerFactory::Constuctor layerConstuctor)
{
this->type = type;
LayerFactory::registerLayer(type, constuctor);
this->type = layerType;
LayerFactory::registerLayer(layerType, layerConstuctor);
}
~_LayerStaticRegisterer()
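A hedged sketch of registering a custom layer with the macros above (MyIdentityLayer is hypothetical; the allocate()/forward() signatures are assumed from the Layer interface):

class MyIdentityLayer : public cv::dnn::Layer
{
public:
    MyIdentityLayer(cv::dnn::LayerParams &params) : Layer(params) {}
    void allocate(const std::vector<cv::dnn::Blob*> &inputs, std::vector<cv::dnn::Blob> &outputs)
    {
        outputs.resize(1);
        outputs[0].create(inputs[0]->shape(), inputs[0]->type());
    }
    void forward(std::vector<cv::dnn::Blob*> &inputs, std::vector<cv::dnn::Blob> &outputs)
    {
        inputs[0]->matRefConst().copyTo(outputs[0].matRef()); // pass data through unchanged
    }
};

void registerMyLayers()
{
    REG_RUNTIME_LAYER_CLASS(MyIdentity, MyIdentityLayer) // the macro must run inside a function
}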

@@ -0,0 +1,137 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
#define __OPENCV_DNN_DNN_SHAPE_UTILS_HPP__
#include <opencv2/core.hpp>
#include <ostream>
namespace cv {
namespace dnn {
//Useful shortcut
typedef BlobShape Shape;
inline std::ostream &operator<< (std::ostream &s, cv::Range &r)
{
return s << "[" << r.start << ", " << r.end << ")";
}
//Reshaping
//TODO: add -1 specifier for automatic size inferring
template<typename Mat>
void reshape(Mat &m, const BlobShape &shape)
{
m = m.reshape(1, shape.dims(), shape.ptr());
}
template<typename Mat>
Mat reshaped(const Mat &m, const BlobShape &shape)
{
return m.reshape(1, shape.dims(), shape.ptr());
}
//Slicing
struct _Range : public cv::Range
{
_Range(const Range &r) : cv::Range(r) {}
_Range(int start, int size = 1) : cv::Range(start, start + size) {}
};
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0)
{
//CV_Assert(m.dims >= 1);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 1; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1)
{
CV_Assert(m.dims >= 2);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 2; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2)
{
CV_Assert(m.dims <= 3);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 3; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
ranges[2] = r2;
return m(&ranges[0]);
}
template<typename Mat>
Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const _Range &r2, const _Range &r3)
{
CV_Assert(m.dims <= 4);
cv::AutoBuffer<cv::Range, 4> ranges(m.dims);
for (int i = 4; i < m.dims; i++)
ranges[i] = Range::all();
ranges[0] = r0;
ranges[1] = r1;
ranges[2] = r2;
ranges[3] = r3;
return m(&ranges[0]);
}
BlobShape computeShapeByReshapeMask(const BlobShape &srcShape, const BlobShape &maskShape, Range srcRange = Range::all());
}
}
#endif

@ -0,0 +1,80 @@
#include "perf_precomp.hpp"
namespace cvtest
{
using std::tr1::tuple;
using std::tr1::get;
using std::tr1::make_tuple;
using std::make_pair;
using namespace perf;
using namespace testing;
using namespace cv;
using namespace cv::dnn;
enum {STRIDE_OFF = 1, STRIDE_ON = 2};
CV_ENUM(StrideSize, STRIDE_OFF, STRIDE_ON);
enum {GROUP_OFF = 1, GROUP_2 = 2};
CV_ENUM(GroupSize, GROUP_OFF, GROUP_2);
//Squared Size
#define SSZ(n) cv::Size(n, n)
typedef std::pair<BlobShape, int> InpShapeNumOut;
typedef tuple<Size, InpShapeNumOut, GroupSize, StrideSize> ConvParam; //kernel_size, inp shape, groups, stride
typedef TestBaseWithParam<ConvParam> ConvolutionPerfTest;
PERF_TEST_P( ConvolutionPerfTest, perf, Combine(
Values(Size(1, 1), Size(3, 3), Size(5, 5), Size(11, 11)),
Values(make_pair(BlobShape(1, 4, 224, 224), 64),
make_pair(BlobShape(1, 64, 112, 122), 128),
make_pair(BlobShape(1, 256, 28, 28), 512)),
GroupSize::all(),
StrideSize::all())
)
{
RNG rng(0);
ConvParam params = GetParam();
int ksz = get<0>(params).width;
BlobShape inpShape = get<1>(params).first;
int outCn = get<1>(params).second;
int groups = get<2>(params);
int stride = (ksz >= 11) ? 4 : (int)get<3>(params);
int inpCn = inpShape[1];
Blob wgtBlob(BlobShape(outCn, inpCn/groups, ksz, ksz)), biasBlob(BlobShape(outCn, 1, 1, 1));
Blob inpBlob(inpShape);
rng.fill(biasBlob.matRef(), RNG::UNIFORM, -1, +1);
rng.fill(wgtBlob.matRef(), RNG::UNIFORM, -1, +1);
rng.fill(inpBlob.matRef(), RNG::UNIFORM, -1, +1);
LayerParams lp;
lp.set("num_output", outCn);
lp.set("group", groups);
lp.set("stride", stride);
lp.set("kernel_size", ksz);
lp.blobs.reserve(2);
lp.blobs.push_back(wgtBlob);
lp.blobs.push_back(biasBlob);
std::vector<Blob*> inpBlobs(1, &inpBlob);
std::vector<Blob> outBlobs;
cv::setNumThreads(cv::getNumberOfCPUs());
Ptr<Layer> layer = cv::dnn::LayerFactory::createLayerInstance("Convolution", lp);
layer->allocate(inpBlobs, outBlobs);
declare.in(inpBlob.matRef(), wgtBlob.matRef(), WARMUP_RNG).out(outBlobs[0].matRef()).tbb_threads(cv::getNumThreads());
TEST_CYCLE_N(10)
{
layer->forward(inpBlobs, outBlobs);
}
SANITY_CHECK_NOTHING();
}
}

@ -0,0 +1,3 @@
#include "perf_precomp.hpp"
CV_PERF_TEST_MAIN(dnn)

@ -0,0 +1,17 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#endif
#ifndef __OPENCV_PERF_PRECOMP_HPP__
#define __OPENCV_PERF_PRECOMP_HPP__
#include <opencv2/ts.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#endif

@ -0,0 +1 @@
*.caffemodel

@ -124,8 +124,8 @@ int main(int argc, char **argv)
exit(-1);
}
resize(img, img, Size(224, 224)); //GoogLeNet accepts only 224x224 RGB-images
dnn::Blob inputBlob = dnn::Blob(img); //Convert Mat to dnn::Blob image batch
resize(img, img, Size(224, 224)); //GoogLeNet accepts only 224x224 RGB-images
dnn::Blob inputBlob = dnn::Blob::fromImages(img); //Convert Mat to dnn::Blob batch of images
//! [Prepare blob]
//! [Set input blob]

@ -40,179 +40,383 @@
//M*/
#include "precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
Blob::Blob()
{
int zeros[4] = { 0, 0, 0, 0 };
m = Mat(4, zeros, CV_32F, NULL);
}
Blob::Blob()
{
CV_DNN_UMAT_ONLY(state = UNINITIALIZED);
}
static inline int getMatChannels(const Mat &mat)
Blob::Blob(const BlobShape &shape, int type, int allocFlags)
{
CV_DNN_UMAT_ONLY(state = UNINITIALIZED);
this->create(shape, type, allocFlags);
}
Blob::Blob(InputArray data)
{
#ifndef CV_DNN_UMAT
m = data.getMat();
#else
CV_Assert(data.isMat() || data.isUMat());
if (data.isMat())
{
return (mat.dims <= 2) ? mat.channels() : mat.size[0];
m = data.getMat();
state = HEAD_AT_MAT;
}
static BlobShape getBlobShape(std::vector<Mat> &vmat, int requestedCn = -1)
else
{
BlobShape shape(4);
int cnSum = 0, matCn;
um = data.getUMat();
state = HEAD_AT_UMAT;
}
#endif
}
CV_Assert(vmat.size() > 0);
void Blob::create(const BlobShape &shape, int type, int allocFlags)
{
#ifndef CV_DNN_UMAT
CV_Assert(allocFlags & ALLOC_MAT);
m.create(shape.dims(), shape.ptr(), type);
#else
CV_Assert(allocFlags & ALLOC_MAT || allocFlags & ALLOC_UMAT);
for (size_t i = 0; i < vmat.size(); i++)
{
Mat &mat = vmat[i];
CV_Assert(!mat.empty());
CV_Assert((mat.dims == 3 && mat.channels() == 1) || mat.dims <= 2);
if (allocFlags & ALLOC_MAT)
m.create(shape.dims(), shape.ptr(), type);
if (allocFlags & ALLOC_UMAT)
um.create(shape.dims(), shape.ptr(), type);
matCn = getMatChannels(mat);
cnSum += getMatChannels(mat);
if (state == UNINITIALIZED)
{
if (allocFlags & ALLOC_MAT && allocFlags & ALLOC_UMAT)
state = SYNCED;
else if (allocFlags & ALLOC_MAT)
state = HEAD_AT_MAT;
else
state = HEAD_AT_UMAT;
}
#endif
}
if (i == 0)
{
shape[-1] = mat.cols;
shape[-2] = mat.rows;
shape[-3] = (requestedCn <= 0) ? matCn : requestedCn;
}
else
{
if (mat.cols != shape[-1] || mat.rows != shape[-2])
CV_Error(Error::StsError, "Each Mat.size() must be equal");
void Blob::fill(InputArray in)
{
#ifdef CV_DNN_UMAT
CV_Assert(in.isMat() || in.isUMat());
if (in.isMat())
{
m = in.getMat();
state = HEAD_AT_MAT;
}
else
{
um = in.getUMat();
state = HEAD_AT_UMAT;
}
#else
CV_Assert(in.isMat());
m = in.getMat();
#endif
}
if (requestedCn <= 0 && matCn != shape[-3])
CV_Error(Error::StsError, "Each Mat.chnannels() (or number of planes) must be equal");
}
}
static inline int getMatChannels(const Mat &mat)
{
return (mat.dims <= 2) ? mat.channels() : mat.size[0];
}
if (cnSum % shape[-3] != 0)
CV_Error(Error::StsError, "Total number of channels in vector is not a multiple of requsted channel number");
static BlobShape getBlobShape(std::vector<Mat> &vmat, int requestedCn = -1)
{
BlobShape shape(BlobShape::all(4));
int cnSum = 0, matCn;
shape[0] = cnSum / shape[-3];
return shape;
}
CV_Assert(vmat.size() > 0);
static std::vector<Mat> extractMatVector(InputArray in)
for (size_t i = 0; i < vmat.size(); i++)
{
if (in.isMat() || in.isUMat())
{
return std::vector<Mat>(1, in.getMat());
}
else if (in.isMatVector())
{
return *static_cast<const std::vector<Mat>*>(in.getObj());
}
else if (in.isUMatVector())
Mat &mat = vmat[i];
CV_Assert(!mat.empty());
CV_Assert((mat.dims == 3 && mat.channels() == 1) || mat.dims <= 2);
matCn = getMatChannels(mat);
cnSum += getMatChannels(mat);
if (i == 0)
{
std::vector<Mat> vmat;
in.getMatVector(vmat);
return vmat;
shape[-1] = mat.cols;
shape[-2] = mat.rows;
shape[-3] = (requestedCn <= 0) ? matCn : requestedCn;
}
else
{
CV_Assert(in.isMat() || in.isMatVector() || in.isUMat() || in.isUMatVector());
return std::vector<Mat>();
if (mat.cols != shape[-1] || mat.rows != shape[-2])
CV_Error(Error::StsError, "Each Mat.size() must be equal");
if (requestedCn <= 0 && matCn != shape[-3])
CV_Error(Error::StsError, "Each Mat.chnannels() (or number of planes) must be equal");
}
}
Blob::Blob(InputArray image, int dstCn)
if (cnSum % shape[-3] != 0)
CV_Error(Error::StsError, "Total number of channels in vector is not a multiple of requsted channel number");
shape[0] = cnSum / shape[-3];
return shape;
}
static std::vector<Mat> extractMatVector(InputArray in)
{
if (in.isMat() || in.isUMat())
{
return std::vector<Mat>(1, in.getMat());
}
else if (in.isMatVector())
{
return *static_cast<const std::vector<Mat>*>(in.getObj());
}
else if (in.isUMatVector())
{
CV_Assert(dstCn == -1 || dstCn > 0);
std::vector<Mat> inMats = extractMatVector(image);
BlobShape dstShape = getBlobShape(inMats, dstCn);
std::vector<Mat> vmat;
in.getMatVector(vmat);
return vmat;
}
else
{
CV_Assert(in.isMat() || in.isMatVector() || in.isUMat() || in.isUMatVector());
return std::vector<Mat>();
}
}
m.create(dstShape.dims(), dstShape.ptr(), CV_32F);
void Blob::batchFromImages(InputArray image, int dstCn)
{
CV_Assert(dstCn == -1 || dstCn > 0);
std::vector<Mat> inMats = extractMatVector(image);
BlobShape dstShape = getBlobShape(inMats, dstCn);
std::vector<Mat> wrapBuf(dstShape[-3]);
int elemSize = (int)m.elemSize();
uchar *ptr = this->ptr();
for (size_t i = 0; i < inMats.size(); i++)
{
Mat inMat = inMats[i];
int dtype = CV_32F;
this->create(dstShape, dtype, ALLOC_MAT);
uchar *dstPtr = this->matRef().ptr();
int elemSize = CV_ELEM_SIZE(dtype);
if (inMat.dims <= 2)
{
inMat.convertTo(inMat, m.type());
std::vector<Mat> wrapBuf(dstShape[-3]);
for (size_t i = 0; i < inMats.size(); i++)
{
Mat inMat = inMats[i];
wrapBuf.resize(0);
for (int cn = 0; cn < inMat.channels(); cn++)
{
wrapBuf.push_back(Mat(inMat.rows, inMat.cols, m.type(), ptr));
ptr += elemSize * inMat.total();
}
if (inMat.dims <= 2)
{
inMat.convertTo(inMat, dtype);
cv::split(inMat, wrapBuf);
}
else
wrapBuf.resize(0);
for (int cn = 0; cn < inMat.channels(); cn++)
{
inMat.convertTo(Mat(inMat.dims, inMat.size, m.type(), ptr), m.type());
ptr += elemSize * inMat.total();
wrapBuf.push_back(Mat(inMat.rows, inMat.cols, dtype, dstPtr));
dstPtr += elemSize * inMat.total();
}
cv::split(inMat, wrapBuf);
}
else
{
inMat.convertTo(Mat(inMat.dims, inMat.size, dtype, dstPtr), dtype);
dstPtr += elemSize * inMat.total();
}
}
}
Blob Blob::fromImages(InputArray image, int dstCn)
{
Blob res;
res.batchFromImages(image, dstCn);
return res;
}
Blob::Blob(const BlobShape &shape, int type)
void Blob::fill(const BlobShape &shape, int type, void *data, bool deepCopy)
{
if (deepCopy)
{
this->create(shape, type);
create(shape, type);
memcpy(ptr(), data, this->total() * CV_ELEM_SIZE(type));
}
void Blob::fill(const BlobShape &shape, int type, void *data, bool deepCopy)
else
{
CV_Assert(type == CV_32F || type == CV_64F);
m = Mat(shape.dims(), shape.ptr(), type, data);
}
CV_DNN_UMAT_ONLY(state = HEAD_AT_MAT);
}
if (deepCopy)
{
m.create(shape.dims(), shape.ptr(), type);
memcpy(m.data, data, m.total() * m.elemSize());
}
else
void Blob::setTo(InputArray value, int allocFlags)
{
#ifdef CV_DNN_UMAT
if (allocFlags == -1)
{
if (state == HEAD_AT_UMAT)
um.setTo(value);
else if (state == HEAD_AT_MAT)
m.setTo(value);
else //SYNCED or UNINITIALIZED
{
m = Mat(shape.dims(), shape.ptr(), type, data);
um.setTo(value);
m.setTo(value);
if (state == UNINITIALIZED)
state = SYNCED;
}
}
else if (allocFlags == ALLOC_BOTH)
{
m.setTo(value);
um.setTo(value);
state = SYNCED;
}
else if (allocFlags == ALLOC_MAT)
{
matRef().setTo(value);
}
else if (allocFlags == ALLOC_UMAT)
{
umatRef().setTo(value);
}
else
{
CV_Error(Error::StsBadArg, "allocFlags sholud be -1 or one of Blob::AllocFlag values");
}
#else
m.setTo(value);
#endif
}
void Blob::create(const BlobShape &shape, int type)
void Blob::updateMat(bool syncData) const
{
#ifdef CV_DNN_UMAT
if (state == UNINITIALIZED || state == SYNCED || state == HEAD_AT_MAT)
{
CV_Assert(type == CV_32F || type == CV_64F);
m.create(shape.dims(), shape.ptr(), type);
return;
}
else if (state == HEAD_AT_UMAT)
{
if (syncData)
um.copyTo(m);
else
m.create(dims(), sizes(), type());
state = SYNCED;
}
else
{
CV_Error(Error::StsInternal, "");
}
#else
(void)syncData;
#endif
}
inline void squeezeShape(const int srcDims, const int *srcSizes, const int dstDims, int *dstSizes)
void Blob::updateUMat(bool syncData) const
{
#ifdef CV_DNN_UMAT
if (state == UNINITIALIZED || state == SYNCED || state == HEAD_AT_UMAT)
{
return;
}
else if (state == HEAD_AT_MAT)
{
const int m = std::min(dstDims, srcDims);
if (syncData)
m.copyTo(um);
else
um.create(dims(), sizes(), type());
}
else
{
CV_Error(Error::StsInternal, "");
}
#else
(void)syncData;
#endif
}
void Blob::sync() const
{
updateMat();
updateUMat();
}
//copy common(last) dimensions
for (int i = 0; i < m; i++)
dstSizes[dstDims - 1 - i] = srcSizes[srcDims - 1 - i];
Vec4i Blob::shape4() const
{
return Vec4i(num(), channels(), rows(), cols());
}
//either flatten extra dimensions
for (int i = m; i < srcDims; i++)
dstSizes[0] *= srcSizes[srcDims - 1 - i];
//BlobShape
//either fill gaps
for (int i = m; i < dstDims; i++)
dstSizes[dstDims - 1 - i] = 1;
}
std::ostream &operator<< (std::ostream &stream, const BlobShape &shape)
{
stream << "[";
Vec4i Blob::shape4() const
for (int i = 0; i < shape.dims() - 1; i++)
stream << shape[i] << ", ";
if (shape.dims() > 0)
stream << shape[-1];
return stream << "]";
}
BlobShape computeShapeByReshapeMask(const BlobShape &srcShape, const BlobShape &maskShape, Range srcRange /*= Range::all()*/)
{
if (srcRange == Range::all())
srcRange = Range(0, srcShape.dims());
else
{
return Vec4i(num(), channels(), rows(), cols());
int sz = srcRange.size();
srcRange.start = srcShape.canonicalAxis(srcRange.start);
srcRange.end = (srcRange.end == INT_MAX) ? srcShape.dims() : srcRange.start + sz;
}
std::ostream &operator<< (std::ostream &stream, const BlobShape &shape)
CV_Assert(0 <= srcRange.start && srcRange.start <= srcRange.end && srcRange.end <= srcShape.dims());
BlobShape dstShape(srcShape.dims() - srcRange.size() + maskShape.dims(), (const int*)NULL);
std::copy(srcShape.ptr(), srcShape.ptr() + srcRange.start, dstShape.ptr());
std::copy(srcShape.ptr() + srcRange.end, srcShape.ptr() + srcShape.dims(), dstShape.ptr() + srcRange.start + maskShape.dims());
int inferDim = -1;
for (int i = 0; i < maskShape.dims(); i++)
{
stream << "[";
if (maskShape[i] > 0)
{
dstShape[srcRange.start + i] = maskShape[i];
}
else if (maskShape[i] == 0)
{
if (srcRange.start + i >= srcShape.dims())
CV_Error(Error::StsBadArg, format("Copy dim[%d] (which has zero size) is out of the source shape bounds", srcRange.start + i));
dstShape[srcRange.start + i] = srcShape[srcRange.start + i];
}
else if (maskShape[i] == -1)
{
if (inferDim != -1)
CV_Error(Error::StsAssert, "Duplicate of inferred dim (which is denoted by -1)");
inferDim = srcRange.start + i;
dstShape[inferDim] = 1;
}
else
CV_Error(Error::StsBadArg, "maskShape[i] >= -1");
}
for (int i = 0; i < shape.dims() - 1; i++)
stream << shape[i] << ", ";
if (shape.dims() > 0)
stream << shape[-1];
if (inferDim != -1)
{
ptrdiff_t srcTotal = srcShape.total();
ptrdiff_t dstTotal = dstShape.total();
if (srcTotal % dstTotal != 0)
CV_Error(Error::StsBackTrace, "Can't infer a dim denoted by -1");
return stream << "]";
dstShape[inferDim] = (int)(srcTotal / dstTotal);
}
else
{
CV_Assert(srcShape.total() == dstShape.total());
}
return dstShape;
}
}
}

@ -191,7 +191,7 @@ namespace
else if (pbBlob.has_shape())
{
const caffe::BlobShape &_shape = pbBlob.shape();
BlobShape shape(_shape.dim_size());
BlobShape shape = BlobShape::all(_shape.dim_size());
for (int i = 0; i < _shape.dim_size(); i++)
shape[i] = (int)_shape.dim(i);
@ -201,7 +201,7 @@ namespace
else
{
CV_Error(Error::StsError, "Unknown shape of input blob");
return BlobShape(-1);
return BlobShape();
}
}

@ -0,0 +1,294 @@
#include "../precomp.hpp"
#include "layer_loaders.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <climits>
namespace cv
{
namespace dnn
{
//Utils
//Extracts params used into Conv, Deconv and Pooling layers
static void getCaffeConvParams(LayerParams &params, Size &kernel, Size &pad, Size &stride)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernel.height = params.get<int>("kernel_h");
kernel.width = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernel.height = kernel.width = params.get<int>("kernel_size");
}
else
{
CV_Error(Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
CV_Assert(kernel.height > 0 && kernel.width > 0);
if (params.has("pad_h") && params.has("pad_w"))
{
pad.height = params.get<int>("pad_h");
pad.width = params.get<int>("pad_w");
}
else
{
pad.height = pad.width = params.get<int>("pad", 0);
}
CV_Assert(pad.height >= 0 && pad.width >= 0);
if (params.has("stride_h") && params.has("stride_w"))
{
stride.height = params.get<int>("stride_h");
stride.width = params.get<int>("stride_w");
}
else
{
stride.height = stride.width = params.get<int>("stride", 1);
}
CV_Assert(stride.height > 0 && stride.width > 0);
}
//Layers
//Convolution and Deconvolution
static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, LayerParams &params)
{
l->setParamsFrom(params);
getCaffeConvParams(params, l->kernel, l->pad, l->stride);
bool bias = params.get<bool>("bias_term", true);
int numOutput = params.get<int>("num_output");
int group = params.get<int>("group", 1);
CV_Assert(numOutput % group == 0);
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
}
template<>
Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = ConvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
template<>
Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams &params)
{
Ptr<BaseConvolutionLayer> l = DeconvolutionLayer::create();
initConvDeconvLayerFromCaffe(l, params);
return Ptr<Layer>(l);
}
template<>
Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
{
int type;
Size kernel, stride, pad;
if (params.has("pool"))
{
String pool = params.get<String>("pool").toLowerCase();
if (pool == "max")
type = PoolingLayer::MAX;
else if (pool == "ave")
type = PoolingLayer::AVE;
else if (pool == "stochastic")
type = PoolingLayer::STOCHASTIC;
else
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
}
else
{
type = PoolingLayer::MAX;
}
getCaffeConvParams(params, kernel, pad, stride);
return Ptr<Layer>(PoolingLayer::create(type, kernel, stride, pad));
}
template<>
Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams &params)
{
int axis = params.get<int>("axis", 1);
return Ptr<Layer>(SoftmaxLayer::create(axis));
}
template<> //InnerProduct specialization
Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams &params)
{
const std::vector<Blob> &blobs = params.blobs;
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
int numOutputs = params.get<int>("num_output");
int innerSize = (int)blobs[0].total() / numOutputs;
bool bias = params.get<bool>("bias_term", true);
int axis = params.get<int>("axis", 1);
CV_Assert(blobs[0].dims() >= 2 && (size_t)(innerSize * numOutputs) == blobs[0].total());
CV_Assert(!bias || (blobs.size() == 2 && (size_t)numOutputs == blobs[1].total()));
Ptr<InnerProductLayer> l = InnerProductLayer::create(axis);
l->setParamsFrom(params);
l->blobs[0].reshape(Shape(numOutputs, innerSize));
if (bias)
l->blobs[1].reshape(Shape(1, numOutputs));
return Ptr<Layer>(l);
}
template<> //LRNLayer specialization
Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams& params)
{
int type;
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = LRNLayer::CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = LRNLayer::SPATIAL_NRM;
else
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
int size = params.get<int>("local_size", 5);
if (size % 2 != 1 || size <= 0)
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
double alpha = params.get<double>("alpha", 1);
double beta = params.get<double>("beta", 0.75);
return Ptr<Layer>(LRNLayer::create(type, size, alpha, beta));
}
template<>
Ptr<Layer> createLayerFromCaffe<MVNLayer>(LayerParams &params)
{
return Ptr<Layer>(MVNLayer::create(
params.get<bool>("normalize_variance", true),
params.get<bool>("across_channels", false),
params.get<double>("eps", 1e-9)
));
}
/* Reshape layers */
template<>
Ptr<Layer> createLayerFromCaffe<ReshapeLayer>(LayerParams &params)
{
int axis = params.get<int>("axis", 0);
int numAxes = params.get<int>("num_axes", -1);
CV_Assert(numAxes >= -1);
Range applyingRange = (numAxes == -1) ? Range(axis, INT_MAX) : Range(axis, axis + numAxes);
Shape newShape;
if (params.has("dim"))
{
const DictValue &paramShape = params.get("dim");
newShape = Shape::all(paramShape.size());
for (int i = 0; i < paramShape.size(); i++)
newShape[i] = paramShape.get<int>(i);
}
else
newShape = Shape::all(0);
return Ptr<Layer>(ReshapeLayer::create(newShape, applyingRange));
}
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ReshapeLayer::create(Shape(0, -1)));
}
template<>
Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams& params)
{
return Ptr<Layer>(ConcatLayer::create(params.get<int>("axis", 1)));
}
template<>
Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams &params)
{
int outputsCount;
//TODO: maybe "top_count" param is useless because it can be determined by output connections number
if (params.has("top_count"))
{
outputsCount = params.get<int>("top_count");
CV_Assert(outputsCount >= 0);
}
else
{
outputsCount = -1;
}
return Ptr<Layer>(SplitLayer::create(outputsCount));
}
template<>
Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams& params)
{
int axis = params.get<int>("axis", 1);
if (!params.has("slice_point"))
{
return Ptr<Layer>(SliceLayer::create(axis));
}
else
{
const DictValue &indicesValue = params.get("slice_point");
std::vector<int> sliceIndices(indicesValue.size());
for (int i = 0; i < indicesValue.size(); i++)
sliceIndices[i] = indicesValue.get<int>(i);
return Ptr<Layer>(SliceLayer::create(axis, sliceIndices));
}
}
/* Activation layers */
template <typename ActivationLayer> //Intended for parameters-free activations
Ptr<Layer> createLayerFromCaffe(LayerParams&)
{
return Ptr<Layer>(ActivationLayer::create());
}
template<> //ReLU specialization
Ptr<Layer> createLayerFromCaffe<ReLULayer>(LayerParams& params)
{
float negative_slope = params.get<float>("negative_slope", 0.f);
return Ptr<Layer>(ReLULayer::create(negative_slope));
}
template<> //Power specialization
Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams& params)
{
float power = params.get<float>("power", 1.0f);
float scale = params.get<float>("scale", 1.0f);
float shift = params.get<float>("shift", 0.0f);
return Ptr<Layer>(PowerLayer::create(power, scale, shift));
}
//Explicit instantiation
template Ptr<Layer> createLayerFromCaffe<ConvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SoftmaxLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<MVNLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ConcatLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SliceLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SplitLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<ReLULayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<SigmoidLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<TanHLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<AbsLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<BNLLLayer>(LayerParams&);
template Ptr<Layer> createLayerFromCaffe<PowerLayer>(LayerParams&);
}
}

@ -0,0 +1,60 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__
#define __OPENCV_DNN_CAFFE_LAYER_LOADERS_HPP__
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
//Common template for Caffe layer loaders
template <typename PublicLayer>
Ptr<Layer> createLayerFromCaffe(LayerParams&);
Ptr<Layer> createFlattenLayerFromCaffe(LayerParams&);
}
}
#endif

@ -543,6 +543,13 @@ Layer::Layer(const LayerParams &params)
}
void Layer::setParamsFrom(const LayerParams &params)
{
blobs = params.blobs;
name = params.name;
type = params.type;
}
int Layer::inputNameToIndex(String)
{
return -1;

@ -40,19 +40,8 @@
//M*/
#include "precomp.hpp"
#include "layers/concat_layer.hpp"
#include "layers/convolution_layer.hpp"
#include "caffe/layer_loaders.hpp"
#include "layers/blank_layer.hpp"
#include "layers/elementwise_layers.hpp"
#include "layers/fully_connected_layer.hpp"
#include "layers/lrn_layer.hpp"
#include "layers/mvn_layer.hpp"
#include "layers/pooling_layer.hpp"
#include "layers/reshape_layer.hpp"
#include "layers/slice_layer.hpp"
#include "layers/softmax_layer.hpp"
#include "layers/split_layer.hpp"
namespace cv
{
@ -76,27 +65,27 @@ void initModule()
if (init.status)
return;
REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
REG_RUNTIME_LAYER_CLASS(Pooling, PoolingLayer)
REG_RUNTIME_LAYER_CLASS(MVN, MVNLayer)
REG_RUNTIME_LAYER_CLASS(LRN, LRNLayer)
REG_RUNTIME_LAYER_CLASS(InnerProduct, FullyConnectedLayer)
REG_RUNTIME_LAYER_FUNC(Slice, createLayerFromCaffe<SliceLayer>);
REG_RUNTIME_LAYER_FUNC(Split, createLayerFromCaffe<SplitLayer>);
REG_RUNTIME_LAYER_FUNC(Concat, createLayerFromCaffe<ConcatLayer>);
REG_RUNTIME_LAYER_FUNC(Reshape, createLayerFromCaffe<ReshapeLayer>);
REG_RUNTIME_LAYER_FUNC(Flatten, createFlattenLayerFromCaffe);
REG_RUNTIME_LAYER_CLASS(ReLU, ElementWiseLayer<ReLUFunctor>)
REG_RUNTIME_LAYER_CLASS(TanH, ElementWiseLayer<TanHFunctor>)
REG_RUNTIME_LAYER_CLASS(BNLL, ElementWiseLayer<BNLLFunctor>)
REG_RUNTIME_LAYER_CLASS(Power, ElementWiseLayer<PowerFunctor>)
REG_RUNTIME_LAYER_CLASS(AbsVal, ElementWiseLayer<AbsValFunctor>)
REG_RUNTIME_LAYER_CLASS(Sigmoid, ElementWiseLayer<SigmoidFunctor>)
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
REG_RUNTIME_LAYER_FUNC(Convolution, createLayerFromCaffe<ConvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Deconvolution, createLayerFromCaffe<DeconvolutionLayer>);
REG_RUNTIME_LAYER_FUNC(Pooling, createLayerFromCaffe<PoolingLayer>);
REG_RUNTIME_LAYER_FUNC(LRN, createLayerFromCaffe<LRNLayer>);
REG_RUNTIME_LAYER_FUNC(InnerProduct, createLayerFromCaffe<InnerProductLayer>);
REG_RUNTIME_LAYER_FUNC(Softmax, createLayerFromCaffe<SoftmaxLayer>);
REG_RUNTIME_LAYER_FUNC(MVN, createLayerFromCaffe<MVNLayer>);
REG_RUNTIME_LAYER_CLASS(Convolution, ConvolutionLayer)
REG_RUNTIME_LAYER_CLASS(Deconvolution, DeConvolutionLayer)
REG_RUNTIME_LAYER_CLASS(Concat, ConcatLayer)
REG_RUNTIME_LAYER_FUNC(ReLU, createLayerFromCaffe<ReLULayer>);
REG_RUNTIME_LAYER_FUNC(Sigmoid, createLayerFromCaffe<SigmoidLayer>);
REG_RUNTIME_LAYER_FUNC(TanH, createLayerFromCaffe<TanHLayer>);
REG_RUNTIME_LAYER_FUNC(BNLL, createLayerFromCaffe<BNLLLayer>);
REG_RUNTIME_LAYER_FUNC(AbsVal, createLayerFromCaffe<AbsLayer>);
REG_RUNTIME_LAYER_FUNC(Power, createLayerFromCaffe<PowerLayer>);
REG_RUNTIME_LAYER_CLASS(Dropout, BlankLayer)
init.status = true;
}

@ -42,60 +42,80 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "concat_layer.hpp"
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
ConcatLayer::ConcatLayer(LayerParams &params) : Layer(params)
{
axis = params.get<int>("axis", 1);
CV_Assert(axis >= 0);
}
void ConcatLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
ConcatLayerImpl::ConcatLayerImpl(int axis_ /*= 1*/)
{
axis = axis_;
}
int refType = inputs[0]->type();
BlobShape refShape = inputs[0]->shape();
CV_Assert(axis < refShape.dims());
void ConcatLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
int axisSum = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
BlobShape curShape = inputs[i]->shape();
BlobShape refShape = inputs[0]->shape();
axisIdx = inputs[0]->canonicalAxis(axis);
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == refType);
for (int axisId = 0; axisId < refShape.dims(); axisId++)
{
if (axisId != axis && refShape[axisId] != curShape[axisId])
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
}
int axisSum = 0;
useOpenCL = false;
for (size_t i = 0; i < inputs.size(); i++)
{
BlobShape curShape = inputs[i]->shape();
axisSum += curShape[axis];
CV_Assert(curShape.dims() == refShape.dims() && inputs[i]->type() == inputs[0]->type());
for (int curAxis = 0; curAxis < refShape.dims(); curAxis++)
{
if (curAxis != axisIdx && refShape[curAxis] != curShape[curAxis])
CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
}
refShape[axis] = axisSum;
outputs.resize(1);
outputs[0].create(refShape);
axisSum += curShape[axisIdx];
useOpenCL |= inputs[i]->getState() == Blob::HEAD_AT_MAT;
}
void ConcatLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
const Mat& outMat = outputs[0].matRef();
std::vector<Range> ranges(outputs[0].dims(), Range::all());
int sizeStart = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
int sizeEnd = sizeStart + inputs[i]->size(axis);
ranges[axis] = Range(sizeStart, sizeEnd);
refShape[axisIdx] = axisSum;
useOpenCL &= ocl::useOpenCL();
int allocFlags = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
Mat outSubMat = outMat(&ranges[0]);
inputs[i]->matRef().copyTo(outSubMat);
outputs.resize(1);
outputs[0].create(refShape, inputs[0]->type(), allocFlags);
}
sizeStart = sizeEnd;
}
void ConcatLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(inputs, outputs);
else
#endif
forward_<Mat>(inputs, outputs);
}
template<typename XMat>
void ConcatLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
XMat& outMat = outputs[0].getRef<XMat>();
std::vector<Range> ranges(outputs[0].dims(), Range::all());
ranges[axisIdx].start = 0;
for (size_t i = 0; i < inputs.size(); i++)
{
ranges[axisIdx].end = ranges[axisIdx].start + inputs[i]->size(axisIdx);
inputs[i]->getRefConst<XMat>().copyTo(outMat(&ranges[0]));
ranges[axisIdx].start = ranges[axisIdx].end;
}
}
Ptr<ConcatLayer> ConcatLayer::create(int axis)
{
return Ptr<ConcatLayer>(new ConcatLayerImpl(axis));
}
}
}

@ -42,20 +42,29 @@
#ifndef __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONCAT_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class ConcatLayer : public Layer
{
int axis;
public:
ConcatLayer(LayerParams& params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
class ConcatLayerImpl : public ConcatLayer
{
bool useOpenCL;
int axisIdx;
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
public:
ConcatLayerImpl(int axis_ = 1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif

@ -43,209 +43,295 @@
#include <opencv2/core/ocl.hpp>
#include "layers_common.hpp"
#include "convolution_layer.hpp"
#include "im2col.hpp"
#include "op_im2col.hpp"
#include "op_blas.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <iostream>
namespace cv
{
namespace dnn
{
ConvolutionLayer::ConvolutionLayer(LayerParams &params) : Layer(params)
{
getKernelParams(params, kerH, kerW, padH, padW, strideH, strideW);
numOutput = params.get<int>("num_output");
bias = params.get<bool>("bias_term", true);
group = params.get<int>("group", 1);
CV_Assert(numOutput % group == 0);
CV_Assert(!bias || blobs.size() == 2);
CV_Assert( bias || blobs.size() == 1);
const Blob &wgtBlob = blobs[0];
CV_Assert(wgtBlob.dims() == 4 && wgtBlob.cols() == kerW && wgtBlob.rows() == kerH);
ConvolutionLayerImpl::ConvolutionLayerImpl()
{
tryUseOpenCL = true;
numOutput = -1;
group = -1;
if (bias)
#if HAVE_CBLAS
if (getBlasThreads() != cv::getThreadNum())
{
Blob &biasBlob = blobs[1];
CV_Assert(biasBlob.total() == (size_t)numOutput);
setBlasThreads(cv::getThreadNum());
}
#endif
}
//TBD
useOpenCL = params.has("use_opencl");
}
void ConvolutionLayerImpl::init()
{
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
void ConvolutionLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
bias = (blobs.size() >= 2);
numOutput = blobs[0].num();
const Blob &inpBlob = *inputs[0];
CV_Assert(inpBlob.dims() == 4 && inpBlob.type() == CV_32F);
computeInpOutShape(inpBlob);
CV_Assert(blobs[0].dims() == 4 && blobs[0].cols() == kernel.width && blobs[0].rows() == kernel.height);
CV_Assert(!bias || blobs[1].total() == (size_t)blobs[0].num());
CV_Assert(inpCn % group == 0 && outCn % group == 0);
CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group);
useOpenCL = ocl::useOpenCL() && tryUseOpenCL;
}
outGroupCn = outCn / group;
inpGroupCn = inpCn / group;
ksize = inpGroupCn * kerH * kerW;
void ConvolutionLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
init();
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->type() == inpBlob.type());
CV_Assert(inputs[i]->dims() == 4 && inputs[i]->channels() == inpBlob.channels());
CV_Assert(inputs[i]->rows() == inpBlob.rows() && inputs[i]->cols() == inpBlob.cols());
CV_Assert(inputs.size() > 0);
const Blob &input = *inputs[0];
CV_Assert(input.dims() == 4 && (input.type() == CV_32F || input.type() == CV_64F));
computeInpOutShape(input);
outputs[i].create(BlobShape(inputs[i]->num(), topCn, topH, topW));
}
group = inpCn / blobs[0].channels();
CV_Assert(inpCn % group == 0 && outCn % group == 0);
CV_Assert(blobs[0].num() == outCn && blobs[0].channels() == inpCn / group);
if (!is1x1())
colMat.create(ksize, outH * outW, inpBlob.type());
outGroupCn = outCn / group;
inpGroupCn = inpCn / group;
ksize = inpGroupCn * kernel.height * kernel.width;
if (bias)
biasOnesMat = Mat::ones(1, topH * topW, inpBlob.type());
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->type() == input.type());
CV_Assert(inputs[i]->dims() == 4 && inputs[i]->channels() == input.channels());
CV_Assert(inputs[i]->rows() == input.rows() && inputs[i]->cols() == input.cols());
}
inline bool ConvolutionLayer::is1x1() const
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
if (!is1x1())
{
return (kerH == 1 && kerW == 1);
colBlob.create(Shape(ksize, outH * outW), input.type(), allocFlags);
}
void ConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
if (bias)
{
Blob &wgtBlob = blobs[0];
biasOnesBlob.create(Shape(1, topH * topW), input.type(), allocFlags);
biasOnesBlob.setTo(1);
}
for (size_t ii = 0; ii < outputs.size(); ii++)
{
Blob &inpBlob = *inputs[ii];
Blob &outBlob = outputs[ii];
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
outputs[i].create(Shape(inputs[i]->num(), topCn, topH, topW), input.type(), allocFlags);
}
}
bool ConvolutionLayerImpl::is1x1() const
{
return (kernel.height == 1 && kernel.width == 1) &&
(stride.height == 1 && stride.width == 1);
}
template<typename XMat>
void ConvolutionLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1));
for (int n = 0; n < inpBlob.num(); n++)
for (size_t ii = 0; ii < outputs.size(); ii++)
{
int numImg = inputs[ii]->size(0);
XMat inpMat = inputs[ii]->getRefConst<XMat>();
XMat outMat = reshaped(outputs[ii].getRef<XMat>(), Shape(numImg*group*outGroupCn, outH*outW));
for (int n = 0; n < numImg; n++)
{
for (int g = 0; g < group; g++)
{
for (int g = 0; g < group; g++)
{
im2col(inpBlob, n, g);
XMat colMat, curInp = slice(inpMat, n, _Range(g * inpGroupCn, inpGroupCn));
im2col(curInp, colMat);
_Range kerRange(g * outGroupCn, outGroupCn);
XMat kerMat = weightsMat.rowRange(kerRange);
Mat kerMat(outGroupCn, ksize, wgtBlob.type(), wgtBlob.ptr(g*outGroupCn));
Mat dstMat(outGroupCn, outH*outW, outBlob.type(), outBlob.ptr(n, g*outGroupCn));
_Range outRange((g + n * group) * outGroupCn, outGroupCn);
XMat dstMat = outMat.rowRange(outRange);
cv::gemm(kerMat, colMat, 1, noArray(), 0, dstMat);
dnn::gemm(kerMat, colMat, 1, dstMat, 0);
if (bias)
{
float *biasPtr = blobs[1].ptrf() + g*outGroupCn;
Mat biasMat(outGroupCn, 1, CV_32F, biasPtr);
cv::gemm(biasMat, biasOnesMat, 1, dstMat, 1, dstMat);
}
if (bias)
{
dnn::gemm(biasesMat.rowRange(kerRange), biasOnesBlob.getRefConst<XMat>(), 1, dstMat, 1);
}
}
}
}
}
void ConvolutionLayer::im2col(Blob &inpBlob, int imNum, int cnGroup)
{
uchar *srcPtr = inpBlob.ptr(imNum, cnGroup*inpGroupCn);
if (is1x1())
{
colMat = Mat(ksize, inpBlob.rows()*inpBlob.cols(), inpBlob.type(), srcPtr);
return;
}
void ConvolutionLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
if (!useOpenCL)
forward_<Mat>(inputs, outputs);
else
forward_<UMat>(inputs, outputs);
}
void ConvolutionLayerImpl::im2col(const UMat &srcImg, UMat &dstCol)
{
if (is1x1())
{
dstCol = reshaped(srcImg, Shape(ksize, outH*outW));
return;
}
#ifdef HAVE_OPENCL
if (useOpenCL && ocl::useOpenCL() && inpBlob.type() == CV_32F && !is1x1())
{
std::vector<Range> ranges(4, Range::all());
ranges[0] = Range(imNum, imNum+1);
ranges[1] = Range(cnGroup*inpGroupCn, (cnGroup + 1)*inpGroupCn);
UMat src = inpBlob.matRef()(&ranges[0]).getUMat(ACCESS_READ);
UMat dst(colMat.size(), colMat.type());
im2col_ocl(src, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, dst);
dst.copyTo(colMat);
return;
}
#endif // HAVE_OPENCL
CV_Assert(im2col_ocl(srcImg, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, this->colBlob.umatRef()));
dstCol = this->colBlob.umatRefConst();
#else
CV_Error(Error::StsInternal, "");
dstCol = srcImg; //supress warning
#endif
}
if (inpBlob.type() == CV_32F)
im2col_cpu((float *)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float *)colMat.ptr());
if (inpBlob.type() == CV_64F)
im2col_cpu((double*)srcPtr, inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)colMat.ptr());
void ConvolutionLayerImpl::im2col(const Mat &srcImg, Mat &dstCol)
{
if (is1x1())
{
dstCol = reshaped(srcImg, Shape(ksize, outH*outW));
return;
}
void ConvolutionLayer::computeInpOutShape(const Blob &inpBlob)
{
inpH = inpBlob.rows();
inpW = inpBlob.cols();
inpCn = inpBlob.channels();
Mat &colMat = colBlob.matRef();
if (srcImg.type() == CV_32F)
im2col_CpuPBody<float>::run(srcImg.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<float>());
if (srcImg.type() == CV_64F)
im2col_CpuPBody<double>::run(srcImg.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, colMat.ptr<double>());
outH = (inpH + 2 * padH - kerH) / strideH + 1;
outW = (inpW + 2 * padW - kerW) / strideW + 1;
outCn = numOutput;
dstCol = colMat;
}
topH = outH; topW = outW; topCn = outCn;
}
void ConvolutionLayerImpl::computeInpOutShape(const Blob &input)
{
inpH = input.rows();
inpW = input.cols();
inpCn = input.channels();
DeConvolutionLayer::DeConvolutionLayer(LayerParams &params)
: ConvolutionLayer(params) {}
outH = (inpH + 2 * pad.height - kernel.height) / stride.height + 1;
outW = (inpW + 2 * pad.width - kernel.width) / stride.width + 1;
outCn = numOutput;
void DeConvolutionLayer::computeInpOutShape(const Blob &inpBlob)
{
outH = inpBlob.rows();
outW = inpBlob.cols();
outCn = inpBlob.channels();
topH = outH; topW = outW; topCn = outCn;
}
inpH = strideH * (outH - 1) + kerH - 2 * padH;
inpW = strideW * (outW - 1) + kerW - 2 * padW;
inpCn = numOutput;
//Deconvolution
topH = inpH; topW = inpW; topCn = inpCn;
}
DeConvolutionLayerImpl::DeConvolutionLayerImpl()
{
}
void DeConvolutionLayerImpl::computeInpOutShape(const Blob &inpBlob)
{
outH = inpBlob.rows();
outW = inpBlob.cols();
outCn = inpBlob.channels();
inpH = stride.height * (outH - 1) + kernel.height - 2 * pad.height;
inpW = stride.width * (outW - 1) + kernel.width - 2 * pad.width;
inpCn = numOutput;
topH = inpH; topW = inpW; topCn = inpCn;
}
void DeConvolutionLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
if (!useOpenCL)
forward_<Mat>(inputs, outputs);
else
forward_<UMat>(inputs, outputs);
}
void DeConvolutionLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
template<typename XMat>
void DeConvolutionLayerImpl::forward_(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
XMat weightsMat = reshaped(blobs[0].getRefConst<XMat>(), Shape(outCn, ksize));
XMat biasesMat = reshaped(blobs[1].getRefConst<XMat>(), Shape(outCn, 1));
for (size_t ii = 0; ii < outputs.size(); ii++)
{
Blob &wghtBlob = blobs[0];
int numImg = inputs[ii]->size(0);
XMat convBlob = reshaped(inputs[ii]->getRefConst<XMat>(), Shape(numImg*outCn, outH*outW));
XMat decnBlob = reshaped(outputs[ii].getRef<XMat>(), Shape(numImg*inpCn, inpH*inpW));
for (size_t ii = 0; ii < outputs.size(); ii++)
for (int n = 0; n < numImg; n++)
{
Blob &convBlob = *inputs[ii];
Blob &decnBlob = outputs[ii];
for (int n = 0; n < convBlob.num(); n++)
for (int g = 0; g < group; g++)
{
for (int g = 0; g < group; g++)
{
Mat dstMat(inpGroupCn, inpH*inpW, decnBlob.type(), decnBlob.ptr(n, g*inpGroupCn));
XMat dstMat = decnBlob.rowRange(_Range((g + n * group) * inpGroupCn, inpGroupCn));
XMat &colMat = (is1x1()) ? dstMat : colBlob.getRef<XMat>();
if (is1x1())
colMat = dstMat;
XMat convMat = convBlob.rowRange(_Range((g + n * group) * outGroupCn, outGroupCn));
XMat wghtMat = weightsMat.rowRange(_Range(g * outGroupCn, outGroupCn));
Mat convMat(outGroupCn, outH*outW, convBlob.type(), convBlob.ptr(n, g*outGroupCn));
Mat wghtMat(outGroupCn, ksize, wghtBlob.type(), wghtBlob.ptr(g*outGroupCn));
cv::gemm(wghtMat, convMat, 1, noArray(), 0, colMat, GEMM_1_T);
dnn::gemm(wghtMat, convMat, 1, colMat, 0, GEMM_1_T);
col2im(dstMat);
if (!is1x1())
col2im(colMat, dstMat);
if (bias)
{
float *biasPtr = blobs[1].ptrf() + g*inpGroupCn;
Mat biasMat(inpGroupCn, 1, CV_32F, biasPtr);
cv::gemm(biasMat, biasOnesMat, 1, dstMat, 1, dstMat);
}
if (bias)
{
XMat curBiasMat = biasesMat.rowRange(_Range(g * outGroupCn, outGroupCn));
dnn::gemm(curBiasMat, biasOnesBlob.getRefConst<XMat>(), 1, dstMat, 1);
}
}
}
}
}
void DeConvolutionLayer::col2im(Mat &dstMat)
void DeConvolutionLayerImpl::col2im(const Mat &colMat, Mat &dstImg)
{
if (is1x1())
{
if (is1x1()) return;
dstImg = colMat;
return;
}
if (dstImg.type() == CV_32F)
col2im_CpuPBody<float>::run(colMat.ptr<float>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr<float>());
if (dstImg.type() == CV_64F)
col2im_CpuPBody<double>::run(colMat.ptr<double>(), inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg.ptr<double>());
}
if (dstMat.type() == CV_32F)
col2im_cpu((float*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (float*)dstMat.ptr());
if (dstMat.type() == CV_64F)
col2im_cpu((double*)colMat.ptr(), inpGroupCn, inpH, inpW, kerH, kerW, padH, padW, strideH, strideW, (double*)dstMat.ptr());
void DeConvolutionLayerImpl::col2im(const UMat &colMat, UMat &dstImg)
{
if (is1x1())
{
dstImg = colMat;
return;
}
#ifdef HAVE_OPENCL
CV_Assert(col2im_ocl(colMat, inpGroupCn, inpH, inpW, kernel.height, kernel.width, pad.height, pad.width, stride.height, stride.width, dstImg));
#else
CV_Error(Error::StsInternal, "");
dstImg = colMat;
#endif
}
//Initializers
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(Size kernel, Size stride, Size pad)
{
ConvolutionLayerImpl *l = new ConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
return Ptr<BaseConvolutionLayer>(l);
}
Ptr<BaseConvolutionLayer> DeconvolutionLayer::create(Size kernel, Size stride, Size pad)
{
DeConvolutionLayerImpl *l = new DeConvolutionLayerImpl();
l->kernel = kernel;
l->pad = pad;
l->stride = stride;
return Ptr<BaseConvolutionLayer>(l);
}
}
}

@ -42,51 +42,65 @@
#ifndef __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_CONVOLUTION_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayer : public Layer
{
protected:
bool bias;
int numOutput, group;
int padH, padW;
int kerH, kerW;
int strideH, strideW;
int inpH, inpW, inpCn;
int outH, outW, outCn;
int topH, topW, topCn; //switched between inp/out on deconv/conv
int inpGroupCn, outGroupCn;
int ksize;
//TODO: simultaneously convolution and bias addition for cache optimization
class ConvolutionLayerImpl : public ConvolutionLayer
{
public:
ConvolutionLayerImpl();
virtual void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
virtual void init();
protected:
int numOutput, group;
int inpH, inpW, inpCn;
int outH, outW, outCn;
int topH, topW, topCn; //switched between inp/out on deconv/conv
int inpGroupCn, outGroupCn;
int ksize;
bool bias;
bool tryUseOpenCL, useOpenCL;
Blob colBlob, biasOnesBlob;
bool is1x1() const;
virtual void computeInpOutShape(const Blob &inpBlob);
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void im2col(const Mat &srcImg, Mat &dstCol);
void im2col(const UMat &srcImg, UMat &dstCol);
};
class DeConvolutionLayerImpl : public ConvolutionLayerImpl
{
public:
DeConvolutionLayerImpl();
virtual void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
bool useOpenCL;
Mat colMat, biasOnesMat;
protected:
inline bool is1x1() const;
virtual void computeInpOutShape(const Blob &inpBlob);
void im2col(Blob &inpBlob, int imNum, int cnGroup);
virtual void computeInpOutShape(const Blob &inpBlob);
public:
ConvolutionLayer() {}
ConvolutionLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void col2im(const Mat &colMat, Mat &dstImg);
void col2im(const UMat &colMat, UMat &dstImg);
};
class DeConvolutionLayer : public ConvolutionLayer
{
protected:
void computeInpOutShape(const Blob &inpBlob);
void col2im(Mat &dstMat);
//Importers
Ptr<Layer> createConvolutionLayerFromCaffe(LayerParams &params);
Ptr<Layer> createDeconvolutionLayerFromCaffe(LayerParams &params);
public:
DeConvolutionLayer(LayerParams &params);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif

@ -0,0 +1,46 @@
#include "../precomp.hpp"
#include "elementwise_layers.hpp"
namespace cv
{
namespace dnn
{
#define ACTIVATION_CREATOR_FOR(_Layer, _Functor, ...) \
Ptr<_Layer> _Layer::create() { \
return return Ptr<_Layer>( new ElementWiseLayer<_Functor>(_Functor()) ); }
Ptr<ReLULayer> ReLULayer::create(double negativeSlope)
{
return Ptr<ReLULayer>(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(negativeSlope)));
}
Ptr<TanHLayer> TanHLayer::create()
{
return Ptr<TanHLayer>(new ElementWiseLayer<TanHFunctor>());
}
Ptr<SigmoidLayer> SigmoidLayer::create()
{
return Ptr<SigmoidLayer>(new ElementWiseLayer<SigmoidFunctor>());
}
Ptr<AbsLayer> AbsLayer::create()
{
return Ptr<AbsLayer>(new ElementWiseLayer<AbsValFunctor>());
}
Ptr<BNLLLayer> BNLLLayer::create()
{
return Ptr<BNLLLayer>(new ElementWiseLayer<BNLLFunctor>());
}
Ptr<PowerLayer> PowerLayer::create(double power /*= 1*/, double scale /*= 1*/, double shift /*= 0*/)
{
const PowerFunctor f(power, scale, shift);
return Ptr<PowerLayer>(new ElementWiseLayer<PowerFunctor>(f));
}
}
}

@ -44,6 +44,11 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <cmath>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/core/ocl.hpp>
#ifdef HAVE_OPENCL
#include "modules/dnn/opencl_kernels_dnn.hpp"
#endif
namespace cv
{
@ -55,130 +60,259 @@ using std::exp;
using std::tanh;
using std::pow;
template<typename Func>
class ElementWiseLayer : public Layer
template<typename Func>
class ElementWiseLayer : public Func::Layer
{
bool useOpenCL;
Func func;
template<typename Dtype>
class PBody : public cv::ParallelLoopBody
{
Func func;
Func &func;
Dtype *data;
public:
ElementWiseLayer(LayerParams &_params) : func(_params) {}
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
outputs[i].shareFrom(*inputs[i]); //no data copy
}
PBody(Mat &mat, Func &func_) :
func(func_), data(mat.ptr<Dtype>())
{}
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void operator()(const Range &r) const
{
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->ptr() == outputs[i].ptr() && inputs[i]->type() == outputs[i].type());
size_t size = outputs[i].total();
            for (int i = r.start; i < r.end; i++)
                data[i] = func(data[i]);
        }
    };

public:

    ElementWiseLayer() {}
    ElementWiseLayer(const Func &f) : func(f) {}

    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        useOpenCL = ocl::useOpenCL();

        outputs.resize(inputs.size());
        for (size_t i = 0; i < inputs.size(); i++)
        {
            outputs[i].shareFrom(*inputs[i]); //no data copy

            //hotfix: shareFrom doesn't provide properly Mat/UMat switching
            if (useOpenCL)
                outputs[i].umatRef() = inputs[i]->umatRefConst();
            else
                outputs[i].matRef() = inputs[i]->matRefConst();
        }
    }

    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        #ifdef HAVE_OPENCL
        if (useOpenCL)
            forwardOCL(inputs, outputs);
        else
        #endif
            forwardCPU(inputs, outputs);
    }

    #ifdef HAVE_OPENCL
    void forwardOCL(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();

        for (size_t i = 0; i < inputs.size(); i++)
        {
            const UMat &src = inputs[i]->umatRefConst();
            UMat &dst = outputs[i].umatRef();
            CV_Assert(src.isContinuous() && dst.isContinuous() && !src.offset && !dst.offset);

            ocl::Kernel ker;
            CV_Assert(func.initKernel(ker, src));
            ker.set(0, (int)src.total());
            ker.set(1, ocl::KernelArg::PtrReadOnly(src));
            ker.set(2, ocl::KernelArg::PtrWriteOnly(dst));

            size_t gSize = src.total();
            CV_Assert(ker.run(1, &gSize, &wgSize, true));
        }
    }
    #endif

    void forwardCPU(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        for (size_t i = 0; i < inputs.size(); i++)
        {
            const Mat &src = inputs[i]->matRefConst();
            Mat &dst = outputs[i].matRef();
            CV_Assert(src.ptr() == dst.ptr() && src.isContinuous());

            Range sizeRange = Range(0, dst.total());
            if (dst.type() == CV_32F)
            {
                cv::parallel_for_(sizeRange, PBody<float>(dst, func));
            }
            else if (dst.type() == CV_64F)
            {
                cv::parallel_for_(sizeRange, PBody<double>(dst, func));
            }
            else
            {
                CV_Error(Error::StsNotImplemented, "Only CV_32F and CV_64F blobs are supported");
            }
        }
    }
};

#ifdef HAVE_OPENCL
static String oclGetTMacro(const UMat &m)
{
    return String("-DT=") + ocl::typeToStr(m.type()) + String(" ");
}
#endif

struct ReLUFunctor
{
    typedef ReLULayer Layer;

    double slope;

    ReLUFunctor(double slope_)
        : slope(slope_) {}

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return (x >= (TFloat)0) ? x : (TFloat)slope * x;
    }

    #ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        const char *buildoptSlope = (slope == 0) ? "-DRELU_NO_SLOPE" : "";
        String buildopt = oclGetTMacro(src) + buildoptSlope;

        if (!ker.create("ReLUForward", ocl::dnn::activations_oclsrc, buildopt))
            return false;

        if (slope != 0)
            ker.set(3, (float)slope);

        return true;
    }
    #endif
};

struct TanHFunctor
{
    typedef TanHLayer Layer;

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return tanh(x);
    }

    #ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("TanHForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;
        return true;
    }
    #endif
};

struct SigmoidFunctor
{
    typedef SigmoidLayer Layer;

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return (TFloat)1 / ((TFloat)1 + exp(-x));
    }

    #ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("SigmoidForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;
        return true;
    }
    #endif
};

struct AbsValFunctor
{
    typedef AbsLayer Layer;

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return abs(x);
    }

    #ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("AbsValForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;
        return true;
    }
    #endif
};

struct BNLLFunctor
{
    typedef BNLLLayer Layer;

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return log((TFloat)1 + exp(-abs(x)));
    }

    #ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("BNLLForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;
        return true;
    }
    #endif
};

struct PowerFunctor
{
    typedef PowerLayer Layer;

    double power, scale, shift;

    PowerFunctor(double power_, double scale_ = 1, double shift_ = 0)
        : power(power_), scale(scale_), shift(shift_) {}

    template<typename TFloat>
    inline TFloat operator()(TFloat x) const
    {
        return pow((TFloat)shift + (TFloat)scale * x, (TFloat)power);
    }

    #ifdef HAVE_OPENCL
    bool initKernel(ocl::Kernel &ker, const UMat &src) const
    {
        if (!ker.create("PowForward", ocl::dnn::activations_oclsrc, oclGetTMacro(src)))
            return false;

        ker.set(3, (float)power);
        ker.set(4, (float)scale);
        ker.set(5, (float)shift);
        return true;
    }
    #endif
};

}
}
#endif
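Reviewer note: every activation above is the same template instantiated with a different functor. A minimal usage sketch (illustrative, not part of the patch; assumes the headers of this file):

    // Leaky ReLU with slope 0.1 as an ElementWiseLayer instantiation:
    Ptr<Layer> relu(new ElementWiseLayer<ReLUFunctor>(ReLUFunctor(0.1)));
    // allocate() shares the input blobs, so forward() applies func(x) in-place,
    // via parallel_for_ on CPU or the "ReLUForward" OpenCL kernel on GPU.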

@ -42,73 +42,88 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "fully_connected_layer.hpp"
#include "op_blas.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
FullyConnectedLayer::FullyConnectedLayer(LayerParams &params) : Layer(params)
{
numOutputs = params.get<int>("num_output");
bias = params.get<bool>("bias_term", true);
axis_ = params.get<int>("axis", 1);
CV_Assert(blobs.size() == (bias ? 2U : 1U));
CV_Assert(blobs[0].dims() >= 2 && blobs[0].total() >= (size_t)numOutputs);
CV_Assert(!bias || blobs[1].total() == (size_t)numOutputs);
}
FullyConnectedLayerImpl::FullyConnectedLayerImpl(int axis_)
{
axis = axis_;
}
void FullyConnectedLayer::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{
CV_Assert(input.size() > 0);
void FullyConnectedLayerImpl::allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{
CV_Assert(input.size() > 0);
CV_Assert(1 <= blobs.size() && blobs.size() <= 2);
CV_Assert(blobs[0].dims() == 2);
axis = input[0]->canonicalAxis(axis_);
innerSize = (int)input[0]->total(axis);
bias = (blobs.size() >= 1);
axisCan = input[0]->canonicalAxis(axis);
dtype = input[0]->type();
numOutput = blobs[0].size(0);
innerSize = blobs[0].size(1);
outerSize = input[0]->total(0, axisCan);
CV_Assert((size_t)innerSize * (size_t)numOutputs == blobs[0].total());
CV_Assert(blobs[0].size(-2) == numOutputs && blobs[0].size(-1) == innerSize);
CV_Assert((size_t)innerSize == input[0]->total(axisCan));
CV_Assert(!bias || (size_t)numOutput == blobs[1].total());
output.resize(input.size());
for (size_t i = 0; i < input.size(); i++)
{
if (i != 0)
CV_Assert(input[i]->equalShape(*input[0]));
useOpenCL = ocl::useOpenCL();
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_UMAT;
this->reshape(*input[i], output[i]);
}
}
biasOnesBlob.create(Shape(outerSize, 1), dtype, allocFlags);
biasOnesBlob.setTo(1);
void FullyConnectedLayer::reshape(const Blob &inp, Blob &out)
output.resize(input.size());
for (size_t i = 0; i < input.size(); i++)
{
BlobShape inpShape = inp.shape();
BlobShape outShape(axis+1, inpShape.ptr());
outShape[axis] = numOutputs;
CV_Assert(i == 0 || (input[i]->equalShape(*input[0]) && input[i]->type() == dtype));
Shape outShape = input[i]->shape().slice(0, axis) + Shape(numOutput);
output[i].create(outShape, dtype, allocFlags);
}
}
out.create(outShape, inp.type());
void FullyConnectedLayerImpl::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(input, output);
else
#endif
forward_<Mat>(input, output);
}
template<typename XMat>
void FullyConnectedLayerImpl::forward_(std::vector<Blob *> &input, std::vector<Blob> &output)
{
const XMat &weight = blobs[0].getRefConst<XMat>();
const XMat *biasMat = NULL, *biasOnesMat = NULL;
if (bias)
{
biasOnesMat = &biasOnesBlob.getRefConst<XMat>();
biasMat = &blobs[1].getRefConst<XMat>();
}
void FullyConnectedLayer::forward(std::vector<Blob*> &input, std::vector<Blob> &output)
for (size_t i = 0; i < input.size(); i++)
{
for (size_t i = 0; i < input.size(); i++)
{
int M = (int)input[i]->total(0, axis);
int N = numOutputs;
int K = innerSize;
Mat srcMat(M, K, input[i]->type(), input[i]->ptrf());
Mat weight(N, K, blobs[0].type(), blobs[0].ptrf());
Mat dstMat(M, N, output[i].type(), output[i].ptrf());
//important: Caffe stores weights as transposed array
cv::gemm(srcMat, weight, 1, noArray(), 0, dstMat, GEMM_2_T);
if (bias)
{
Mat biasOnesMat = Mat::ones(M, 1, CV_32F);
Mat biasMat(1, N, CV_32F, blobs[1].ptrf());
cv::gemm(biasOnesMat, biasMat, 1, dstMat, 1, dstMat);
}
}
const XMat srcMat = reshaped(input[i]->getRefConst<XMat>(), Shape(outerSize, innerSize));
XMat dstMat = reshaped(output[i].getRef<XMat>(), Shape(outerSize, numOutput));
dnn::gemm(srcMat, weight, 1, dstMat, 0, GEMM_2_T);
if (bias)
dnn::gemm(*biasOnesMat, *biasMat, 1, dstMat, 1);
}
}
Ptr<InnerProductLayer> InnerProductLayer::create(int axis)
{
return Ptr<InnerProductLayer>(new FullyConnectedLayerImpl(axis));
}
}
}
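Reviewer note on the GEMM_2_T trick above, sketched with concrete made-up sizes (Caffe stores the weight matrix transposed, so no copy is needed):

    // outerSize = 10 samples, innerSize = 512, numOutput = 100:
    Mat src(10, 512, CV_32F), weight(100, 512, CV_32F), dst(10, 100, CV_32F);
    dnn::gemm(src, weight, 1, dst, 0, GEMM_2_T); // dst = src * weight^T -> 10x100
    // the bias is then a rank-1 update: ones(10,1) * bias(1,100) added onto dst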

@ -42,26 +42,30 @@
#ifndef __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_FULLY_CONNECTED_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>

namespace cv
{
namespace dnn
{

class FullyConnectedLayerImpl : public InnerProductLayer
{
    int axisCan, dtype;
    int numOutput, innerSize, outerSize;
    bool bias, useOpenCL;
    Blob biasOnesBlob;

    template<typename XMat>
    void forward_(std::vector<Blob*> &input, std::vector<Blob> &output);

public:
    FullyConnectedLayerImpl(int axisCan = 1);
    void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

}
}
#endif

@ -46,44 +46,5 @@ namespace cv
namespace dnn
{
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW)
{
if (params.has("kernel_h") && params.has("kernel_w"))
{
kernelH = params.get<int>("kernel_h");
kernelW = params.get<int>("kernel_w");
}
else if (params.has("kernel_size"))
{
kernelH = kernelW = params.get<int>("kernel_size");
}
else
{
CV_Error(cv::Error::StsBadArg, "kernel_size (or kernel_h and kernel_w) not specified");
}
if (params.has("pad_h") && params.has("pad_w"))
{
padH = params.get<int>("pad_h");
padW = params.get<int>("pad_w");
}
else
{
padH = padW = params.get<int>("pad", 0);
}
if (params.has("stride_h") && params.has("stride_w"))
{
strideH = params.get<int>("stride_h");
strideW = params.get<int>("stride_w");
}
else
{
strideH = strideW = params.get<int>("stride", 1);
}
CV_Assert(kernelH > 0 && kernelW > 0 && padH >= 0 && padW >= 0 && strideH > 0 && strideW > 0);
}
}
}
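The parameter precedence above, shown with illustrative values (LayerParams::set comes from Dict):

    LayerParams p;
    p.set("kernel_size", 3);   // kernelH = kernelW = 3
    p.set("stride", 2);        // strideH = strideW = 2; pad defaults to 0
    int kh, kw, ph, pw, sh, sw;
    getKernelParams(p, kh, kw, ph, pw, sh, sw);
    // explicit kernel_h/kernel_w (when both are present) override kernel_size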

@ -42,14 +42,14 @@
#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__
#include <opencv2/dnn.hpp>
#include "op_blas.hpp"
#include "op_im2col.hpp"
namespace cv
{
namespace dnn
{
void getKernelParams(LayerParams &params, int &kernelH, int &kernelW, int &padH, int &padW, int &strideH, int &strideW);
}
}

@ -42,123 +42,213 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "lrn_layer.hpp"
#include "modules/dnn/opencl_kernels_dnn.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn/shape_utils.hpp>
#include <algorithm>
namespace cv
{
namespace dnn
{
LRNLayer::LRNLayer(LayerParams &params) : Layer(params)
{
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = SPATIAL_NRM;
else
CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
size = params.get<int>("local_size", 5);
if (size % 2 != 1 || size <= 0)
CV_Error(Error::StsBadArg, "LRN layer supports only positive odd values for local_size");
alpha = params.get<double>("alpha", 1);
beta = params.get<double>("beta", 0.75);
}
void LRNLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
outputs.resize(1);
LRNLayerImpl::LRNLayerImpl(int type_, int size_, double alpha_, double beta_)
{
type = type_;
size = size_;
alpha = alpha_;
beta = beta_;
}
Vec4i shape = inputs[0]->shape4();
outputs[0].create(shape);
void LRNLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1 && inputs[0]->dims() == 4);
CV_Assert(type == CHANNEL_NRM || type == SPATIAL_NRM);
useOpenCL = cv::ocl::useOpenCL();
shape[0] = 1; //maybe make shape[0] = 1 too
bufBlob.create(shape);
}
if (type == SPATIAL_NRM && !useOpenCL)
buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_MAT);
if (type == CHANNEL_NRM && useOpenCL)
buf.create(inputs[0]->shape().slice(2), inputs[0]->type(), Blob::ALLOC_UMAT);
void LRNLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
outputs.resize(1);
outputs[0].create(inputs[0]->shape(), inputs[0]->type());
}
void LRNLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
switch (type)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
case CHANNEL_NRM:
channelNoramlization(src, dst);
break;
case SPATIAL_NRM:
spatialNormalization(src, dst);
break;
default:
CV_Error(Error::StsNotImplemented, "Unimplemented mode of LRN layer");
break;
}
}
switch (type)
{
case CHANNEL_NRM:
channelNoramlization(src, dst);
break;
case SPATIAL_NRM:
spatialNormalization(src, dst);
break;
default:
CV_Error(cv::Error::StsNotImplemented, "Unimplemented mode of LRN layer");
break;
}
template<typename XMat>
static XMat getPlane(XMat &m, int n, int cn)
{
return reshaped(slice(m, n, cn), BlobShape::like(m).slice(2));
}
void LRNLayerImpl::channelNoramlization(Blob &src, Blob &dst)
{
if (!useOpenCL)
channelNoramlization_<Mat>(src, dst);
else
{
//channelNoramlization_ocl(src.getRefConst<UMat>(), dst.getRef<UMat>()); //consumes a lot of memory
channelNoramlization_<UMat>(src, dst);
}
}
void LRNLayer::channelNoramlization(Blob &srcBlob, Blob &dstBlob)
template<typename XMat>
void LRNLayerImpl::channelNoramlization_(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
int ksize = (size - 1) / 2;
XMat srcMat = srcBlob.getRefConst<XMat>();
XMat dstMat = dstBlob.getRef<XMat>();
for (int n = 0; n < num; n++)
{
CV_DbgAssert(srcBlob.ptr() != dstBlob.ptr());
XMat accum = getPlane(dstMat, n, channels-1); //trick for memory saving
accum.setTo(0);
int num = srcBlob.num();
int channels = srcBlob.channels();
int ksize = (size - 1) / 2;
for (int cn = 0; cn < std::min(ksize, channels); cn++)
cv::accumulateSquare(getPlane(srcMat, n, cn), accum);
for (int n = 0; n < num; n++)
for (int cn = 0; cn < channels; cn++)
{
Mat accum = dstBlob.getPlane(n, channels-1); //trick for memory saving
accum.setTo(0);
for (int cn = 0; cn < std::min(ksize, channels); cn++)
cv::accumulateSquare(srcBlob.getPlane(n, cn), accum);
if (cn + ksize < channels)
{
cv::accumulateSquare(getPlane(srcMat, n, cn + ksize), accum);
}
for (int cn = 0; cn < channels; cn++)
if (cn - ksize - 1 >= 0)
{
if (cn + ksize < channels)
{
cv::accumulateSquare(srcBlob.getPlane(n, cn + ksize), accum);
}
if (cn - ksize - 1 >= 0)
{
Mat left = srcBlob.getPlane(n, cn - ksize - 1);
cv::subtract(accum, left.mul(left), accum); //subtractSquare
}
Mat dst = dstBlob.getPlane(n, cn);
accum.convertTo(dst, dst.type(), alpha/size, 1);
cv::pow(dst, beta, dst);
cv::divide(srcBlob.getPlane(n, cn), dst, dst);
//subtractSquare
XMat left = getPlane(srcMat, n, cn - ksize - 1);
cv::pow(left, 2, left);
cv::subtract(accum, left, accum);
}
XMat dst = getPlane(dstMat, n, cn);
accum.convertTo(dst, dst.type(), alpha/size, 1);
cv::pow(dst, beta, dst);
cv::divide(getPlane(srcMat, n, cn), dst, dst);
}
}
}
void LRNLayer::spatialNormalization(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
bool LRNLayerImpl::channelNoramlization_ocl(const UMat &src, UMat &dst)
{
#ifdef HAVE_OPENCL
if (src.offset != 0 || dst.offset != 0) //TODO: add offset
return false;
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kerScale("LRNFillScale", ocl::dnn::lrn_oclsrc, buildOpts);
if (kerScale.empty())
return false;
ocl::Kernel kerOutput("LRNComputeOutput", ocl::dnn::lrn_oclsrc, buildOpts);
if (kerOutput.empty())
return false;
Shape shape = Shape::like(src);
int ksize = (size - 1) / 2;
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
UMat &scaleBuf = buf.umatRef();
size_t nthreads = (size_t)(shape.total() / shape[1]);
kerScale.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(src), shape[0], shape[1], shape[2], shape[3],
size, (float)(alpha/size), (float)ksize, ocl::KernelArg::PtrWriteOnly(scaleBuf));
if (!kerScale.run(1, &nthreads, &wgSize, true))
return false;
nthreads = (size_t)shape.total();
kerOutput.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(src), ocl::KernelArg::PtrReadOnly(scaleBuf),
-beta, ocl::KernelArg::PtrWriteOnly(dst) );
if (!kerOutput.run(1, &nthreads, &wgSize, true))
return false;
return true;
#else
(void)src;
(void)dst;
return false;
#endif
}
void LRNLayerImpl::spatialNormalization(Blob &src, Blob &dst)
{
if (!useOpenCL)
spatialNormalization_<Mat>(src, dst);
else
spatialNormalization_<UMat>(src, dst);
}
//TODO: fix cv::boxFilter with BORDER_ISOLATED flag in CPU mode
template<>
void LRNLayerImpl::sqrBoxFilter_<Mat>(const Mat &src, Mat &dst)
{
Mat bufMat = buf.getRef<Mat>();
src.copyTo(bufMat);
cv::sqrBoxFilter(bufMat, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT);
}
template<>
void LRNLayerImpl::sqrBoxFilter_<UMat>(const UMat &src, UMat &dst)
{
cv::sqrBoxFilter(src, dst, dst.depth(), Size(size, size), Point(-1, -1), false, BORDER_CONSTANT | BORDER_ISOLATED);
}
for (int n = 0; n < num; n++)
template<typename XMat>
void LRNLayerImpl::spatialNormalization_(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
XMat srcMat = srcBlob.getRefConst<XMat>();
XMat dstMat = dstBlob.getRef<XMat>();
for (int n = 0; n < num; n++)
{
for (int cn = 0; cn < channels; cn++)
{
for (int cn = 0; cn < channels; cn++)
{
Mat src = srcBlob.getPlane(n, cn);
Mat dst = dstBlob.getPlane(n, cn);
uchar *dataDst0 = dst.data;
cv::pow(srcBlob.getPlane(n, cn), 2, dst);
//TODO: check border type
cv::boxFilter(dst, dst, dst.depth(), cv::Size(size, size), cv::Point(-1, -1), false, cv::BORDER_CONSTANT);
dst.convertTo(dst, dst.type(), alpha/(size*size), 1);
cv::pow(dst, beta, dst);
cv::divide(src, dst, dst);
CV_Assert(dataDst0 == dst.data); //debug
}
XMat src = getPlane(srcMat, n, cn);
XMat dst = getPlane(dstMat, n, cn);
sqrBoxFilter_(src, dst);
dst.convertTo(dst, dst.type(), alpha/(size*size), 1);
cv::pow(dst, beta, dst);
cv::divide(src, dst, dst);
}
}
}
Ptr<LRNLayer> LRNLayer::create(int type, int size, double alpha, double beta)
{
return Ptr<LRNLayer>(new LRNLayerImpl(type, size, alpha, beta));
}
}
}
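Reviewer note: the running sum of squares above implements Caffe's ACROSS_CHANNELS LRN. A scalar reference version for one spatial position (illustrative only; assumes <cmath>, <vector>, <algorithm>):

    // dst_c = src_c / (1 + alpha/size * sum over window(c) of src^2)^beta
    float lrnAcrossChannelsRef(const std::vector<float> &src, int c, int size,
                               float alpha, float beta)
    {
        int ksize = (size - 1) / 2;
        float accum = 0.f;
        for (int i = std::max(0, c - ksize); i <= std::min((int)src.size() - 1, c + ksize); i++)
            accum += src[i] * src[i];
        return src[c] / std::pow(1.f + alpha / size * accum, beta);
    }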

@ -42,34 +42,36 @@
#ifndef __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_LRN_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>

namespace cv
{
namespace dnn
{

class LRNLayerImpl : public LRNLayer
{
    bool useOpenCL;
    Blob buf;

    void channelNoramlization(Blob &src, Blob &dst);
    template<typename XMat>
    void channelNoramlization_(Blob &src, Blob &dst);
    bool channelNoramlization_ocl(const UMat &src, UMat &dst);

    void spatialNormalization(Blob &src, Blob &dst);
    template<typename XMat>
    void spatialNormalization_(Blob &src, Blob &dst);
    template<typename XMat>
    void sqrBoxFilter_(const XMat &src, XMat &dst);

public:
    LRNLayerImpl(int type = CHANNEL_NRM, int size = 5, double alpha = 1, double beta = 0.75);
    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

}
}
#endif

@ -42,20 +42,21 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "mvn_layer.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
MVNLayer::MVNLayer(LayerParams &params) : Layer(params)
MVNLayerImpl::MVNLayerImpl(bool normVariance_, bool acrossChannels_, double eps_)
{
eps = params.get<double>("eps", 1e-9);
acrossChannels = params.get<bool>("across_channels", false);
normalizeVariance = params.get<bool>("normalize_variance", true);
normVariance = normVariance_;
acrossChannels = acrossChannels_;
eps = eps_;
}
void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
void MVNLayerImpl::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
@ -65,20 +66,17 @@ void MVNLayer::allocate(const std::vector<Blob *> &inputs, std::vector<Blob> &ou
}
}
void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
void MVNLayerImpl::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
{
for (size_t inpIdx = 0; inpIdx < inputs.size(); inpIdx++)
{
Blob &inpBlob = *inputs[inpIdx];
Blob &outBlob = outputs[inpIdx];
int workSize[2];
int splitDim = (acrossChannels) ? 1 : 2;
workSize[0] = (int)inpBlob.total(0, splitDim);
workSize[1] = (int)inpBlob.total(splitDim);
Mat inpMat = inpBlob.matRef().reshape(1, 2, workSize);
Mat outMat = outBlob.matRef().reshape(1, 2, workSize);
Shape workSize((int)inpBlob.total(0, splitDim), (int)inpBlob.total(splitDim));
Mat inpMat = reshaped(inpBlob.matRefConst(), workSize);
Mat outMat = reshaped(outBlob.matRef(), workSize);
Scalar mean, dev;
for (int i = 0; i < workSize[0]; i++)
@ -86,12 +84,18 @@ void MVNLayer::forward(std::vector<Blob *> &inputs, std::vector<Blob> &outputs)
Mat inpRow = inpMat.row(i);
Mat outRow = outMat.row(i);
cv::meanStdDev(inpRow, mean, (normalizeVariance) ? dev : noArray());
double alpha = (normalizeVariance) ? 1/(eps + dev[0]) : 1;
cv::meanStdDev(inpRow, mean, (normVariance) ? dev : noArray());
double alpha = (normVariance) ? 1/(eps + dev[0]) : 1;
inpRow.convertTo(outRow, outRow.type(), alpha, -mean[0] * alpha);
}
}
}
Ptr<MVNLayer> MVNLayer::create(bool normVariance, bool acrossChannels, double eps)
{
return Ptr<MVNLayer>(new MVNLayerImpl(normVariance, acrossChannels, eps));
}
}
}
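For reference, each row of the reshaped matrix is normalized independently (values illustrative):

    // out = (in - mean) / (eps + stddev)  when normalize_variance is set,
    // out = in - mean                     otherwise.
    // Example: the row {1, 2, 3} has mean 2 and stddev ~0.816, so with a tiny
    // eps it maps to approximately {-1.22, 0, 1.22}.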

@ -42,20 +42,18 @@
#ifndef __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_MVN_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>

namespace cv
{
namespace dnn
{

class MVNLayerImpl : public MVNLayer
{
public:
    MVNLayerImpl(bool normVariance_ = true, bool acrossChannels_ = false, double eps_ = 1e-9);
    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

@ -0,0 +1,95 @@
#include "op_blas.hpp"
#if HAVE_CBLAS
#include "opencv_cblas.hpp"
#endif
#include <iostream>
namespace cv
{
namespace dnn
{
void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags)
{
if (C.isMat())
gemmCPU(A.getMat(), B.getMat(), alpha, C.getMatRef(), beta, flags);
else
{
cv::gemm(A, B, alpha, (beta == 0) ? noArray() : C, beta, C, flags);
}
}
inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
{
CV_DbgAssert(A.dims == 2);
rows = (isTrans) ? A.cols : A.rows;
cols = (isTrans) ? A.rows : A.cols;
}
void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags /*= 0*/)
{
#if HAVE_CBLAS
bool transA = static_cast<bool>(flags & GEMM_1_T);
bool transB = static_cast<bool>(flags & GEMM_2_T);
bool transC = static_cast<bool>(flags & GEMM_3_T);
int Arows, Acols, Brows, Bcols, Crows, Ccols;
SwapRowCols(A, Arows, Acols, transA);
SwapRowCols(B, Brows, Bcols, transB);
SwapRowCols(C, Crows, Ccols, transC);
CV_Assert(!(flags & GEMM_3_T));
CV_Assert(Acols == Brows && Arows == Crows && Bcols == Ccols);
CV_Assert(A.isContinuous() && B.isContinuous() && C.isContinuous());
CV_Assert(A.type() == B.type() && B.type() == C.type());
CV_Assert(A.data != C.data && B.data != C.data);
if (C.type() == CV_32F)
{
cblas_sgemm(CblasRowMajor, transA ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
Arows, Bcols, Acols,
(float)alpha, A.ptr<float>(), A.cols,
B.ptr<float>(), B.cols,
(float)beta, C.ptr<float>(), C.cols);
}
else if (C.type() == CV_64F)
{
//TODO: Should be tested
cblas_dgemm(CblasRowMajor, transA ? CblasTrans : CblasNoTrans, transB ? CblasTrans : CblasNoTrans,
Arows, Bcols, Acols,
alpha, A.ptr<double>(), A.cols,
B.ptr<double>(), B.cols,
beta, C.ptr<double>(), C.cols);
}
else
{
CV_Error(Error::BadDepth, "Only floating point types are supported");
}
#else
cv::gemm(A, B, alpha, C, beta, C, flags);
#endif
}
int getBlasThreads()
{
#ifdef OPENBLAS_VERSION
return openblas_get_num_threads();
#else
return 1;
#endif
}
void setBlasThreads(int numThreads)
{
#ifdef OPENBLAS_VERSION
openblas_set_num_threads(numThreads);
goto_set_num_threads(numThreads);
#else
(void)numThreads; //suppress compilers' warning
#endif
}
}
}
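A small usage sketch of the wrapper above (illustrative shapes; per the asserts, gemmCPU needs continuous, same-type matrices with C distinct from A and B):

    Mat A(4, 3, CV_32F), B(3, 5, CV_32F), C(4, 5, CV_32F);
    dnn::gemmCPU(A, B, 1.0, C, 0.0);           // C = A*B, via cblas_sgemm when available
    dnn::gemmCPU(C, B, 1.0, A, 0.0, GEMM_2_T); // A = C*B^T -> 4x3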

@ -39,47 +39,21 @@
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_OP_BLAS_HPP__
#define __OPENCV_DNN_LAYERS_OP_BLAS_HPP__
#include "../precomp.hpp"

namespace cv
{
namespace dnn
{

    int getBlasThreads();
    void setBlasThreads(int numThreads);

    void gemm(InputArray A, InputArray B, double alpha, InputOutputArray C, double beta, int flags = 0);
    void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags = 0);

}
}
#endif

@ -39,88 +39,84 @@
//
//M*/
#include "../precomp.hpp"
#include <opencv2/core/ocl.hpp>
#include "opencl_kernels_dnn.hpp"
#include "op_im2col.hpp"

namespace cv
{
namespace dnn
{

#ifdef HAVE_OPENCL

bool im2col_ocl(const UMat &img,
                int channels, int height, int width,
                int kernel_h, int kernel_w,
                int pad_h, int pad_w,
                int stride_h, int stride_w,
                UMat &col)
{
    int esz = img.elemSize();
    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
    int channels_col = channels * kernel_h * kernel_w;

    CV_Assert(img.isContinuous() && col.isContinuous());
    CV_Assert(img.total() == (size_t)channels * height * width);
    CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);

    ocl::Kernel ker("im2col", ocl::dnn::im2col_oclsrc, String("-DT=") + ocl::typeToStr(img.type()));
    if (ker.empty())
        return false;

    ker.args(ocl::KernelArg::PtrReadOnly(img), (int)img.offset/esz,
             channels, height, width,
             kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
             height_col, width_col,
             ocl::KernelArg::PtrWriteOnly(col), (int)col.offset/esz
             );

    size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
    size_t globalSize = (size_t)channels * height_col * width_col;
    return ker.run(1, &globalSize, &localSize, true);
}

bool col2im_ocl(const UMat &col,
                int channels, int height, int width,
                int kernel_h, int kernel_w,
                int pad_h, int pad_w,
                int stride_h, int stride_w,
                UMat &img)
{
    int esz = img.elemSize();
    int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
    int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
    int channels_col = channels * kernel_h * kernel_w;

    CV_Assert(img.isContinuous() && col.isContinuous());
    CV_Assert(img.total() == (size_t)channels * height * width);
    CV_Assert(col.total() == (size_t)channels_col * height_col * width_col);

    ocl::Kernel ker("col2im", ocl::dnn::col2im_oclsrc, String("-DT=") + ocl::typeToStr(col.type()));
    if (ker.empty())
        return false;

    ker.args((int)img.total(),
             ocl::KernelArg::PtrReadOnly(col), (int)col.offset/esz,
             height, width, channels,
             kernel_h, kernel_w,
             pad_h, pad_w,
             stride_h, stride_w,
             height_col, width_col,
             ocl::KernelArg::PtrWriteOnly(img), (int)img.offset/esz);

    size_t localSize = ocl::Device::getDefault().maxWorkGroupSize();
    size_t globalSize = img.total();
    return ker.run(1, &globalSize, &localSize, true);
}

#endif

}
}

@ -0,0 +1,231 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_IM2COL_HPP__
#define __OPENCV_DNN_LAYERS_IM2COL_HPP__
#include "../precomp.hpp"
#include <iostream>
namespace cv
{
namespace dnn
{
template <typename Dtype>
class im2col_CpuPBody : public cv::ParallelLoopBody
{
const Dtype* data_im;
int channels, height, width;
int kernel_h, kernel_w;
int pad_h, pad_w;
int stride_h, stride_w;
Dtype* data_col;
int height_col, width_col, channels_col;
im2col_CpuPBody() {}
public:
static void run(const Dtype* data_im,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_col)
{
im2col_CpuPBody<Dtype> t;
t.data_im = data_im;
t.data_col = data_col;
t.channels = channels; t.height = height; t.width = width;
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
t.channels_col = channels * kernel_h * kernel_w;
cv::parallel_for_(Range(0, t.channels_col), t);
}
virtual void operator ()(const Range &r) const
{
for (int c = r.start; c < r.end; ++c) {
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(c * height_col + h) * width_col + w] =
data_im[(c_im * height + h_pad) * width + w_pad];
else
data_col[(c * height_col + h) * width_col + w] = 0;
}
}
}
}
};
template <typename Dtype>
class col2im_CpuPBody : public cv::ParallelLoopBody
{
const Dtype* data_col;
int channels, height, width;
int kernel_h, kernel_w;
int pad_h, pad_w;
int stride_h, stride_w;
Dtype* data_im;
int height_col, width_col;
col2im_CpuPBody() {}
public:
static void run(const Dtype* data_col,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_im)
{
//TODO: single-threaded version switch
col2im_CpuPBody t;
t.data_col = data_col;
t.data_im = data_im;
t.channels = channels; t.height = height; t.width = width;
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int img_total = channels * height * width;
cv::parallel_for_(Range(0, img_total), t);
}
virtual void operator ()(const Range &r) const
{
for (int index = r.start; index < r.end; index++)
{
Dtype val = 0;
int w = index % width + pad_w;
int h = (index / width) % height + pad_h;
int c = index / (width * height);
// compute the start and end of the output
int w_col_start = (w < kernel_w) ? 0 : (w - kernel_w) / stride_w + 1;
int w_col_end = std::min(w / stride_w + 1, width_col);
int h_col_start = (h < kernel_h) ? 0 : (h - kernel_h) / stride_h + 1;
int h_col_end = std::min(h / stride_h + 1, height_col);
// equivalent implementation
int offset =
(c * kernel_h * kernel_w + h * kernel_w + w) * height_col * width_col;
int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
int coeff_w_col = (1 - stride_w * height_col * width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
}
}
data_im[index] = val;
}
}
};
//single-threaded version
template <typename Dtype>
void col2im_cpu(const Dtype* data_col,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
Dtype* data_im)
{
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
std::memset(data_im, 0, height * width * channels * sizeof(Dtype));
for (int c = 0; c < channels_col; ++c)
{
int w_offset = c % kernel_w;
int h_offset = (c / kernel_w) % kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h)
{
for (int w = 0; w < width_col; ++w)
{
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(c_im * height + h_pad) * width + w_pad] +=
data_col[(c * height_col + h) * width_col + w];
}
}
}
}
#ifdef HAVE_OPENCL
bool im2col_ocl(const UMat &img,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &col);
bool col2im_ocl(const UMat &col,
int channels, int height, int width,
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
UMat &img);
#endif
}
}
#endif
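Worked shape example for im2col (illustrative numbers): a 3x5x5 image with a 3x3 kernel, pad 0, stride 1 gives height_col = width_col = (5 - 3)/1 + 1 = 3 and channels_col = 3*3*3 = 27, i.e. a 27x9 column matrix; convolution then reduces to a single GEMM of the (numOutput x 27) weights against it:

    std::vector<float> im(3 * 5 * 5, 1.f), col(27 * 3 * 3);
    im2col_CpuPBody<float>::run(im.data(), 3, 5, 5, 3, 3, 0, 0, 1, 1, col.data());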

@ -42,8 +42,10 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "pooling_layer.hpp"
#include "opencl_kernels_dnn.hpp"
#include <float.h>
#include <algorithm>
#include <opencv2/core/ocl.hpp>
using std::max;
using std::min;
@ -53,155 +55,216 @@ namespace dnn
{
//TODO: add ceil_mode param
PoolingLayer::PoolingLayer(LayerParams &params) : Layer(params)
{
if (params.has("pool"))
{
String pool = params.get<String>("pool").toLowerCase();
if (pool == "max")
type = MAX;
else if (pool == "ave")
type = AVE;
else if (pool == "stochastic")
type = STOCHASTIC;
else
CV_Error(cv::Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
}
else
{
type = MAX;
}
PoolingLayerImpl::PoolingLayerImpl()
{
getKernelParams(params, kernelH, kernelW, padH, padW, strideH, strideW);
}
}
void PoolingLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
PoolingLayerImpl::PoolingLayerImpl(int type_, Size kernel_, Size stride_, Size pad_)
{
type = type_;
kernel = kernel_;
pad = pad_;
stride = stride_;
}
inpW = inputs[0]->cols();
inpH = inputs[0]->rows();
computeOutputShape(inpH, inpW);
void PoolingLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() > 0);
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->rows() == inpH && inputs[i]->cols() == inpW);
outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), outH, outW));
}
inp = inputs[0]->size2();
computeOutputShape(inp);
useOpenCL = ocl::useOpenCL();
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
CV_Assert(inputs[i]->rows() == inp.height && inputs[i]->cols() == inp.width);
outputs[i].create(BlobShape(inputs[i]->num(), inputs[i]->channels(), out.height, out.width));
}
}
void PoolingLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
void PoolingLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t ii = 0; ii < inputs.size(); ii++)
{
for (size_t ii = 0; ii < inputs.size(); ii++)
switch (type)
{
switch (type)
{
case MAX:
maxPooling(*inputs[ii], outputs[ii]);
break;
case AVE:
avePooling(*inputs[ii], outputs[ii]);
break;
default:
CV_Error(cv::Error::StsNotImplemented, "Not implemented");
break;
}
case MAX:
maxPooling(*inputs[ii], outputs[ii]);
break;
case AVE:
avePooling(*inputs[ii], outputs[ii]);
break;
default:
CV_Error(Error::StsNotImplemented, "Not implemented");
break;
}
}
}
void PoolingLayerImpl::maxPooling(Blob &src, Blob &dst)
{
if (!useOpenCL)
maxPooling_cpu(src, dst);
else
{
CV_Assert(maxPooling_ocl(src, dst));
}
}
void PoolingLayer::maxPooling(Blob &input, Blob &output)
bool PoolingLayerImpl::maxPooling_ocl(Blob &src, Blob &dst)
{
return pooling_ocl("MaxPoolForward", src, dst);
}
void PoolingLayerImpl::avePooling(Blob &src, Blob &dst)
{
if (!useOpenCL)
avePooling_cpu(src, dst);
else
{
CV_DbgAssert(output.rows() == outH && output.cols() == outW);
CV_Assert(avePooling_ocl(src, dst));
}
}
bool PoolingLayerImpl::avePooling_ocl(Blob &src, Blob &dst)
{
return pooling_ocl("AvePoolForward", src, dst);
}
for (int n = 0; n < input.num(); ++n)
void PoolingLayerImpl::maxPooling_cpu(Blob &src, Blob &dst)
{
CV_DbgAssert(dst.rows() == out.height && dst.cols() == out.width);
for (int n = 0; n < src.num(); ++n)
{
for (int c = 0; c < src.channels(); ++c)
{
for (int c = 0; c < input.channels(); ++c)
{
float *srcData = input.ptrf(n, c);
float *dstData = output.ptrf(n, c);
const float *srcData = src.ptrf(n, c);
float *dstData = dst.ptrf(n, c);
for (int ph = 0; ph < outH; ++ph)
for (int ph = 0; ph < out.height; ++ph)
{
for (int pw = 0; pw < out.width; ++pw)
{
for (int pw = 0; pw < outW; ++pw)
{
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW;
int hend = min(hstart + kernelH, inpH);
int wend = min(wstart + kernelW, inpW);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
const int poolIndex = ph * outW + pw;
float max_val = -FLT_MAX;
for (int h = hstart; h < hend; ++h)
for (int w = wstart; w < wend; ++w)
{
const int index = h * inpW + w;
if (srcData[index] > max_val)
max_val = srcData[index];
}
dstData[poolIndex] = max_val;
}
int hstart = ph * stride.height - pad.height;
int wstart = pw * stride.width - pad.width;
int hend = min(hstart + kernel.height, inp.height);
int wend = min(wstart + kernel.width, inp.width);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
const int poolIndex = ph * out.width + pw;
float max_val = -FLT_MAX;
for (int h = hstart; h < hend; ++h)
for (int w = wstart; w < wend; ++w)
{
const int index = h * inp.width + w;
if (srcData[index] > max_val)
max_val = srcData[index];
}
dstData[poolIndex] = max_val;
}
}
}
}
}
#ifdef HAVE_OPENCL
bool PoolingLayerImpl::pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask)
{
const UMat &srcMat = src.umatRefConst();
UMat &dstMat = dst.umatRef();
CV_Assert(mask == NULL && srcMat.offset == 0 && dstMat.offset == 0);
ocl::Kernel ker(kname, ocl::dnn::pooling_oclsrc, String("-DT=") + ocl::typeToStr(src.type()));
if (ker.empty())
return false;
BlobShape s = src.shape();
size_t nthreads = dst.total();
ker.args((int)nthreads,
ocl::KernelArg::PtrReadOnly(srcMat), s[0], s[1], s[2], s[3],
out.height, out.width, kernel.height, kernel.width,
stride.height, stride.width, pad.height, pad.width,
ocl::KernelArg::PtrWriteOnly(dstMat));
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
if (!ker.run(1, &nthreads, &wgSize, true))
return false;
return true;
}
#else
bool PoolingLayerImpl::pooling_ocl(const char*, const Blob&, Blob&, Blob*)
{
return false;
}
#endif
void PoolingLayer::avePooling(Blob &input, Blob &output)
void PoolingLayerImpl::avePooling_cpu(Blob &src, Blob &dst)
{
for (int n = 0; n < src.num(); ++n)
{
for (int n = 0; n < input.num(); ++n)
for (int c = 0; c < src.channels(); ++c)
{
for (int c = 0; c < input.channels(); ++c)
{
float *srcData = input.ptrf(n, c);
float *dstData = output.ptrf(n, c);
const float *srcData = src.ptrf(n, c);
float *dstData = dst.ptrf(n, c);
for (int ph = 0; ph < outH; ++ph)
for (int ph = 0; ph < out.height; ++ph)
{
for (int pw = 0; pw < out.width; ++pw)
{
for (int pw = 0; pw < outW; ++pw)
{
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW;
int hend = min(hstart + kernelH, inpH + padH);
int wend = min(wstart + kernelW, inpW + padW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
hend = min(hend, inpH);
wend = min(wend, inpW);
dstData[ph * outW + pw] = 0.f;
for (int h = hstart; h < hend; ++h)
for (int w = wstart; w < wend; ++w)
dstData[ph * outW + pw] += srcData[h * inpW + w];
dstData[ph * outW + pw] /= poolSize;
}
int hstart = ph * stride.height - pad.height;
int wstart = pw * stride.width - pad.width;
int hend = min(hstart + kernel.height, inp.height + pad.height);
int wend = min(wstart + kernel.width, inp.width + pad.width);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
hend = min(hend, inp.height);
wend = min(wend, inp.width);
dstData[ph * out.width + pw] = 0.f;
for (int h = hstart; h < hend; ++h)
for (int w = wstart; w < wend; ++w)
dstData[ph * out.width + pw] += srcData[h * inp.width + w];
dstData[ph * out.width + pw] /= poolSize;
}
}
}
}
}
}
void PoolingLayer::computeOutputShape(int inH, int inW)
{
//Yeah, something strange Caffe scheme-)
outH = static_cast<int>(ceil(static_cast<float>(inH + 2 * padH - kernelH) / strideH)) + 1;
outW = static_cast<int>(ceil(static_cast<float>(inW + 2 * padW - kernelW) / strideW)) + 1;
void PoolingLayerImpl::computeOutputShape(Size inpSz)
{
//Yeah, something strange Caffe scheme-)
out.height = static_cast<int>(ceil(static_cast<float>(inpSz.height + 2 * pad.height - kernel.height) / stride.height)) + 1;
out.width = static_cast<int>(ceil(static_cast<float>(inpSz.width + 2 * pad.width - kernel.width) / stride.width)) + 1;
if (padH || padW)
{
// If we have padding, ensure that the last pooling starts strictly
// inside the image (instead of at the padding); otherwise clip the last.
if ((outH - 1) * strideH >= inH + padH)
--outH;
if ((outW - 1) * strideW >= inW + padW)
--outW;
CV_Assert((outH - 1) * strideH < inH + padH);
CV_Assert((outW - 1) * strideW < inW + padW);
}
if (pad.height || pad.width)
{
// If we have padding, ensure that the last pooling starts strictly
// inside the image (instead of at the padding); otherwise clip the last.
if ((out.height - 1) * stride.height >= inpSz.height + pad.height)
--out.height;
if ((out.width - 1) * stride.width >= inpSz.width + pad.width)
--out.width;
CV_Assert((out.height - 1) * stride.height < inpSz.height + pad.height);
CV_Assert((out.width - 1) * stride.width < inpSz.width + pad.width);
}
}
Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size pad)
{
return Ptr<PoolingLayer>(new PoolingLayerImpl(type, kernel, stride, pad));
}
}
}
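Reviewer note on the "strange Caffe scheme": a worked example of computeOutputShape with illustrative numbers:

    // inp = 5, kernel = 3, stride = 3, pad = 1:
    //   out = ceil((5 + 2*1 - 3) / 3.f) + 1 = ceil(1.33) + 1 = 3
    // clip check: (out - 1) * stride = 6 >= inp + pad = 6, so out drops to 2,
    // which guarantees the last window starts inside the image, not in the padding.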

@ -1,4 +1,4 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
@ -42,37 +42,39 @@
#ifndef __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_POOLING_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>

namespace cv
{
namespace dnn
{

class PoolingLayerImpl : public PoolingLayer
{
    bool useOpenCL;
    Size inp, out;

    void computeOutputShape(Size inpSz);

    bool pooling_ocl(const char *kname, const Blob &src, Blob &dst, Blob *mask = NULL);

    void maxPooling(Blob &src, Blob &dst);
    void maxPooling_cpu(Blob &src, Blob &dst);
    bool maxPooling_ocl(Blob &src, Blob &dst);

    void avePooling(Blob &src, Blob &dst);
    void avePooling_cpu(Blob &src, Blob &dst);
    bool avePooling_ocl(Blob &src, Blob &dst);

public:
    PoolingLayerImpl();
    PoolingLayerImpl(int type, Size kernel, Size stride, Size pad);

    void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

}
}
#endif

@ -0,0 +1,440 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "../precomp.hpp"
#include "recurrent_layers.hpp"
#include "op_blas.hpp"
#include <iostream>
#include <cmath>
namespace cv
{
namespace dnn
{
template<typename Dtype>
static void tanh(const Mat &src, Mat &dst)
{
MatConstIterator_<Dtype> itSrc = src.begin<Dtype>();
MatIterator_<Dtype> itDst = dst.begin<Dtype>();
for (; itSrc != src.end<Dtype>(); itSrc++, itDst++)
*itDst = std::tanh(*itSrc);
}
static void tanh(const Mat &src, Mat &dst)
{
dst.create(src.dims, (const int*)src.size, src.type());
if (src.type() == CV_32F)
tanh<float>(src, dst);
else if (src.type() == CV_64F)
tanh<double>(src, dst);
else
CV_Error(Error::StsUnsupportedFormat, "Function supports only floating point types");
}
static void sigmoid(const Mat &src, Mat &dst)
{
cv::exp(-src, dst);
cv::pow(1 + dst, -1, dst);
}
class LSTMLayerImpl : public LSTMLayer
{
int numOut, numTimeStamps, numSamples, numInp;
Mat hInternal, cInternal;
Mat gates, dummyOnes;
int dtype;
bool allocated;
BlobShape outTailShape; //shape of single output sample
BlobShape outTsMatShape, outTsShape; //shape of N output samples
BlobShape outResShape; //shape of T timestamps and N output samples
bool useTimestampDim;
bool produceCellOutput;
public:
LSTMLayerImpl()
{
type = "LSTM";
useTimestampDim = true;
produceCellOutput = false;
allocated = false;
outTailShape = BlobShape::empty();
}
void setUseTimstampsDim(bool use)
{
CV_Assert(!allocated);
useTimestampDim = use;
}
void setProduceCellOutput(bool produce)
{
CV_Assert(!allocated);
produceCellOutput = produce;
}
void setC(const Blob &C)
{
CV_Assert(cInternal.empty() || C.total() == cInternal.total());
if (!cInternal.empty())
C.reshaped(BlobShape::like(cInternal)).matRefConst().copyTo(cInternal);
else
C.matRefConst().copyTo(cInternal);
}
void setH(const Blob &H)
{
CV_Assert(hInternal.empty() || H.total() == hInternal.total());
if (!hInternal.empty())
H.reshaped(BlobShape::like(hInternal)).matRefConst().copyTo(hInternal);
else
H.matRefConst().copyTo(hInternal);
}
Blob getC() const
{
CV_Assert(!cInternal.empty());
//TODO: add convenient Mat -> Blob constructor
Blob res(outTsShape, cInternal.type());
res.fill(res.shape(), res.type(), cInternal.data);
return res;
}
Blob getH() const
{
CV_Assert(!hInternal.empty());
Blob res(outTsShape, hInternal.type());
res.fill(res.shape(), res.type(), hInternal.data);
return res;
}
void setOutShape(const BlobShape &outTailShape_)
{
CV_Assert(!allocated || outTailShape_.total() == outTailShape.total());
outTailShape = outTailShape_;
}
void setWeights(const Blob &Wh, const Blob &Wx, const Blob &bias)
{
CV_Assert(Wh.dims() == 2 && Wx.dims() == 2);
CV_Assert(Wh.size(0) == Wx.size(0));
CV_Assert(Wh.size(0) == 4*Wh.size(1));
CV_Assert(Wh.size(0) == (int)bias.total());
CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type());
blobs.resize(3);
blobs[0] = Wh;
blobs[1] = Wx;
blobs[2] = bias;
blobs[2].reshape(BlobShape(1, (int)bias.total()));
}
void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{
CV_Assert(blobs.size() == 3);
CV_Assert(input.size() == 1);
Blob &Wh = blobs[0], &Wx = blobs[1];
numOut = Wh.size(1);
numInp = Wx.size(1);
if (!outTailShape.isEmpty())
CV_Assert(outTailShape.total() == numOut);
else
outTailShape = BlobShape(numOut);
if (useTimestampDim)
{
CV_Assert(input[0]->dims() >= 2 && (int)input[0]->total(2) == numInp);
numTimeStamps = input[0]->size(0);
numSamples = input[0]->size(1);
outResShape = BlobShape(numTimeStamps, numSamples) + outTailShape;
}
else
{
CV_Assert(input[0]->dims() >= 1 && (int)input[0]->total(1) == numInp);
numTimeStamps = 1;
numSamples = input[0]->size(0);
outResShape = BlobShape(numSamples) + outTailShape;
}
outTsMatShape = BlobShape(numSamples, numOut);
outTsShape = BlobShape(numSamples) + outTailShape;
dtype = input[0]->type();
CV_Assert(dtype == CV_32F || dtype == CV_64F);
CV_Assert(Wh.type() == dtype);
output.resize( (produceCellOutput) ? 2 : 1 );
output[0].create(outResShape, dtype);
if (produceCellOutput)
output[1].create(outResShape, dtype);
if (hInternal.empty())
{
hInternal.create(outTsMatShape.dims(), outTsMatShape.ptr(), dtype);
hInternal.setTo(0);
}
else
{
CV_Assert((int)hInternal.total() == numSamples*numOut);
hInternal = hInternal.reshape(1, outTsMatShape.dims(), outTsMatShape.ptr());
}
if (cInternal.empty())
{
cInternal.create(outTsMatShape.dims(), outTsMatShape.ptr(), dtype);
cInternal.setTo(0);
}
else
{
CV_Assert((int)cInternal.total() == numSamples*numOut);
cInternal = cInternal.reshape(1, outTsMatShape.dims(), outTsMatShape.ptr());
}
gates.create(numSamples, 4*numOut, dtype);
dummyOnes.create(numSamples, 1, dtype);
dummyOnes.setTo(1);
allocated = true;
}
void forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
const Mat &Wh = blobs[0].matRefConst();
const Mat &Wx = blobs[1].matRefConst();
const Mat &bias = blobs[2].matRefConst();
int numSamplesTotal = numTimeStamps*numSamples;
Mat xTs = input[0]->reshaped(BlobShape(numSamplesTotal, numInp)).matRefConst();
BlobShape outMatShape(numSamplesTotal, numOut);
Mat hOutTs = output[0].reshaped(outMatShape).matRef();
Mat cOutTs = (produceCellOutput) ? output[1].reshaped(outMatShape).matRef() : Mat();
for (int ts = 0; ts < numTimeStamps; ts++)
{
Range curRowRange(ts*numSamples, (ts + 1)*numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
gemmCPU(xCurr, Wx, 1, gates, 0, GEMM_2_T); // Wx * x_t
gemmCPU(hInternal, Wh, 1, gates, 1, GEMM_2_T); //+Wh * h_{t-1}
gemmCPU(dummyOnes, bias, 1, gates, 1); //+b
Mat gatesIFO = gates.colRange(0, 3*numOut);
Mat gateI = gates.colRange(0*numOut, 1*numOut);
Mat gateF = gates.colRange(1*numOut, 2*numOut);
Mat gateO = gates.colRange(2*numOut, 3*numOut);
Mat gateG = gates.colRange(3*numOut, 4*numOut);
sigmoid(gatesIFO, gatesIFO);
tanh(gateG, gateG);
//compute c_t
cv::multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1}
cv::multiply(gateI, gateG, gateI); // i_t (*) g_t
cv::add(gateF, gateI, cInternal); // c_t = f_t (*) c_{t-1} + i_t (*) g_t
//compute h_t
tanh(cInternal, hInternal);
cv::multiply(gateO, hInternal, hInternal);
//save results in output blobs
hInternal.copyTo(hOutTs.rowRange(curRowRange));
if (produceCellOutput)
cInternal.copyTo(cOutTs.rowRange(curRowRange));
}
}
};
Ptr<LSTMLayer> LSTMLayer::create()
{
return Ptr<LSTMLayer>(new LSTMLayerImpl());
}
void LSTMLayer::forward(std::vector<Blob*>&, std::vector<Blob>&)
{
CV_Error(Error::StsInternal, "This function should be unreached");
}
int LSTMLayer::inputNameToIndex(String inputName)
{
if (inputName.toLowerCase() == "x")
return 0;
return -1;
}
int LSTMLayer::outputNameToIndex(String outputName)
{
if (outputName.toLowerCase() == "h")
return 0;
else if (outputName.toLowerCase() == "c")
return 1;
return -1;
}
class RNNLayerImpl : public RNNLayer
{
int numX, numH, numO;
int numSamples, numTimestamps, numSamplesTotal;
int dtype;
Mat Whh, Wxh, bh;
Mat Who, bo;
Mat hCurr, hPrev, dummyBiasOnes;
bool produceH;
public:
RNNLayerImpl()
{
type = "RNN";
produceH = false;
}
void setProduceHiddenOutput(bool produce = false)
{
produceH = produce;
}
void setWeights(const Blob &W_xh, const Blob &b_h, const Blob &W_hh, const Blob &W_ho, const Blob &b_o)
{
CV_Assert(W_hh.dims() == 2 && W_xh.dims() == 2);
CV_Assert(W_hh.size(0) == W_xh.size(0) && W_hh.size(0) == W_hh.size(1) && (int)b_h.total() == W_xh.size(0));
CV_Assert(W_ho.size(0) == (int)b_o.total());
CV_Assert(W_ho.size(1) == W_hh.size(1));
blobs.resize(5);
blobs[0] = W_xh;
blobs[1] = b_h;
blobs[2] = W_hh;
blobs[3] = W_ho;
blobs[4] = b_o;
}
void allocate(const std::vector<Blob*> &input, std::vector<Blob> &output)
{
CV_Assert(input.size() >= 1 && input.size() <= 2);
Wxh = blobs[0].matRefConst();
bh = blobs[1].matRefConst();
Whh = blobs[2].matRefConst();
Who = blobs[3].matRefConst();
bo = blobs[4].matRefConst();
numH = Wxh.rows;
numX = Wxh.cols;
numO = Who.rows;
CV_Assert(input[0]->dims() >= 2);
CV_Assert((int)input[0]->total(2) == numX);
CV_Assert(input[0]->type() == CV_32F || input[0]->type() == CV_64F);
dtype = input[0]->type();
numTimestamps = input[0]->size(0);
numSamples = input[0]->size(1);
numSamplesTotal = numTimestamps * numSamples;
hCurr.create(numSamples, numH, dtype);
hPrev.create(numSamples, numH, dtype);
hPrev.setTo(0);
dummyBiasOnes.create(numSamples, 1, dtype);
dummyBiasOnes.setTo(1);
bh = bh.reshape(1, 1); //is 1 x numH Mat
bo = bo.reshape(1, 1); //is 1 x numO Mat
reshapeOutput(output);
}
void reshapeOutput(std::vector<Blob> &output)
{
output.resize((produceH) ? 2 : 1);
output[0].create(BlobShape(numTimestamps, numSamples, numO), dtype);
if (produceH)
output[1].create(BlobShape(numTimestamps, numSamples, numH), dtype);
}
void forward(std::vector<Blob*> &input, std::vector<Blob> &output)
{
Mat xTs = input[0]->reshaped(BlobShape(numSamplesTotal, numX)).matRefConst();
Mat oTs = output[0].reshaped(BlobShape(numSamplesTotal, numO)).matRef();
Mat hTs = (produceH) ? output[1].reshaped(BlobShape(numSamplesTotal, numH)).matRef() : Mat();
for (int ts = 0; ts < numTimestamps; ts++)
{
Range curRowRange = Range(ts * numSamples, (ts + 1) * numSamples);
Mat xCurr = xTs.rowRange(curRowRange);
gemmCPU(hPrev, Whh, 1, hCurr, 0, GEMM_2_T); // W_{hh} * h_{prev}
gemmCPU(xCurr, Wxh, 1, hCurr, 1, GEMM_2_T); //+W_{xh} * x_{curr}
gemmCPU(dummyBiasOnes, bh, 1, hCurr, 1); //+bh
tanh(hCurr, hPrev);
Mat oCurr = oTs.rowRange(curRowRange);
gemmCPU(hPrev, Who, 1, oCurr, 0, GEMM_2_T); // W_{ho} * h_{prev}
gemmCPU(dummyBiasOnes, bo, 1, oCurr, 1); //+b_o
tanh(oCurr, oCurr);
if (produceH)
hPrev.copyTo(hTs.rowRange(curRowRange));
}
}
};
void RNNLayer::forward(std::vector<Blob*>&, std::vector<Blob>&)
{
CV_Error(Error::StsInternal, "This function should never be reached");
}
CV_EXPORTS_W Ptr<RNNLayer> RNNLayer::create()
{
return Ptr<RNNLayer>(new RNNLayerImpl());
}
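In equation form, the recurrence that RNNLayerImpl::forward() implements (matching its inline comments) is:

$$h_t = \tanh(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \qquad o_t = \tanh(W_{ho} h_t + b_o)$$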
}
}

@ -0,0 +1,54 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__
#define __OPENCV_DNN_LAYERS_RECURRENT_LAYERS_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
}
}
#endif

@ -42,125 +42,46 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "reshape_layer.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
ReshapeLayerImpl::ReshapeLayerImpl(const BlobShape &newShape_, Range applyingRange_)
{
newShapeDesc = newShape_;
newShapeRange = applyingRange_;
}
void ReshapeLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
outputs.resize(inputs.size());
outShapes.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
{
outShapes[i] = computeShapeByReshapeMask(inputs[i]->shape(), newShapeDesc, newShapeRange);
outputs[i].shareFrom(*inputs[i]);
outputs[i].reshape(outShapes[i]);
}
}
void ReshapeLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t i = 0; i < outputs.size(); i++)
{
outputs[i].shareFrom(*inputs[i]);
outputs[i].reshape(outShapes[i]);
}
}
Ptr<ReshapeLayer> ReshapeLayer::create(const BlobShape &newShape, Range applyingRange /*= Range::all()*/)
{
return Ptr<ReshapeLayer>(new ReshapeLayerImpl(newShape, applyingRange));
}
}
}
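computeShapeByReshapeMask() keeps the Caffe-style mask semantics: a 0 entry in the new shape copies the corresponding input dimension, and a single -1 entry is inferred so the total element count is preserved. A small illustrative sketch (shapes invented for the example):

    // Flatten everything after the first axis: (2 x 3 x 4) -> (2 x 12).
    Ptr<ReshapeLayer> reshape = ReshapeLayer::create(BlobShape(0, -1));
    // dim 0 is copied from the input, dim 1 is inferred as 24 / 2 = 12.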

@ -42,26 +42,23 @@
#ifndef __OPENCV_DNN_LAYERS_RESHAPE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_RESHAPE_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class ReshapeLayerImpl : public ReshapeLayer
{
std::vector<BlobShape> outShapes;
public:
ReshapeLayerImpl(const BlobShape &newShape_, Range applyingRange_);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};

@ -42,55 +42,57 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "slice_layer.hpp"
#include <opencv2/core/ocl.hpp>
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
{
namespace dnn
{
SliceLayerImpl::SliceLayerImpl(int axis_ /*= 1*/)
{
axis = axis_;
}
SliceLayerImpl::SliceLayerImpl(int axis_, const std::vector<int> &sliceIndices_)
{
axis = axis_;
sliceIndices = sliceIndices_;
}
void SliceLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
const Blob &inpBlob = *inputs[0];
useOpenCL = ocl::useOpenCL() && inpBlob.getState() == Blob::HEAD_AT_UMAT;
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
axisIdx = inpBlob.canonicalAxis(axis);
int axisSize = inpBlob.size(axisIdx);
BlobShape inpShape = inpBlob.shape();
if (sliceIndices.size()) //divide blob with respect to passed parameters
{
std::vector<int> outAxisSize;
int prevSlice = 0;
for (size_t i = 0; i < sliceIndices.size(); i++)
{
if (!(prevSlice < sliceIndices[i] && sliceIndices[i] < axisSize))
CV_Error(Error::StsBadArg, "Slice indices should be positive, strictly increasing and must not exceed the size of the sliced dimension");
outAxisSize.push_back(sliceIndices[i] - prevSlice);
prevSlice = sliceIndices[i];
}
outAxisSize.push_back(axisSize - prevSlice);
outputs.resize(outAxisSize.size());
for (size_t i = 0; i < outAxisSize.size(); i++)
{
inpShape[axisIdx] = outAxisSize[i];
outputs[i].create(inpShape, inpBlob.type(), allocFlags);
}
}
else //divide blob with respect to count of output blobs
@ -100,30 +102,45 @@ void SliceLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &o
for (size_t i = 0; i < outputs.size(); i++)
{
inpShape[axisIdx] = outAxisSize;
outputs[i].create(inpShape, inpBlob.type(), allocFlags);
}
}
}
void SliceLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
#ifdef HAVE_OPENCL
if (useOpenCL)
forward_<UMat>(inputs, outputs);
else
#endif
forward_<Mat>(inputs, outputs);
}
template<typename XMat>
void SliceLayerImpl::forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
const XMat& inpMat = inputs[0]->getRefConst<XMat>();
std::vector<Range> ranges(inputs[0]->dims(), Range::all());
ranges[axisIdx].start = 0;
for (size_t i = 0; i < outputs.size(); i++)
{
ranges[axisIdx].end = ranges[axisIdx].start + outputs[i].size(axisIdx);
inpMat(&ranges[0]).copyTo(outputs[i].getRef<XMat>());
ranges[axisIdx].start = ranges[axisIdx].end;
}
}
Ptr<SliceLayer> SliceLayer::create(int axis)
{
return Ptr<SliceLayer>(new SliceLayerImpl(axis));
}
Ptr<SliceLayer> SliceLayer::create(int axis, const std::vector<int> &sliceIndices)
{
return Ptr<SliceLayer>(new SliceLayerImpl(axis, sliceIndices));
}
}
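A short sketch of the new create() overloads (shapes invented for the example):

    // Split a (1 x 6 x 2 x 2) blob along axis 1 at indices 2 and 4; after
    // allocate()/forward() this yields three outputs covering channel
    // ranges [0;2), [2;4) and [4;6).
    std::vector<int> sliceIndices;
    sliceIndices.push_back(2);
    sliceIndices.push_back(4);
    Ptr<SliceLayer> slice = SliceLayer::create(1, sliceIndices);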

@ -42,24 +42,28 @@
#ifndef __OPENCV_DNN_LAYERS_SLICE_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SLICE_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class SliceLayerImpl : public SliceLayer
{
bool useOpenCL;
int axisIdx;
template<typename XMat>
void forward_(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
public:
SliceLayerImpl(int axis_ = 1);
SliceLayerImpl(int axis_, const std::vector<int> &sliceIndices_);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}

@ -42,6 +42,8 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "softmax_layer.hpp"
#include <opencv2/core/ocl.hpp>
#include "modules/dnn/opencl_kernels_dnn.hpp"
#include <algorithm>
#include <stdlib.h>
using std::max;
@ -50,95 +52,173 @@ namespace cv
{
namespace dnn
{
SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
{
axisRaw = axis;
}
void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
axis = inputs[0]->canonicalAxis(axisRaw);
useOpenCL = ocl::useOpenCL();
BlobShape shape = inputs[0]->shape();
outerSize = shape.total(0, axis);
channels = shape[axis];
innerSize = shape.total(axis + 1);
int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
shape[axis] = 1;
buf.create(shape, inputs[0]->type(), allocFlag);
outputs.resize(1);
outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
}
void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
if (!useOpenCL)
forward_cpu(src, dst);
else
{
CV_Assert(forward_ocl(src, dst));
}
}
#ifdef HAVE_OPENCL
bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
{
const UMat &srcMat = src.umatRefConst();
UMat &dstMat = dst.umatRef();
srcMat.copyTo(dstMat);
UMat &bufMat = buf.umatRef();
CV_Assert(dstMat.offset == 0);
String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
ocl::Kernel kmax, ksub, ksum, kdiv;
if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
return false;
size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
size_t bufSize = buf.total();
size_t totalSize = src.total();
kmax.args((int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
if (!kmax.run(1, &bufSize, &wgSize, true))
return false;
ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
if (!ksub.run(1, &totalSize, &wgSize, true))
return false;
cv::exp(dstMat, dstMat);
ksum.args((int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
if (!ksum.run(1, &bufSize, &wgSize, true))
return false;
kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
if (!kdiv.run(1, &totalSize, &wgSize, true))
return false;
return true;
}
#else
bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
{
return false;
}
#endif
void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
{
CV_Assert(src.type() == CV_32F);
float *srcPtr = src.ptrf();
float *dstPtr = dst.ptrf();
float *bufPtr = buf.ptrf();
size_t outerStep = src.total(axis);
size_t cnStep = src.total(axis + 1);
//compute max along axis
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
for (size_t cnDim = 1; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
}
}
//subtract max
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
}
}
cv::exp(dst.matRef(), dst.matRef());
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
//sum exp along axis
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] = 0.f;
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
}
//divide by computed sum
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
}
}
}
Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
{
return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
}
}
}
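Both the CPU and the OpenCL path compute the numerically stable softmax along the chosen axis, independently for every (outer, inner) position:

$$y_c = \frac{\exp(x_c - \max_k x_k)}{\sum_j \exp(x_j - \max_k x_k)}$$

Subtracting the per-slice maximum before exponentiation is the usual guard against overflow; the extra factor cancels in the ratio.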

@ -42,21 +42,31 @@
#ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class SoftMaxLayerImpl : public SoftmaxLayer
{
int axis, axisRaw;
Blob buf;
bool useOpenCL;
size_t outerSize, channels, innerSize;
bool forward_ocl(Blob &src, Blob &dst);
void forward_cpu(Blob &src, Blob &dst);
public:
SoftMaxLayerImpl(int axis = 1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}
}
#endif

@ -42,41 +42,46 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include "split_layer.hpp"
#include <opencv2/core/ocl.hpp>
namespace cv
{
namespace dnn
{
SplitLayerImpl::SplitLayerImpl(int outputsCount_ /*= -1*/)
{
outputsCount = outputsCount_;
}
void SplitLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
useOpenCL = ocl::useOpenCL() && inputs[0]->getState() == Blob::HEAD_AT_UMAT;
int allocFlags = useOpenCL ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
if (outputsCount >= 0)
outputs.resize(outputsCount);
for (size_t i = 0; i < outputs.size(); i++)
outputs[i].create(inputs[0]->shape(), inputs[0]->type(), allocFlags);
}
void SplitLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t i = 0; i < outputs.size(); i++)
{
if (useOpenCL)
inputs[0]->umatRefConst().copyTo(outputs[i].umatRef());
else
inputs[0]->matRefConst().copyTo(outputs[i].matRef());
}
}
Ptr<SplitLayer> SplitLayer::create(int outputsCount)
{
return Ptr<SplitLayer>(new SplitLayerImpl(outputsCount));
}
}

@ -42,23 +42,23 @@
#ifndef __OPENCV_DNN_LAYERS_SPLIT_LAYER_HPP__
#define __OPENCV_DNN_LAYERS_SPLIT_LAYER_HPP__
#include "../precomp.hpp"
#include <opencv2/dnn/all_layers.hpp>
namespace cv
{
namespace dnn
{
class SplitLayerImpl : public SplitLayer
{
bool useOpenCL;
public:
SplitLayerImpl(int outputsCount_ = -1);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
}

@ -0,0 +1,44 @@
__kernel void ReLUForward(const int count, __global const T* in, __global T* out
#ifndef RELU_NO_SLOPE
, T negative_slope
#endif
) {
int index = get_global_id(0);
if(index < count)
#ifndef RELU_NO_SLOPE
out[index] = in[index] > 0 ? in[index] : in[index] * negative_slope;
#else
out[index] = in[index] > 0 ? in[index] : 0;
#endif
}
__kernel void TanHForward(const int count, __global const T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)
out[index] = tanh(in[index]);
}
__kernel void SigmoidForward(const int count, __global const T* in, __global T* out) {
int index = get_global_id(0);
if(index < count)
out[index] = 1. / (1. + exp(-in[index]));
}
__kernel void BNLLForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n) {
out[index] = in[index] > 0 ? in[index] + log(1. + exp(-in[index])) : log(1. + exp(in[index]));
}
}
__kernel void AbsValForward(const int n, __global const T* in, __global T* out) {
int index = get_global_id(0);
if (index < n)
out[index] = fabs(in[index]);
}
__kernel void PowForward(const int n, __global const T* in, __global T* out, const T power, const T scale, const T shift) {
int index = get_global_id(0);
if (index < n)
out[index] = pow(shift + scale * in[index], power);
}
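These kernels are type-generic: T is substituted at build time through a -DT=... option, the same pattern SoftMaxLayerImpl::forward_ocl() uses above. A hedged sketch of how the ReLU kernel might be instantiated for float data (the program source handle ocl::dnn::activations_oclsrc is an assumption derived from the file name):

    String buildOpts = String("-DT=") + ocl::typeToStr(CV_32F) + " -DRELU_NO_SLOPE";
    ocl::Kernel krelu;
    if (!krelu.create("ReLUForward", ocl::dnn::activations_oclsrc, buildOpts))
        { /* fall back to the CPU implementation */ }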

@ -0,0 +1,62 @@
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
__kernel void col2im(const int n, __global const T* data_col, const int col_offset,
const int height, const int width, const int channels,
const int patch_h, const int patch_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
__global T* data_im, const int img_offset)
{
data_col = data_col + col_offset;
data_im = data_im + img_offset;
int index = get_global_id(0);
if(index < n) {
T val = 0;
int w = index % width + pad_w;
int h = (index / width) % height + pad_h;
int c = index / (width * height);
// compute the start and end of the output
int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
int w_col_end = min(w / stride_w + 1, width_col);
int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
int h_col_end = min(h / stride_h + 1, height_col);
// equivalent implementation
int offset =
(c * patch_h * patch_w + h * patch_w + w) * height_col * width_col;
int coeff_h_col = (1 - stride_h * patch_w * height_col) * width_col;
int coeff_w_col = (1 - stride_w * height_col * width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
}
}
data_im[index] = val;
}
}

@ -39,11 +39,11 @@
//
//M*/
__kernel void im2col(__global const T *im_src, int im_src_offset,
int channels, int height_inp, int width_inp,
int kernel_h, int kernel_w, int pad_h, int pad_w, int stride_h, int stride_w,
int height_out, int width_out,
__global T *im_col, int im_col_offset
)
{
int index = get_global_id(0);
@ -52,13 +52,13 @@ __kernel void im2col(__global const float *im_src, int im_src_offset,
int j_out = index % width_out;
int i_out = (index / width_out) % height_out;
int c_inp = (index / width_out) / height_out;
int c_out = c_inp * kernel_h * kernel_w;
int i_inp = i_out * stride_h - pad_h;
int j_inp = j_out * stride_w - pad_w;
im_src += (c_inp * height_inp + i_inp) * width_inp + j_inp + im_src_offset;
im_col += (c_out * height_out + i_out) * width_out + j_out + im_col_offset;
for (int ki = 0; ki < kernel_h; ++ki)
for (int kj = 0; kj < kernel_w; ++kj) {

@ -0,0 +1,76 @@
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
__kernel void LRNComputeOutput(const int nthreads, __global T* in, __global T* scale, const T negative_beta, __global T* out) {
int index = get_global_id(0);
int tmp = get_global_size(0);
for (; index < nthreads; index += tmp)
out[index] = in[index] * pow(scale[index], negative_beta);
}
__kernel void LRNFillScale(const int nthreads, __global T* in, const int num, const int channels, const int height, const int width, const int size, const T alpha_over_size, const T k, __global T* scale) {
int index = get_global_id(0);
int tmp = get_global_size(0);
for (; index < nthreads; index += tmp) {
// find out the local offset
const int w = index % width;
const int h = (index / width) % height;
const int n = index / width / height;
const int offset = (n * channels * height + h) * width + w;
const int step = height * width;
in = in + offset;
scale = scale + offset;
int head = 0;
const int pre_pad = (size - 1) / 2;
const int post_pad = size - pre_pad - 1;
T accum_scale = 0;
// fill the scale at [n, :, h, w]
// accumulate values
while (head < post_pad && head < channels) {
accum_scale += in[head * step] * in[head * step];
++head;
}
// both add and subtract
while (head < channels) {
accum_scale += in[head * step] * in[head * step];
if (head - size >= 0) {
accum_scale -= in[(head - size) * step]
* in[(head - size) * step];
}
scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
++head;
}
// subtract only
while (head < channels + post_pad) {
if (head - size >= 0) {
accum_scale -= in[(head - size) * step]
* in[(head - size) * step];
}
scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
++head;
}
}
}

@ -0,0 +1,94 @@
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
__kernel void MaxPoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w, __global T* top_data
#ifdef MASK
, __global int* mask, __global T* top_mask
#endif
) {
int index = get_global_id(0);
int tmp = get_global_size(0);
for (; index < nthreads; index += tmp) {
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
int hstart = ph * stride_h - pad_h;
int wstart = pw * stride_w - pad_w;
const int hend = min(hstart + kernel_h, height);
const int wend = min(wstart + kernel_w, width);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
T maxval = -FLT_MAX;
int maxidx = -1;
bottom_data =
bottom_data + (n * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (bottom_data[h * width + w] > maxval) {
maxidx = h * width + w;
maxval = bottom_data[maxidx];
}
}
}
top_data[index] = maxval;
#ifdef MASK
if (mask) {
mask[index] = maxidx;
} else {
top_mask[index] = maxidx;
}
#endif
}
}
__kernel void AvePoolForward(const int nthreads, __global T* bottom_data, const int num, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, const int kernel_h, const int kernel_w, const int stride_h, const int stride_w, const int pad_h, const int pad_w,__global T* top_data) {
int index = get_global_id(0);
int tmp = get_global_size(0);
for (; index < nthreads; index += tmp) {
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
int hstart = ph * stride_h - pad_h;
int wstart = pw * stride_w - pad_w;
int hend = min(hstart + kernel_h, height + pad_h);
int wend = min(wstart + kernel_w, width + pad_w);
const int pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
hend = min(hend, height);
wend = min(wend, width);
T aveval = 0;
bottom_data =
bottom_data + (n * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
aveval += bottom_data[h * width + w];
}
}
top_data[index] = aveval / pool_size;
}
}

@ -0,0 +1,75 @@
/*************************************************************************************
* Copyright (c) 2015, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
__kernel void kernel_channel_max(const int num, const int channels,
const int spatial_dim, __global const T* data, __global T* out) {
int index = get_global_id(0);
if(index < num * spatial_dim) {
int n = index / spatial_dim;
int s = index % spatial_dim;
T maxval = -FLT_MAX;
for (int c = 0; c < channels; ++c) {
maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
}
out[index] = maxval;
}
}
__kernel void kernel_channel_subtract(const int count,
const int num, const int channels,
const int spatial_dim, __global const T* channel_max, __global T* data) {
int index = get_global_id(0);
if(index < count) {
int n = index / channels / spatial_dim;
int s = index % spatial_dim;
data[index] -= channel_max[n * spatial_dim + s];
}
}
__kernel void kernel_channel_sum(const int num, const int channels,
const int spatial_dim, __global const T* data, __global T* channel_sum) {
int index = get_global_id(0);
if(index < num * spatial_dim) {
int n = index / spatial_dim;
int s = index % spatial_dim;
T sum = 0;
for (int c = 0; c < channels; ++c) {
sum += data[(n * channels + c) * spatial_dim + s];
}
channel_sum[index] = sum;
}
}
__kernel void kernel_channel_div(const int count,
const int num, const int channels,
const int spatial_dim, __global const T* channel_sum, __global T* data) {
int index = get_global_id(0);
if(index < count) {
int n = index / channels / spatial_dim;
int s = index % spatial_dim;
data[index] /= channel_sum[n * spatial_dim + s];
}
}

@ -40,4 +40,5 @@
//M*/
#include <opencv2/core.hpp>
#include "cvconfig.h"
#include <opencv2/dnn.hpp>

@ -52,6 +52,12 @@ namespace dnn {
#if defined(ENABLE_TORCH_IMPORTER) && ENABLE_TORCH_IMPORTER
#include "THDiskFile.h"
#ifdef NDEBUG
static bool dbgPrint = false;
#else
static bool dbgPrint = true;
#endif
enum LuaType
{
TYPE_NIL = 0,
@ -290,7 +296,8 @@ struct TorchImporter : public ::cv::dnn::Importer
}
String key = readString();
if (dbgPrint)
std::cout << i << "th key: " << key << "\n";
fpos = THFile_position(file);
int vtype = readInt();
@ -334,13 +341,16 @@ struct TorchImporter : public ::cv::dnn::Importer
}
//Debug output
std::cout << "scalarParams:\n";
std::cout << scalarParams;
if (dbgPrint)
{
std::cout << "scalarParams:\n";
std::cout << scalarParams;
std::cout << "#" << tensorParams.size() << " tensorParams:\n";
std::map<String,Blob>::const_iterator it;
for (it = tensorParams.begin(); it != tensorParams.end(); it++)
std::cout << it->first << ": Tensor " << it->second.shape() << "\n";
std::cout << "#" << tensorParams.size() << " tensorParams:\n";
std::map<String,Blob>::const_iterator it;
for (it = tensorParams.begin(); it != tensorParams.end(); it++)
std::cout << it->first << ": Tensor " << it->second.shape() << "\n";
}
}
void readTorchTensor(int indexTensor, int typeTensor)
@ -435,7 +445,9 @@ struct TorchImporter : public ::cv::dnn::Importer
String className = readTorchClassName();
String nnName;
std::cout << "Class: " << className << std::endl;
if (dbgPrint)
std::cout << "Class: " << className << std::endl;
int type;
if ( (type = parseTensorType(className)) >= 0 ) //is Tensor

@ -42,6 +42,8 @@
#if defined(ENABLE_CAFFE_MODEL_TESTS)
#include "test_precomp.hpp"
#include "npy_blob.hpp"
#include <opencv2/core/ocl.hpp>
#include <opencv2/ts/ocl_test.hpp>
namespace cvtest
{
@ -55,7 +57,7 @@ static std::string _tf(TString filename)
return (getOpenCVExtraDir() + "/dnn/") + filename;
}
static void launchGoogleNetTest()
{
Net net;
{
@ -69,7 +71,7 @@ TEST(Reproducibility_GoogLeNet, Accuracy)
inpMats.push_back( imread(_tf("googlenet_1.jpg")) );
ASSERT_TRUE(!inpMats[0].empty() && !inpMats[1].empty());
net.setBlob(".data", Blob(inpMats));
net.setBlob(".data", Blob::fromImages(inpMats));
net.forward();
Blob out = net.getBlob("prob");
@ -77,5 +79,16 @@ TEST(Reproducibility_GoogLeNet, Accuracy)
normAssert(out, ref);
}
TEST(Reproducibility_GoogLeNet, Accuracy)
{
OCL_OFF(launchGoogleNetTest());
}
OCL_TEST(Reproducibility_GoogLeNet, Accuracy)
{
OCL_ON(launchGoogleNetTest());
OCL_OFF();
}
}
#endif

@ -43,6 +43,8 @@
#include <opencv2/core/ocl.hpp>
#include <iostream>
#include "npy_blob.hpp"
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/ts/ocl_test.hpp>
namespace cvtest
{
@ -56,7 +58,32 @@ static String _tf(TString filename)
return (getOpenCVExtraDir() + "/dnn/layers/") + filename;
}
enum RunLayerMode
{
ALLOC_ONLY = 1,
FORWARD_ONLY = 2,
ALLOC_AND_FORWARD = ALLOC_ONLY | FORWARD_ONLY
};
typedef Ptr<std::vector<Blob*> > PtrToVecPtrBlob;
PtrToVecPtrBlob
runLayer(Ptr<Layer> layer, std::vector<Blob> &inpBlobs, std::vector<Blob> &outBlobs, int mode = ALLOC_AND_FORWARD)
{
PtrToVecPtrBlob inpPtrs(new std::vector<Blob*>());
inpPtrs->reserve(inpBlobs.size());
for (size_t i = 0; i < inpBlobs.size(); i++)
inpPtrs->push_back(&inpBlobs[i]);
if (mode & ALLOC_ONLY) layer->allocate(*inpPtrs, outBlobs);
if (mode & FORWARD_ONLY) layer->forward(*inpPtrs, outBlobs);
return inpPtrs;
}
void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool useCommonInputBlob = true)
{
String prototxt = _tf(basename + ".prototxt");
String caffemodel = _tf(basename + ".caffemodel");
@ -64,6 +91,8 @@ static void testLayer(String basename, bool useCaffeModel = false, bool useCommo
String inpfile = (useCommonInputBlob) ? _tf("blob.npy") : _tf(basename + ".input.npy");
String outfile = _tf(basename + ".npy");
cv::setNumThreads(cv::getNumberOfCPUs());
Net net;
{
Ptr<Importer> importer = createCaffeImporter(prototxt, (useCaffeModel) ? caffemodel : String());
@ -83,58 +112,89 @@ static void testLayer(String basename, bool useCaffeModel = false, bool useCommo
TEST(Layer_Test_Softmax, Accuracy)
{
testLayer("layer_softmax");
OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
}
OCL_TEST(Layer_Test_Softmax, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_softmax"));
OCL_OFF();
}
TEST(Layer_Test_LRN_spatial, Accuracy)
{
testLayer("layer_lrn_spatial");
OCL_OFF(testLayerUsingCaffeModels("layer_lrn_spatial"));
}
OCL_TEST(Layer_Test_LRN_spatial, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_lrn_spatial"));
OCL_OFF();
}
TEST(Layer_Test_LRN_channels, Accuracy)
{
testLayer("layer_lrn_channels");
OCL_OFF(testLayerUsingCaffeModels("layer_lrn_channels"));
}
OCL_TEST(Layer_Test_LRN_channels, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_lrn_channels"));
OCL_OFF();
}
TEST(Layer_Test_Convolution, Accuracy)
{
testLayer("layer_convolution", true);
OCL_OFF(testLayerUsingCaffeModels("layer_convolution", true));
}
OCL_TEST(Layer_Test_Convolution, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_convolution", true));
OCL_OFF();
}
TEST(Layer_Test_DeConvolution, Accuracy)
{
OCL_OFF(testLayerUsingCaffeModels("layer_deconvolution", true, false));
}
OCL_TEST(Layer_Test_DeConvolution, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_deconvolution", true, false););
OCL_OFF();
}
TEST(Layer_Test_InnerProduct, Accuracy)
{
testLayer("layer_inner_product", true);
OCL_OFF(testLayerUsingCaffeModels("layer_inner_product", true));
}
OCL_TEST(Layer_Test_InnerProduct, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_inner_product", true));
OCL_OFF();
}
TEST(Layer_Test_Pooling_max, Accuracy)
{
testLayer("layer_pooling_max");
OCL_OFF(testLayerUsingCaffeModels("layer_pooling_max"));
OCL_ON();
}
OCL_TEST(Layer_Test_Pooling_max, Accuracy)
{
OCL_ON(testLayerUsingCaffeModels("layer_pooling_max"));
OCL_OFF();
}
TEST(Layer_Test_Pooling_ave, Accuracy)
{
testLayer("layer_pooling_ave");
OCL_OFF(testLayerUsingCaffeModels("layer_pooling_ave"));
OCL_ON();
}
OCL_TEST(Layer_Test_Pooling_ave, Accuracy)
{
testLayer("layer_deconvolution", true, false);
OCL_ON(testLayerUsingCaffeModels("layer_pooling_ave"));
OCL_OFF();
}
TEST(Layer_Test_MVN, Accuracy)
{
testLayer("layer_mvn");
OCL_OFF(testLayerUsingCaffeModels("layer_mvn"));
}
TEST(Layer_Test_Reshape, squeeze)
@ -151,10 +211,31 @@ TEST(Layer_Test_Reshape, squeeze)
rl->allocate(inpVec, outVec);
rl->forward(inpVec, outVec);
EXPECT_EQ(outVec[0].shape(), BlobShape(4, 3, 2));
}
//template<typename XMat>
//static void test_Layer_Concat()
//{
// Matx21f a(1.f, 1.f), b(2.f, 2.f), c(3.f, 3.f);
// std::vector<Blob> res(1), src = { Blob(XMat(a)), Blob(XMat(b)), Blob(XMat(c)) };
// Blob ref(XMat(Matx23f(1.f, 2.f, 3.f, 1.f, 2.f, 3.f)));
//
// runLayer(ConcatLayer::create(1), src, res);
// normAssert(ref, res[0]);
//}
//TEST(Layer_Concat, Accuracy)
//{
// OCL_OFF(test_Layer_Concat<Mat>());
//}
//OCL_TEST(Layer_Concat, Accuracy)
//{
// OCL_ON(test_Layer_Concat<Mat>());
// OCL_OFF();
//}
template<typename XMat>
void test_Reshape_Split_Slice_layers()
{
Net net;
{
@ -163,9 +244,9 @@ TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
importer->populateNet(net);
}
Blob input(BlobShape(6, 12));
RNG rng(0);
rng.fill(input.getRef<XMat>(), RNG::UNIFORM, -1, 1);
net.setBlob(".input", input);
net.forward();
@ -173,5 +254,143 @@ TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
normAssert(input, output);
}
TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
{
OCL_OFF(test_Reshape_Split_Slice_layers<Mat>());
}
OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
{
OCL_ON(test_Reshape_Split_Slice_layers<UMat>());
OCL_OFF();
}
class Layer_LSTM_Test : public ::testing::Test
{
public:
int numInp, numOut;
Blob Wh, Wx, b;
Ptr<LSTMLayer> layer;
std::vector<Blob> inputs, outputs;
Layer_LSTM_Test() {}
void init(const BlobShape &inpShape_, const BlobShape &outShape_)
{
numInp = inpShape_.total();
numOut = outShape_.total();
Wh = Blob(BlobShape(4 * numOut, numOut));
Wx = Blob(BlobShape(4 * numOut, numInp));
b = Blob(BlobShape(4 * numOut, 1));
layer = LSTMLayer::create();
layer->setWeights(Wh, Wx, b);
layer->setOutShape(outShape_);
}
};
TEST_F(Layer_LSTM_Test, get_set_test)
{
BlobShape TN(4);
BlobShape inpShape(5, 3, 2), inpResShape = TN + inpShape;
BlobShape outShape(3, 1, 2), outResShape = TN + outShape;
init(inpShape, outShape);
layer->setProduceCellOutput(true);
layer->setUseTimstampsDim(false);
layer->setOutShape(outShape);
layer->setC(Blob(outResShape));
layer->setH(Blob(outResShape));
inputs.push_back(Blob(inpResShape));
runLayer(layer, inputs, outputs);
EXPECT_EQ(2, outputs.size());
EXPECT_EQ(outResShape, outputs[0].shape());
EXPECT_EQ(outResShape, outputs[1].shape());
EXPECT_EQ(outResShape, layer->getC().shape());
EXPECT_EQ(outResShape, layer->getH().shape());
EXPECT_EQ(0, layer->inputNameToIndex("x"));
EXPECT_EQ(0, layer->outputNameToIndex("h"));
EXPECT_EQ(1, layer->outputNameToIndex("c"));
}
TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent)
{
Ptr<LSTMLayer> layer = LSTMLayer::create();
Blob Wx = blobFromNPY(_tf("lstm.prototxt.w_0.npy"));
Blob Wh = blobFromNPY(_tf("lstm.prototxt.w_2.npy"));
Blob b = blobFromNPY(_tf("lstm.prototxt.w_1.npy"));
layer->setWeights(Wh, Wx, b);
Blob inp = blobFromNPY(_tf("recurrent.input.npy"));
std::vector<Blob> inputs(1, inp), outputs;
runLayer(layer, inputs, outputs);
Blob h_t_reference = blobFromNPY(_tf("lstm.prototxt.h_1.npy"));
normAssert(h_t_reference, outputs[0]);
}
TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent)
{
Ptr<RNNLayer> layer = RNNLayer::create();
layer->setWeights(
blobFromNPY(_tf("rnn.prototxt.w_0.npy")),
blobFromNPY(_tf("rnn.prototxt.w_1.npy")),
blobFromNPY(_tf("rnn.prototxt.w_2.npy")),
blobFromNPY(_tf("rnn.prototxt.w_3.npy")),
blobFromNPY(_tf("rnn.prototxt.w_4.npy")) );
std::vector<Blob> output, input(1, blobFromNPY(_tf("recurrent.input.npy")));
runLayer(layer, input, output);
Blob h_ref = blobFromNPY(_tf("rnn.prototxt.h_1.npy"));
normAssert(h_ref, output[0]);
}
class Layer_RNN_Test : public ::testing::Test
{
public:
int nX, nH, nO, nT, nS;
Blob Whh, Wxh, bh, Who, bo;
Ptr<RNNLayer> layer;
std::vector<Blob> inputs, outputs;
Layer_RNN_Test()
{
nT = 3;
nS = 5;
nX = 31;
nH = 64;
nO = 100;
Whh = Blob(BlobShape(nH, nH));
Wxh = Blob(BlobShape(nH, nX));
bh = Blob(BlobShape(nH, 1));
Who = Blob(BlobShape(nO, nH));
bo = Blob(BlobShape(nO, 1));
layer = RNNLayer::create();
layer->setProduceHiddenOutput(true);
layer->setWeights(Wxh, bh, Whh, Who, bo);
}
};
TEST_F(Layer_RNN_Test, get_set_test)
{
inputs.push_back(Blob(BlobShape(nT, nS, 1, nX)));
runLayer(layer, inputs, outputs);
EXPECT_EQ(outputs.size(), 2);
EXPECT_EQ(outputs[0].shape(), BlobShape(nT, nS, nO));
EXPECT_EQ(outputs[1].shape(), BlobShape(nT, nS, nH));
}
}

@ -1,3 +1,31 @@
#include "test_precomp.hpp"
CV_TEST_MAIN("")
namespace cvtest
{
using namespace cv;
using namespace cv::dnn;
TEST(BlobShape_SimpleConstr, Regression)
{
BlobShape sd;
BlobShape s1(0);
EXPECT_EQ(s1.dims(), 1);
EXPECT_EQ(s1[0], 0);
BlobShape s2(0, 0);
EXPECT_EQ(s2.dims(), 2);
EXPECT_EQ(s2[0], 0);
EXPECT_EQ(s2[1], 0);
}
TEST(BlobShape_EmptyFill, Regression)
{
BlobShape s(10, (int*)NULL);
EXPECT_EQ(s.dims(), 10);
}
}

@ -0,0 +1 @@
*.caffemodel