Custom layers for deep learning networks (#11129)

* Custom deep learning layers support

* Stack custom deep learning layers
pull/11385/head
Dmitry Kurtaev 7 years ago committed by Vadim Pisarevsky
parent 909a25571e
commit 4ec456f0a0
  1. 36
      3rdparty/protobuf/src/google/protobuf/text_format.cc
  2. 2
      3rdparty/protobuf/src/google/protobuf/text_format.h
  3. 192
      doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md
  4. 8
      doc/tutorials/dnn/table_of_content_dnn.markdown
  5. 2
      modules/dnn/include/opencv2/dnn/all_layers.hpp
  6. 4
      modules/dnn/include/opencv2/dnn/dict.hpp
  7. 22
      modules/dnn/include/opencv2/dnn/dnn.inl.hpp
  8. 16
      modules/dnn/include/opencv2/dnn/layer.details.hpp
  9. 4
      modules/dnn/include/opencv2/dnn/layer.hpp
  10. 22
      modules/dnn/src/caffe/caffe_importer.cpp
  11. 2
      modules/dnn/src/caffe/caffe_io.cpp
  12. 28
      modules/dnn/src/dnn.cpp
  13. 40
      modules/dnn/src/tensorflow/tf_importer.cpp
  14. 16
      modules/dnn/src/torch/torch_importer.cpp
  15. 234
      modules/dnn/test/test_layers.cpp
  16. 62
      modules/dnn/test/test_misc.cpp
  17. 93
      modules/dnn/test/test_tf_importer.cpp
  18. 59
      modules/dnn/test/test_torch_importer.cpp
  19. 232
      samples/cpp/tutorial_code/dnn/custom_layers.cpp

@ -469,8 +469,9 @@ class TextFormat::Parser::ParserImpl {
"\" has no field named \"" + field_name + "\"."); "\" has no field named \"" + field_name + "\".");
return false; return false;
} else { } else {
ReportWarning("Message type \"" + descriptor->full_name() + // No warnings to let user define custom layers (see https://github.com/opencv/opencv/pull/11129)
"\" has no field named \"" + field_name + "\"."); // ReportWarning("Message type \"" + descriptor->full_name() +
// "\" has no field named \"" + field_name + "\".");
} }
} }
} }
@ -485,10 +486,13 @@ class TextFormat::Parser::ParserImpl {
// start with "{" or "<" which indicates the beginning of a message body. // start with "{" or "<" which indicates the beginning of a message body.
// If there is no ":" or there is a "{" or "<" after ":", this field has // If there is no ":" or there is a "{" or "<" after ":", this field has
// to be a message or the input is ill-formed. // to be a message or the input is ill-formed.
UnknownFieldSet* unknown_fields = reflection->MutableUnknownFields(message);
if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
return SkipFieldValue(); UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count());
unknown_field->AddLengthDelimited(0, field_name); // Add a field's name.
return SkipFieldValue(unknown_field);
} else { } else {
return SkipFieldMessage(); return SkipFieldMessage(unknown_fields);
} }
} }
@ -571,7 +575,7 @@ label_skip_parsing:
} }
// Skips the next field including the field's name and value. // Skips the next field including the field's name and value.
bool SkipField() { bool SkipField(UnknownFieldSet* unknown_fields) {
string field_name; string field_name;
if (TryConsume("[")) { if (TryConsume("[")) {
// Extension name. // Extension name.
@ -588,9 +592,11 @@ label_skip_parsing:
// If there is no ":" or there is a "{" or "<" after ":", this field has // If there is no ":" or there is a "{" or "<" after ":", this field has
// to be a message or the input is ill-formed. // to be a message or the input is ill-formed.
if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
DO(SkipFieldValue()); UnknownFieldSet* unknown_field = unknown_fields->AddGroup(unknown_fields->field_count());
unknown_field->AddLengthDelimited(0, field_name); // Add a field's name.
DO(SkipFieldValue(unknown_field));
} else { } else {
DO(SkipFieldMessage()); DO(SkipFieldMessage(unknown_fields));
} }
// For historical reasons, fields may optionally be separated by commas or // For historical reasons, fields may optionally be separated by commas or
// semicolons. // semicolons.
@ -625,11 +631,11 @@ label_skip_parsing:
// Skips the whole body of a message including the beginning delimiter and // Skips the whole body of a message including the beginning delimiter and
// the ending delimiter. // the ending delimiter.
bool SkipFieldMessage() { bool SkipFieldMessage(UnknownFieldSet* unknown_fields) {
string delimiter; string delimiter;
DO(ConsumeMessageDelimiter(&delimiter)); DO(ConsumeMessageDelimiter(&delimiter));
while (!LookingAt(">") && !LookingAt("}")) { while (!LookingAt(">") && !LookingAt("}")) {
DO(SkipField()); DO(SkipField(unknown_fields));
} }
DO(Consume(delimiter)); DO(Consume(delimiter));
return true; return true;
@ -769,7 +775,7 @@ label_skip_parsing:
return true; return true;
} }
bool SkipFieldValue() { bool SkipFieldValue(UnknownFieldSet* unknown_field) {
if (LookingAtType(io::Tokenizer::TYPE_STRING)) { if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
while (LookingAtType(io::Tokenizer::TYPE_STRING)) { while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
tokenizer_.Next(); tokenizer_.Next();
@ -779,9 +785,9 @@ label_skip_parsing:
if (TryConsume("[")) { if (TryConsume("[")) {
while (true) { while (true) {
if (!LookingAt("{") && !LookingAt("<")) { if (!LookingAt("{") && !LookingAt("<")) {
DO(SkipFieldValue()); DO(SkipFieldValue(unknown_field));
} else { } else {
DO(SkipFieldMessage()); DO(SkipFieldMessage(unknown_field));
} }
if (TryConsume("]")) { if (TryConsume("]")) {
break; break;
@ -833,6 +839,8 @@ label_skip_parsing:
return false; return false;
} }
} }
// Use a tag 1 because tag 0 is used for field's name.
unknown_field->AddLengthDelimited(1, tokenizer_.current().text);
tokenizer_.Next(); tokenizer_.Next();
return true; return true;
} }
@ -1298,13 +1306,13 @@ class TextFormat::Printer::TextGenerator
TextFormat::Finder::~Finder() { TextFormat::Finder::~Finder() {
} }
TextFormat::Parser::Parser() TextFormat::Parser::Parser(bool allow_unknown_field)
: error_collector_(NULL), : error_collector_(NULL),
finder_(NULL), finder_(NULL),
parse_info_tree_(NULL), parse_info_tree_(NULL),
allow_partial_(false), allow_partial_(false),
allow_case_insensitive_field_(false), allow_case_insensitive_field_(false),
allow_unknown_field_(false), allow_unknown_field_(allow_unknown_field),
allow_unknown_enum_(false), allow_unknown_enum_(false),
allow_field_number_(false), allow_field_number_(false),
allow_relaxed_whitespace_(false), allow_relaxed_whitespace_(false),

@ -457,7 +457,7 @@ class LIBPROTOBUF_EXPORT TextFormat {
// For more control over parsing, use this class. // For more control over parsing, use this class.
class LIBPROTOBUF_EXPORT Parser { class LIBPROTOBUF_EXPORT Parser {
public: public:
Parser(); Parser(bool allow_unknown_field = false);
~Parser(); ~Parser();
// Like TextFormat::Parse(). // Like TextFormat::Parse().

@ -0,0 +1,192 @@
# Custom deep learning layers support {#tutorial_dnn_custom_layers}
## Introduction
Deep learning is a fast growing area. The new approaches to build neural networks
usually introduce new types of layers. They could be modifications of existing
ones or implement outstanding researching ideas.
OpenCV gives an opportunity to import and run networks from different deep learning
frameworks. There are a number of the most popular layers. However you can face
a problem that your network cannot be imported using OpenCV because of unimplemented layers.
The first solution is to create a feature request at https://github.com/opencv/opencv/issues
mentioning details such a source of model and type of new layer. A new layer could
be implemented if OpenCV community shares this need.
The second way is to define a **custom layer** so OpenCV's deep learning engine
will know how to use it. This tutorial is dedicated to show you a process of deep
learning models import customization.
## Define a custom layer in C++
Deep learning layer is a building block of network's pipeline.
It has connections to **input blobs** and produces results to **output blobs**.
There are trained **weights** and **hyper-parameters**.
Layers' names, types, weights and hyper-parameters are stored in files are generated by
native frameworks during training. If OpenCV mets unknown layer type it throws an
exception trying to read a model:
```
Unspecified error: Can't create layer "layer_name" of type "MyType" in function getLayerInstance
```
To import the model correctly you have to derive a class from cv::dnn::Layer with
the following methods:
@snippet dnn/custom_layers.cpp A custom layer interface
And register it before the import:
@snippet dnn/custom_layers.cpp Register a custom layer
@note `MyType` is a type of unimplemented layer from the thrown exception.
Let's see what all the methods do:
- Constructor
@snippet dnn/custom_layers.cpp MyLayer::MyLayer
Retrieves hyper-parameters from cv::dnn::LayerParams. If your layer has trainable
weights they will be already stored in the Layer's member cv::dnn::Layer::blobs.
- A static method `create`
@snippet dnn/custom_layers.cpp MyLayer::create
This method should create an instance of you layer and return cv::Ptr with it.
- Output blobs' shape computation
@snippet dnn/custom_layers.cpp MyLayer::getMemoryShapes
Returns layer's output shapes depends on input shapes. You may request an extra
memory using `internals`.
- Run a layer
@snippet dnn/custom_layers.cpp MyLayer::forward
Implement a layer's logic here. Compute outputs for given inputs.
@note OpenCV manages memory allocated for layers. In the most cases the same memory
can be reused between layers. So your `forward` implementation should not rely that
the second invocation of `forward` will has the same data at `outputs` and `internals`.
- Optional `finalize` method
@snippet dnn/custom_layers.cpp MyLayer::finalize
The chain of methods are the following: OpenCV deep learning engine calls `create`
method once then it calls `getMemoryShapes` for an every created layer then you
can make some preparations depends on known input dimensions at cv::dnn::Layer::finalize.
After network was initialized only `forward` method is called for an every network's input.
@note Varying input blobs' sizes such height or width or batch size you make OpenCV
reallocate all the internal memory. That leads efficiency gaps. Try to initialize
and deploy models using a fixed batch size and image's dimensions.
## Example: custom layer from Caffe
Let's create a custom layer `Interp` from https://github.com/cdmh/deeplab-public.
It's just a simple resize that takes an input blob of size `N x C x Hi x Wi` and returns
an output blob of size `N x C x Ho x Wo` where `N` is a batch size, `C` is a number of channels,
`Hi x Wi` and `Ho x Wo` are input and output `height x width` correspondingly.
This layer has no trainable weights but it has hyper-parameters to specify an output size.
In example,
~~~~~~~~~~~~~
layer {
name: "output"
type: "Interp"
bottom: "input"
top: "output"
interp_param {
height: 9
width: 8
}
}
~~~~~~~~~~~~~
This way our implementation can look like:
@snippet dnn/custom_layers.cpp InterpLayer
Next we need to register a new layer type and try to import the model.
@snippet dnn/custom_layers.cpp Register InterpLayer
## Example: custom layer from TensorFlow
This is an example of how to import a network with [tf.image.resize_bilinear](https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_bilinear)
operation. This is also a resize but with an implementation different from OpenCV's or `Interp` above.
Let's create a single layer network:
~~~~~~~~~~~~~{.py}
inp = tf.placeholder(tf.float32, [2, 3, 4, 5], 'input')
resized = tf.image.resize_bilinear(inp, size=[9, 8], name='resize_bilinear')
~~~~~~~~~~~~~
OpenCV sees that TensorFlow's graph in the following way:
```
node {
name: "input"
op: "Placeholder"
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
}
node {
name: "resize_bilinear/size"
op: "Const"
attr {
key: "dtype"
value {
type: DT_INT32
}
}
attr {
key: "value"
value {
tensor {
dtype: DT_INT32
tensor_shape {
dim {
size: 2
}
}
tensor_content: "\t\000\000\000\010\000\000\000"
}
}
}
}
node {
name: "resize_bilinear"
op: "ResizeBilinear"
input: "input:0"
input: "resize_bilinear/size"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
attr {
key: "align_corners"
value {
b: false
}
}
}
library {
}
```
Custom layers import from TensorFlow is designed to put all layer's `attr` into
cv::dnn::LayerParams but input `Const` blobs into cv::dnn::Layer::blobs.
In our case resize's output shape will be stored in layer's `blobs[0]`.
@snippet dnn/custom_layers.cpp ResizeBilinearLayer
Next we register a layer and try to import the model.
@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer

@ -48,3 +48,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
*Author:* Dmitry Kurtaev *Author:* Dmitry Kurtaev
In this tutorial we'll run deep learning models in browser using OpenCV.js. In this tutorial we'll run deep learning models in browser using OpenCV.js.
- @subpage tutorial_dnn_custom_layers
*Compatibility:* \> OpenCV 3.4.1
*Author:* Dmitry Kurtaev
How to define custom layers to import networks.

@ -555,7 +555,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* An every sample in the batch is normalized separately. Optionally, * An every sample in the batch is normalized separately. Optionally,
* output is scaled by the trained parameters. * output is scaled by the trained parameters.
*/ */
class NormalizeBBoxLayer : public Layer class CV_EXPORTS NormalizeBBoxLayer : public Layer
{ {
public: public:
float pnorm, epsilon; float pnorm, epsilon;

@ -142,6 +142,10 @@ public:
const T &set(const String &key, const T &value); const T &set(const String &key, const T &value);
friend std::ostream &operator<<(std::ostream &stream, const Dict &dict); friend std::ostream &operator<<(std::ostream &stream, const Dict &dict);
std::map<String, DictValue>::const_iterator begin() const;
std::map<String, DictValue>::const_iterator end() const;
}; };
//! @} //! @}

@ -102,9 +102,13 @@ inline int64 DictValue::get<int64>(int idx) const
return (int64)doubleValue; return (int64)doubleValue;
} }
else if (type == Param::STRING)
{
return std::atoi((*ps)[idx].c_str());
}
else else
{ {
CV_Assert(isInt() || isReal()); CV_Assert(isInt() || isReal() || isString());
return 0; return 0;
} }
} }
@ -146,9 +150,13 @@ inline double DictValue::get<double>(int idx) const
{ {
return (double)(*pi)[idx]; return (double)(*pi)[idx];
} }
else if (type == Param::STRING)
{
return std::atof((*ps)[idx].c_str());
}
else else
{ {
CV_Assert(isReal() || isInt()); CV_Assert(isReal() || isInt() || isString());
return 0; return 0;
} }
} }
@ -366,6 +374,16 @@ inline std::ostream &operator<<(std::ostream &stream, const Dict &dict)
return stream; return stream;
} }
inline std::map<String, DictValue>::const_iterator Dict::begin() const
{
return dict.begin();
}
inline std::map<String, DictValue>::const_iterator Dict::end() const
{
return dict.end();
}
CV__DNN_EXPERIMENTAL_NS_END CV__DNN_EXPERIMENTAL_NS_END
} }
} }

@ -13,11 +13,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
/** @brief Registers layer constructor in runtime. /** @brief Registers layer constructor in runtime.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
* @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer. * @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer.
* @details This macros must be placed inside the function code. * @details This macros must be placed inside the function code.
*/ */
#define CV_DNN_REGISTER_LAYER_FUNC(type, constuctorFunc) \ #define CV_DNN_REGISTER_LAYER_FUNC(type, constructorFunc) \
cv::dnn::LayerFactory::registerLayer(#type, constuctorFunc); cv::dnn::LayerFactory::registerLayer(#type, constructorFunc);
/** @brief Registers layer class in runtime. /** @brief Registers layer class in runtime.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
@ -29,11 +29,11 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
/** @brief Registers layer constructor on module load time. /** @brief Registers layer constructor on module load time.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
* @param constuctorFunc pointer to the function of type LayerRegister::Constuctor, which creates the layer. * @param constructorFunc pointer to the function of type LayerRegister::Constructor, which creates the layer.
* @details This macros must be placed outside the function code. * @details This macros must be placed outside the function code.
*/ */
#define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constuctorFunc) \ #define CV_DNN_REGISTER_LAYER_FUNC_STATIC(type, constructorFunc) \
static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constuctorFunc); static cv::dnn::details::_LayerStaticRegisterer __LayerStaticRegisterer_##type(#type, constructorFunc);
/** @brief Registers layer class on module load time. /** @brief Registers layer class on module load time.
* @param type string, containing type name of the layer. * @param type string, containing type name of the layer.
@ -59,10 +59,10 @@ class _LayerStaticRegisterer
String type; String type;
public: public:
_LayerStaticRegisterer(const String &layerType, LayerFactory::Constuctor layerConstuctor) _LayerStaticRegisterer(const String &layerType, LayerFactory::Constructor layerConstructor)
{ {
this->type = layerType; this->type = layerType;
LayerFactory::registerLayer(layerType, layerConstuctor); LayerFactory::registerLayer(layerType, layerConstructor);
} }
~_LayerStaticRegisterer() ~_LayerStaticRegisterer()

@ -58,10 +58,10 @@ class CV_EXPORTS LayerFactory
public: public:
//! Each Layer class must provide this function to the factory //! Each Layer class must provide this function to the factory
typedef Ptr<Layer>(*Constuctor)(LayerParams &params); typedef Ptr<Layer>(*Constructor)(LayerParams &params);
//! Registers the layer class with typename @p type and specified @p constructor. Thread-safe. //! Registers the layer class with typename @p type and specified @p constructor. Thread-safe.
static void registerLayer(const String &type, Constuctor constructor); static void registerLayer(const String &type, Constructor constructor);
//! Unregisters registered layer with specified type name. Thread-safe. //! Unregisters registered layer with specified type name. Thread-safe.
static void unregisterLayer(const String &type); static void unregisterLayer(const String &type);

@ -103,6 +103,19 @@ public:
ReadNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBinary); ReadNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBinary);
} }
void extractCustomParams(const google::protobuf::UnknownFieldSet& unknownFields, cv::dnn::LayerParams &params)
{
const int numFields = unknownFields.field_count();
for (int i = 0; i < numFields; ++i)
{
const google::protobuf::UnknownField& field = unknownFields.field(i);
CV_Assert(field.type() == google::protobuf::UnknownField::TYPE_GROUP);
std::string fieldName = field.group().field(0).length_delimited();
std::string fieldValue = field.group().field(1).length_delimited();
params.set(fieldName, fieldValue);
}
}
void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams &params) void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams &params)
{ {
const Reflection *refl = msg.GetReflection(); const Reflection *refl = msg.GetReflection();
@ -187,12 +200,15 @@ public:
if (!isInternal && !ends_with_param(fd->name())) if (!isInternal && !ends_with_param(fd->name()))
continue; continue;
const google::protobuf::UnknownFieldSet& unknownFields = msgRefl->GetUnknownFields(msg);
bool hasData = fd->is_required() || bool hasData = fd->is_required() ||
(fd->is_optional() && msgRefl->HasField(msg, fd)) || (fd->is_optional() && msgRefl->HasField(msg, fd)) ||
(fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0); (fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0) ||
!unknownFields.empty();
if (!hasData) if (!hasData)
continue; continue;
extractCustomParams(unknownFields, params);
if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE)
{ {
if (fd->is_repeated()) //Extract only first item! if (fd->is_repeated()) //Extract only first item!
@ -258,7 +274,7 @@ public:
} }
} }
void extractBinaryLayerParms(const caffe::LayerParameter& layer, LayerParams& layerParams) void extractBinaryLayerParams(const caffe::LayerParameter& layer, LayerParams& layerParams)
{ {
const std::string &name = layer.name(); const std::string &name = layer.name();
@ -319,7 +335,7 @@ public:
LayerParams layerParams; LayerParams layerParams;
extractLayerParams(layer, layerParams); extractLayerParams(layer, layerParams);
extractBinaryLayerParms(layer, layerParams); extractBinaryLayerParams(layer, layerParams);
int repetitions = layerCounter[name]++; int repetitions = layerCounter[name]++;
if (repetitions) if (repetitions)

@ -1120,7 +1120,7 @@ bool ReadProtoFromTextFile(const char* filename, Message* proto) {
std::ifstream fs(filename, std::ifstream::in); std::ifstream fs(filename, std::ifstream::in);
CHECK(fs.is_open()) << "Can't open \"" << filename << "\""; CHECK(fs.is_open()) << "Can't open \"" << filename << "\"";
IstreamInputStream input(&fs); IstreamInputStream input(&fs);
return google::protobuf::TextFormat::Parse(&input, proto); return google::protobuf::TextFormat::Parser(true).Parse(&input, proto);
} }
bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { bool ReadProtoFromBinaryFile(const char* filename, Message* proto) {

@ -2790,7 +2790,7 @@ static Mutex& getLayerFactoryMutex()
return *instance; return *instance;
} }
typedef std::map<String, LayerFactory::Constuctor> LayerFactory_Impl; typedef std::map<String, std::vector<LayerFactory::Constructor> > LayerFactory_Impl;
static LayerFactory_Impl& getLayerFactoryImpl_() static LayerFactory_Impl& getLayerFactoryImpl_()
{ {
@ -2813,21 +2813,22 @@ static LayerFactory_Impl& getLayerFactoryImpl()
return *instance; return *instance;
} }
void LayerFactory::registerLayer(const String &type, Constuctor constructor) void LayerFactory::registerLayer(const String &type, Constructor constructor)
{ {
CV_TRACE_FUNCTION(); CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(type, "type", type.c_str()); CV_TRACE_ARG_VALUE(type, "type", type.c_str());
cv::AutoLock lock(getLayerFactoryMutex()); cv::AutoLock lock(getLayerFactoryMutex());
String type_ = type.toLowerCase(); String type_ = type.toLowerCase();
LayerFactory_Impl::const_iterator it = getLayerFactoryImpl().find(type_); LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
if (it != getLayerFactoryImpl().end() && it->second != constructor) if (it != getLayerFactoryImpl().end())
{ {
CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered"); if (it->second.back() == constructor)
CV_Error(cv::Error::StsBadArg, "Layer \"" + type_ + "\" already was registered");
it->second.push_back(constructor);
} }
getLayerFactoryImpl().insert(std::make_pair(type_, std::vector<Constructor>(1, constructor)));
getLayerFactoryImpl().insert(std::make_pair(type_, constructor));
} }
void LayerFactory::unregisterLayer(const String &type) void LayerFactory::unregisterLayer(const String &type)
@ -2837,7 +2838,15 @@ void LayerFactory::unregisterLayer(const String &type)
cv::AutoLock lock(getLayerFactoryMutex()); cv::AutoLock lock(getLayerFactoryMutex());
String type_ = type.toLowerCase(); String type_ = type.toLowerCase();
getLayerFactoryImpl().erase(type_);
LayerFactory_Impl::iterator it = getLayerFactoryImpl().find(type_);
if (it != getLayerFactoryImpl().end())
{
if (it->second.size() > 1)
it->second.pop_back();
else
getLayerFactoryImpl().erase(it);
}
} }
Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params) Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& params)
@ -2851,7 +2860,8 @@ Ptr<Layer> LayerFactory::createLayerInstance(const String &type, LayerParams& pa
if (it != getLayerFactoryImpl().end()) if (it != getLayerFactoryImpl().end())
{ {
return it->second(params); CV_Assert(!it->second.empty());
return it->second.back()(params);
} }
else else
{ {

@ -1564,8 +1564,44 @@ void TFImporter::populateNet(Net dstNet)
} }
else else
{ {
printLayerAttr(layer); // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
CV_Error_(Error::StsError, ("Unknown layer type %s in op %s", type.c_str(), name.c_str())); // However we create a layer with the same type and rely that user defined a custom layer.
// All the attributes are added to LayerParams.
google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
ai != attr.end(); ++ai)
{
if (ai->second.value_case() == tensorflow::AttrValue::kS) // string
layerParams.set(ai->first, ai->second.s());
if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64
layerParams.set(ai->first, ai->second.i());
if (ai->second.value_case() == tensorflow::AttrValue::kF) // float
layerParams.set(ai->first, ai->second.f());
if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool
layerParams.set(ai->first, ai->second.b());
}
// All the Const input nodes are added to layer's blobs.
std::vector<std::string> inputsNames;
for (int i = 0; i < layer.input_size(); ++i)
{
// Check if input is a Const node.
if (value_id.find(layer.input(i)) != value_id.end())
{
Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
layerParams.blobs.push_back(blob);
}
else
inputsNames.push_back(layer.input(i));
}
int id = dstNet.addLayer(name, type, layerParams);
layer_id[name] = id;
for (int i = 0; i < inputsNames.size(); ++i)
{
connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
}
} }
} }
} }

@ -940,7 +940,21 @@ struct TorchImporter
} }
else else
{ {
CV_Error(Error::StsNotImplemented, "Unknown nn class \"" + className + "\""); // Importer does not know how to map Torch's layer type to an OpenCV's one.
// However we parse all the parameters to let user create a custom layer.
readTorchTable(scalarParams, tensorParams);
for (std::map<String, DictValue>::const_iterator it = scalarParams.begin();
it != scalarParams.end(); ++it)
{
layerParams.set(it->first, it->second);
}
for (std::map<String, std::pair<int, Mat> >::iterator it = tensorParams.begin();
it != tensorParams.end(); ++it)
{
layerParams.blobs.push_back(it->second.second);
}
newModule->apiType = nnName;
curModule->modules.push_back(newModule);
} }
} }
else else

@ -44,7 +44,7 @@
#include "npy_blob.hpp" #include "npy_blob.hpp"
#include <opencv2/dnn/shape_utils.hpp> #include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp> #include <opencv2/dnn/all_layers.hpp>
#include <opencv2/ts/ocl_test.hpp> #include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
namespace opencv_test { namespace { namespace opencv_test { namespace {
@ -117,94 +117,50 @@ void testLayerUsingCaffeModels(String basename, int targetId = DNN_TARGET_CPU,
normAssert(ref, out); normAssert(ref, out);
} }
TEST(Layer_Test_Softmax, Accuracy) typedef testing::TestWithParam<DNNTarget> Test_Caffe_layers;
{ TEST_P(Test_Caffe_layers, Softmax)
testLayerUsingCaffeModels("layer_softmax");
}
OCL_TEST(Layer_Test_Softmax, Accuracy)
{
testLayerUsingCaffeModels("layer_softmax", DNN_TARGET_OPENCL);
}
TEST(Layer_Test_LRN_spatial, Accuracy)
{
testLayerUsingCaffeModels("layer_lrn_spatial");
}
OCL_TEST(Layer_Test_LRN_spatial, Accuracy)
{
testLayerUsingCaffeModels("layer_lrn_spatial", DNN_TARGET_OPENCL);
}
TEST(Layer_Test_LRN_channels, Accuracy)
{
testLayerUsingCaffeModels("layer_lrn_channels");
}
OCL_TEST(Layer_Test_LRN_channels, Accuracy)
{
testLayerUsingCaffeModels("layer_lrn_channels", DNN_TARGET_OPENCL);
}
TEST(Layer_Test_Convolution, Accuracy)
{
testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_CPU, true);
}
OCL_TEST(Layer_Test_Convolution, Accuracy)
{
testLayerUsingCaffeModels("layer_convolution", DNN_TARGET_OPENCL, true);
}
TEST(Layer_Test_DeConvolution, Accuracy)
{
testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_CPU, true, false);
}
OCL_TEST(Layer_Test_DeConvolution, Accuracy)
{ {
testLayerUsingCaffeModels("layer_deconvolution", DNN_TARGET_OPENCL, true, false); testLayerUsingCaffeModels("layer_softmax", GetParam());
} }
TEST(Layer_Test_InnerProduct, Accuracy) TEST_P(Test_Caffe_layers, LRN_spatial)
{ {
testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_CPU, true); testLayerUsingCaffeModels("layer_lrn_spatial", GetParam());
} }
OCL_TEST(Layer_Test_InnerProduct, Accuracy) TEST_P(Test_Caffe_layers, LRN_channels)
{ {
testLayerUsingCaffeModels("layer_inner_product", DNN_TARGET_OPENCL, true); testLayerUsingCaffeModels("layer_lrn_channels", GetParam());
} }
TEST(Layer_Test_Pooling_max, Accuracy) TEST_P(Test_Caffe_layers, Convolution)
{ {
testLayerUsingCaffeModels("layer_pooling_max"); testLayerUsingCaffeModels("layer_convolution", GetParam(), true);
} }
OCL_TEST(Layer_Test_Pooling_max, Accuracy) TEST_P(Test_Caffe_layers, DeConvolution)
{ {
testLayerUsingCaffeModels("layer_pooling_max", DNN_TARGET_OPENCL); testLayerUsingCaffeModels("layer_deconvolution", GetParam(), true, false);
} }
TEST(Layer_Test_Pooling_ave, Accuracy) TEST_P(Test_Caffe_layers, InnerProduct)
{ {
testLayerUsingCaffeModels("layer_pooling_ave"); testLayerUsingCaffeModels("layer_inner_product", GetParam(), true);
} }
OCL_TEST(Layer_Test_Pooling_ave, Accuracy) TEST_P(Test_Caffe_layers, Pooling_max)
{ {
testLayerUsingCaffeModels("layer_pooling_ave", DNN_TARGET_OPENCL); testLayerUsingCaffeModels("layer_pooling_max", GetParam());
} }
TEST(Layer_Test_MVN, Accuracy) TEST_P(Test_Caffe_layers, Pooling_ave)
{ {
testLayerUsingCaffeModels("layer_mvn"); testLayerUsingCaffeModels("layer_pooling_ave", GetParam());
} }
OCL_TEST(Layer_Test_MVN, Accuracy) TEST_P(Test_Caffe_layers, MVN)
{ {
testLayerUsingCaffeModels("layer_mvn", DNN_TARGET_OPENCL); testLayerUsingCaffeModels("layer_mvn", GetParam());
} }
void testReshape(const MatShape& inputShape, const MatShape& targetShape, void testReshape(const MatShape& inputShape, const MatShape& targetShape,
@ -257,14 +213,9 @@ TEST(Layer_Test_BatchNorm, local_stats)
testLayerUsingCaffeModels("layer_batch_norm_local_stats", DNN_TARGET_CPU, true, false); testLayerUsingCaffeModels("layer_batch_norm_local_stats", DNN_TARGET_CPU, true, false);
} }
TEST(Layer_Test_ReLU, Accuracy) TEST_P(Test_Caffe_layers, ReLU)
{
testLayerUsingCaffeModels("layer_relu");
}
OCL_TEST(Layer_Test_ReLU, Accuracy)
{ {
testLayerUsingCaffeModels("layer_relu", DNN_TARGET_OPENCL); testLayerUsingCaffeModels("layer_relu", GetParam());
} }
TEST(Layer_Test_Dropout, Accuracy) TEST(Layer_Test_Dropout, Accuracy)
@ -272,14 +223,9 @@ TEST(Layer_Test_Dropout, Accuracy)
testLayerUsingCaffeModels("layer_dropout"); testLayerUsingCaffeModels("layer_dropout");
} }
TEST(Layer_Test_Concat, Accuracy) TEST_P(Test_Caffe_layers, Concat)
{
testLayerUsingCaffeModels("layer_concat");
}
OCL_TEST(Layer_Test_Concat, Accuracy)
{ {
testLayerUsingCaffeModels("layer_concat", DNN_TARGET_OPENCL); testLayerUsingCaffeModels("layer_concat", GetParam());
} }
TEST(Layer_Test_Fused_Concat, Accuracy) TEST(Layer_Test_Fused_Concat, Accuracy)
@ -325,26 +271,16 @@ TEST(Layer_Test_Fused_Concat, Accuracy)
testLayerUsingCaffeModels("layer_concat_shared_input", DNN_TARGET_CPU, true, false); testLayerUsingCaffeModels("layer_concat_shared_input", DNN_TARGET_CPU, true, false);
} }
TEST(Layer_Test_Eltwise, Accuracy) TEST_P(Test_Caffe_layers, Eltwise)
{ {
testLayerUsingCaffeModels("layer_eltwise"); testLayerUsingCaffeModels("layer_eltwise", GetParam());
} }
OCL_TEST(Layer_Test_Eltwise, Accuracy) TEST_P(Test_Caffe_layers, PReLU)
{ {
testLayerUsingCaffeModels("layer_eltwise", DNN_TARGET_OPENCL); int targetId = GetParam();
} testLayerUsingCaffeModels("layer_prelu", targetId, true);
testLayerUsingCaffeModels("layer_prelu_fc", targetId, true, false);
TEST(Layer_Test_PReLU, Accuracy)
{
testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_CPU, true);
testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_CPU, true, false);
}
OCL_TEST(Layer_Test_PReLU, Accuracy)
{
testLayerUsingCaffeModels("layer_prelu", DNN_TARGET_OPENCL, true);
testLayerUsingCaffeModels("layer_prelu_fc", DNN_TARGET_OPENCL, true, false);
} }
//template<typename XMat> //template<typename XMat>
@ -385,14 +321,9 @@ static void test_Reshape_Split_Slice_layers(int targetId)
normAssert(input, output); normAssert(input, output);
} }
TEST(Layer_Test_Reshape_Split_Slice, Accuracy) TEST_P(Test_Caffe_layers, Reshape_Split_Slice)
{
test_Reshape_Split_Slice_layers(DNN_TARGET_CPU);
}
OCL_TEST(Layer_Test_Reshape_Split_Slice, Accuracy)
{ {
test_Reshape_Split_Slice_layers(DNN_TARGET_OPENCL); test_Reshape_Split_Slice_layers(GetParam());
} }
TEST(Layer_Conv_Elu, Accuracy) TEST(Layer_Conv_Elu, Accuracy)
@ -602,7 +533,6 @@ TEST(Layer_Test_ROIPooling, Accuracy)
normAssert(out, ref); normAssert(out, ref);
} }
typedef testing::TestWithParam<DNNTarget> Test_Caffe_layers;
TEST_P(Test_Caffe_layers, FasterRCNN_Proposal) TEST_P(Test_Caffe_layers, FasterRCNN_Proposal)
{ {
Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt")); Net net = readNetFromCaffe(_tf("net_faster_rcnn_proposal.prototxt"));
@ -906,4 +836,104 @@ TEST(Test_DLDT, two_inputs)
} }
#endif // HAVE_INF_ENGINE #endif // HAVE_INF_ENGINE
// Test a custom layer.
class InterpLayer CV_FINAL : public Layer
{
public:
InterpLayer(const LayerParams &params) : Layer(params)
{
zoomFactor = params.get<int>("zoom_factor", 0);
outWidth = params.get<int>("width", 0);
outHeight = params.get<int>("height", 0);
}
static Ptr<InterpLayer> create(LayerParams& params)
{
return Ptr<InterpLayer>(new InterpLayer(params));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
{
const int batchSize = inputs[0][0];
const int numChannels = inputs[0][1];
const int inpHeight = inputs[0][2];
const int inpWidth = inputs[0][3];
std::vector<int> outShape(4);
outShape[0] = batchSize;
outShape[1] = numChannels;
outShape[2] = outHeight != 0 ? outHeight : (inpHeight + (inpHeight - 1) * (zoomFactor - 1));
outShape[3] = outWidth != 0 ? outWidth : (inpWidth + (inpWidth - 1) * (zoomFactor - 1));
outputs.assign(1, outShape);
return false;
}
virtual void finalize(const std::vector<Mat*>& inputs, std::vector<Mat> &outputs) CV_OVERRIDE
{
if (!outWidth && !outHeight)
{
outHeight = outputs[0].size[2];
outWidth = outputs[0].size[3];
}
}
// Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat>& internals) CV_OVERRIDE
{
Mat& inp = *inputs[0];
Mat& out = outputs[0];
const float* inpData = (float*)inp.data;
float* outData = (float*)out.data;
const int batchSize = inp.size[0];
const int numChannels = inp.size[1];
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
for (int h2 = 0; h2 < outHeight; ++h2)
{
const float h1r = rheight * h2;
const int h1 = h1r;
const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
const float h1lambda = h1r - h1;
const float h0lambda = 1.f - h1lambda;
for (int w2 = 0; w2 < outWidth; ++w2)
{
const float w1r = rwidth * w2;
const int w1 = w1r;
const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
const float w1lambda = w1r - w1;
const float w0lambda = 1.f - w1lambda;
const float* pos1 = inpData + h1 * inpWidth + w1;
float* pos2 = outData + h2 * outWidth + w2;
for (int c = 0; c < batchSize * numChannels; ++c)
{
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
pos1 += inpWidth * inpHeight;
pos2 += outWidth * outHeight;
}
}
}
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
private:
int outWidth, outHeight, zoomFactor;
};
TEST(Layer_Test_Interp, Accuracy)
{
CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
testLayerUsingCaffeModels("layer_interp", DNN_TARGET_CPU, false, false);
LayerFactory::unregisterLayer("Interp");
}
}} // namespace }} // namespace

@ -7,6 +7,8 @@
#include "test_precomp.hpp" #include "test_precomp.hpp"
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
namespace opencv_test { namespace { namespace opencv_test { namespace {
TEST(blobFromImage_4ch, Regression) TEST(blobFromImage_4ch, Regression)
@ -75,4 +77,64 @@ TEST(readNet, Regression)
EXPECT_FALSE(net.empty()); EXPECT_FALSE(net.empty());
} }
class FirstCustomLayer CV_FINAL : public Layer
{
public:
FirstCustomLayer(const LayerParams &params) : Layer(params) {}
static Ptr<Layer> create(LayerParams& params)
{
return Ptr<Layer>(new FirstCustomLayer(params));
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat>& internals) CV_OVERRIDE
{
outputs[0].setTo(1);
}
};
class SecondCustomLayer CV_FINAL : public Layer
{
public:
SecondCustomLayer(const LayerParams &params) : Layer(params) {}
static Ptr<Layer> create(LayerParams& params)
{
return Ptr<Layer>(new SecondCustomLayer(params));
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat>& internals) CV_OVERRIDE
{
outputs[0].setTo(2);
}
};
TEST(LayerFactory, custom_layers)
{
LayerParams lp;
lp.name = "name";
lp.type = "CustomType";
Mat inp(1, 1, CV_32FC1);
for (int i = 0; i < 3; ++i)
{
if (i == 0) { CV_DNN_REGISTER_LAYER_CLASS(CustomType, FirstCustomLayer); }
else if (i == 1) { CV_DNN_REGISTER_LAYER_CLASS(CustomType, SecondCustomLayer); }
else if (i == 2) { LayerFactory::unregisterLayer("CustomType"); }
Net net;
net.addLayerToPrev(lp.name, lp.type, lp);
net.setInput(inp);
Mat output = net.forward();
if (i == 0) EXPECT_EQ(output.at<float>(0), 1);
else if (i == 1) EXPECT_EQ(output.at<float>(0), 2);
else if (i == 2) EXPECT_EQ(output.at<float>(0), 1);
}
LayerFactory::unregisterLayer("CustomType");
}
}} // namespace }} // namespace

@ -12,6 +12,8 @@ Test for Tensorflow models loading
#include "test_precomp.hpp" #include "test_precomp.hpp"
#include "npy_blob.hpp" #include "npy_blob.hpp"
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
namespace opencv_test namespace opencv_test
{ {
@ -364,4 +366,95 @@ TEST(Test_TensorFlow, memory_read)
runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true); runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
} }
// Test a custom layer.
class ResizeBilinearLayer CV_FINAL : public Layer
{
public:
ResizeBilinearLayer(const LayerParams &params) : Layer(params)
{
CV_Assert(!params.get<bool>("align_corners", false));
CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1);
outHeight = blobs[0].at<int>(0, 0);
outWidth = blobs[0].at<int>(0, 1);
}
static Ptr<Layer> create(LayerParams& params)
{
return Ptr<Layer>(new ResizeBilinearLayer(params));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
{
std::vector<int> outShape(4);
outShape[0] = inputs[0][0]; // batch size
outShape[1] = inputs[0][1]; // number of channels
outShape[2] = outHeight;
outShape[3] = outWidth;
outputs.assign(1, outShape);
return false;
}
// This implementation is based on a reference implementation from
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
{
Mat& inp = *inputs[0];
Mat& out = outputs[0];
const float* inpData = (float*)inp.data;
float* outData = (float*)out.data;
const int batchSize = inp.size[0];
const int numChannels = inp.size[1];
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
float heightScale = static_cast<float>(inpHeight) / outHeight;
float widthScale = static_cast<float>(inpWidth) / outWidth;
for (int b = 0; b < batchSize; ++b)
{
for (int y = 0; y < outHeight; ++y)
{
float input_y = y * heightScale;
int y0 = static_cast<int>(std::floor(input_y));
int y1 = std::min(y0 + 1, inpHeight - 1);
for (int x = 0; x < outWidth; ++x)
{
float input_x = x * widthScale;
int x0 = static_cast<int>(std::floor(input_x));
int x1 = std::min(x0 + 1, inpWidth - 1);
for (int c = 0; c < numChannels; ++c)
{
float interpolation =
inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
outData[offset(out.size, c, x, y, b)] = interpolation;
}
}
}
}
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
private:
static inline int offset(const MatSize& size, int c, int x, int y, int b)
{
return x + size[3] * (y + size[2] * (c + size[1] * b));
}
int outWidth, outHeight;
};
TEST(Test_TensorFlow, resize_bilinear)
{
CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
runTensorFlowNet("resize_bilinear");
LayerFactory::unregisterLayer("ResizeBilinear");
}
} }

@ -42,6 +42,7 @@
#include "test_precomp.hpp" #include "test_precomp.hpp"
#include "npy_blob.hpp" #include "npy_blob.hpp"
#include <opencv2/dnn/shape_utils.hpp> #include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS
namespace opencv_test namespace opencv_test
{ {
@ -325,4 +326,62 @@ TEST(Torch_Importer, net_residual)
runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true); runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true);
} }
// Test a custom layer
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
class SpatialUpSamplingNearestLayer CV_FINAL : public Layer
{
public:
SpatialUpSamplingNearestLayer(const LayerParams &params) : Layer(params)
{
scale = params.get<int>("scale_factor");
}
static Ptr<Layer> create(LayerParams& params)
{
return Ptr<Layer>(new SpatialUpSamplingNearestLayer(params));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const CV_OVERRIDE
{
std::vector<int> outShape(4);
outShape[0] = inputs[0][0]; // batch size
outShape[1] = inputs[0][1]; // number of channels
outShape[2] = scale * inputs[0][2];
outShape[3] = scale * inputs[0][3];
outputs.assign(1, outShape);
return false;
}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals) CV_OVERRIDE
{
Mat& inp = *inputs[0];
Mat& out = outputs[0];
const int outHeight = out.size[2];
const int outWidth = out.size[3];
for (size_t n = 0; n < inputs[0]->size[0]; ++n)
{
for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
{
resize(getPlane(inp, n, ch), getPlane(out, n, ch),
Size(outWidth, outHeight), 0, 0, INTER_NEAREST);
}
}
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
private:
int scale;
};
TEST(Torch_Importer, upsampling_nearest)
{
CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true);
LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
}
} }

@ -0,0 +1,232 @@
#include <opencv2/dnn.hpp>
//! [A custom layer interface]
class MyLayer : public cv::dnn::Layer
{
public:
//! [MyLayer::MyLayer]
MyLayer(const cv::dnn::LayerParams &params);
//! [MyLayer::MyLayer]
//! [MyLayer::create]
static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params);
//! [MyLayer::create]
//! [MyLayer::getMemoryShapes]
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const;
//! [MyLayer::getMemoryShapes]
//! [MyLayer::forward]
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals);
//! [MyLayer::forward]
//! [MyLayer::finalize]
virtual void finalize(const std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs);
//! [MyLayer::finalize]
virtual void forward(cv::InputArrayOfArrays inputs, cv::OutputArrayOfArrays outputs, cv::OutputArrayOfArrays internals);
};
//! [A custom layer interface]
//! [InterpLayer]
class InterpLayer : public cv::dnn::Layer
{
public:
InterpLayer(const cv::dnn::LayerParams &params) : Layer(params)
{
outWidth = params.get<int>("width", 0);
outHeight = params.get<int>("height", 0);
}
static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
{
return cv::Ptr<cv::dnn::Layer>(new InterpLayer(params));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const
{
CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
std::vector<int> outShape(4);
outShape[0] = inputs[0][0]; // batch size
outShape[1] = inputs[0][1]; // number of channels
outShape[2] = outHeight;
outShape[3] = outWidth;
outputs.assign(1, outShape);
return false;
}
// Implementation of this custom layer is based on https://github.com/cdmh/deeplab-public/blob/master/src/caffe/layers/interp_layer.cpp
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
{
CV_UNUSED(internals);
cv::Mat& inp = *inputs[0];
cv::Mat& out = outputs[0];
const float* inpData = (float*)inp.data;
float* outData = (float*)out.data;
const int batchSize = inp.size[0];
const int numChannels = inp.size[1];
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
const float rheight = (outHeight > 1) ? static_cast<float>(inpHeight - 1) / (outHeight - 1) : 0.f;
const float rwidth = (outWidth > 1) ? static_cast<float>(inpWidth - 1) / (outWidth - 1) : 0.f;
for (int h2 = 0; h2 < outHeight; ++h2)
{
const float h1r = rheight * h2;
const int h1 = static_cast<int>(h1r);
const int h1p = (h1 < inpHeight - 1) ? 1 : 0;
const float h1lambda = h1r - h1;
const float h0lambda = 1.f - h1lambda;
for (int w2 = 0; w2 < outWidth; ++w2)
{
const float w1r = rwidth * w2;
const int w1 = static_cast<int>(w1r);
const int w1p = (w1 < inpWidth - 1) ? 1 : 0;
const float w1lambda = w1r - w1;
const float w0lambda = 1.f - w1lambda;
const float* pos1 = inpData + h1 * inpWidth + w1;
float* pos2 = outData + h2 * outWidth + w2;
for (int c = 0; c < batchSize * numChannels; ++c)
{
pos2[0] =
h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p]) +
h1lambda * (w0lambda * pos1[h1p * inpWidth] + w1lambda * pos1[h1p * inpWidth + w1p]);
pos1 += inpWidth * inpHeight;
pos2 += outWidth * outHeight;
}
}
}
}
virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
private:
int outWidth, outHeight;
};
//! [InterpLayer]
//! [ResizeBilinearLayer]
class ResizeBilinearLayer : public cv::dnn::Layer
{
public:
ResizeBilinearLayer(const cv::dnn::LayerParams &params) : Layer(params)
{
CV_Assert(!params.get<bool>("align_corners", false));
CV_Assert(blobs.size() == 1, blobs[0].type() == CV_32SC1);
outHeight = blobs[0].at<int>(0, 0);
outWidth = blobs[0].at<int>(0, 1);
}
static cv::Ptr<cv::dnn::Layer> create(cv::dnn::LayerParams& params)
{
return cv::Ptr<cv::dnn::Layer>(new ResizeBilinearLayer(params));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int requiredOutputs,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &internals) const
{
CV_UNUSED(requiredOutputs); CV_UNUSED(internals);
std::vector<int> outShape(4);
outShape[0] = inputs[0][0]; // batch size
outShape[1] = inputs[0][1]; // number of channels
outShape[2] = outHeight;
outShape[3] = outWidth;
outputs.assign(1, outShape);
return false;
}
// This implementation is based on a reference implementation from
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h
virtual void forward(std::vector<cv::Mat*> &inputs, std::vector<cv::Mat> &outputs, std::vector<cv::Mat> &internals)
{
CV_UNUSED(internals);
cv::Mat& inp = *inputs[0];
cv::Mat& out = outputs[0];
const float* inpData = (float*)inp.data;
float* outData = (float*)out.data;
const int batchSize = inp.size[0];
const int numChannels = inp.size[1];
const int inpHeight = inp.size[2];
const int inpWidth = inp.size[3];
float heightScale = static_cast<float>(inpHeight) / outHeight;
float widthScale = static_cast<float>(inpWidth) / outWidth;
for (int b = 0; b < batchSize; ++b)
{
for (int y = 0; y < outHeight; ++y)
{
float input_y = y * heightScale;
int y0 = static_cast<int>(std::floor(input_y));
int y1 = std::min(y0 + 1, inpHeight - 1);
for (int x = 0; x < outWidth; ++x)
{
float input_x = x * widthScale;
int x0 = static_cast<int>(std::floor(input_x));
int x1 = std::min(x0 + 1, inpWidth - 1);
for (int c = 0; c < numChannels; ++c)
{
float interpolation =
inpData[offset(inp.size, c, x0, y0, b)] * (1 - (input_y - y0)) * (1 - (input_x - x0)) +
inpData[offset(inp.size, c, x0, y1, b)] * (input_y - y0) * (1 - (input_x - x0)) +
inpData[offset(inp.size, c, x1, y0, b)] * (1 - (input_y - y0)) * (input_x - x0) +
inpData[offset(inp.size, c, x1, y1, b)] * (input_y - y0) * (input_x - x0);
outData[offset(out.size, c, x, y, b)] = interpolation;
}
}
}
}
}
virtual void forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
private:
static inline int offset(const cv::MatSize& size, int c, int x, int y, int b)
{
return x + size[3] * (y + size[2] * (c + size[1] * b));
}
int outWidth, outHeight;
};
//! [ResizeBilinearLayer]
//! [Register a custom layer]
#include <opencv2/dnn/layer.details.hpp> // CV_DNN_REGISTER_LAYER_CLASS macro
int main(int argc, char** argv)
{
CV_DNN_REGISTER_LAYER_CLASS(MyType, MyLayer);
// ...
//! [Register a custom layer]
CV_UNUSED(argc); CV_UNUSED(argv);
//! [Register InterpLayer]
CV_DNN_REGISTER_LAYER_CLASS(Interp, InterpLayer);
cv::dnn::Net caffeNet = cv::dnn::readNet("/path/to/config.prototxt", "/path/to/weights.caffemodel");
//! [Register InterpLayer]
//! [Register ResizeBilinearLayer]
CV_DNN_REGISTER_LAYER_CLASS(ResizeBilinear, ResizeBilinearLayer);
cv::dnn::Net tfNet = cv::dnn::readNet("/path/to/graph.pb");
//! [Register ResizeBilinearLayer]
}
cv::Ptr<cv::dnn::Layer> MyLayer::create(cv::dnn::LayerParams& params)
{
return cv::Ptr<cv::dnn::Layer>(new MyLayer(params));
}
MyLayer::MyLayer(const cv::dnn::LayerParams&) {}
bool MyLayer::getMemoryShapes(const std::vector<std::vector<int> >&, const int,
std::vector<std::vector<int> >&,
std::vector<std::vector<int> >&) const { return false; }
void MyLayer::forward(std::vector<cv::Mat*>&, std::vector<cv::Mat>&, std::vector<cv::Mat>&) {}
void MyLayer::finalize(const std::vector<cv::Mat*>&, std::vector<cv::Mat>&) {}
void MyLayer::forward(cv::InputArrayOfArrays, cv::OutputArrayOfArrays, cv::OutputArrayOfArrays) {}
Loading…
Cancel
Save