From 6e23d93b3955f431156ac849468c00225fd2bb0c Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Sat, 6 Jun 2015 19:08:12 +0300 Subject: [PATCH] Added simple .prototxt Caffe importer. --- modules/dnn/CMakeLists.txt | 7 +- modules/dnn/include/opencv2/dict.hpp | 178 ++ modules/dnn/include/opencv2/dnn.hpp | 57 +- modules/dnn/src/caffe/common.hpp | 177 ++ modules/dnn/src/caffe/io.cpp | 232 ++ modules/dnn/src/caffe/upgrade_proto.cpp | 940 +++++++ modules/dnn/src/caffe/util/io.hpp | 152 ++ modules/dnn/src/caffe/util/upgrade_proto.hpp | 64 + modules/dnn/src/caffe_importer.cpp | 152 +- modules/dnn/src/dnn.cpp | 18 +- modules/dnn/src/precomp.hpp | 2 + modules/dnn/test/test_caffe_importer.cpp | 31 + modules/dnn/test/test_main.cpp | 3 + modules/dnn/test/test_precomp.hpp | 19 + .../testdata/dnn/googlenet_deploy.prototxt | 2156 +++++++++++++++++ modules/dnn/testdata/dnn/gtsrb.prototxt | 167 ++ 16 files changed, 4294 insertions(+), 61 deletions(-) create mode 100644 modules/dnn/include/opencv2/dict.hpp create mode 100644 modules/dnn/src/caffe/common.hpp create mode 100644 modules/dnn/src/caffe/io.cpp create mode 100644 modules/dnn/src/caffe/upgrade_proto.cpp create mode 100644 modules/dnn/src/caffe/util/io.hpp create mode 100644 modules/dnn/src/caffe/util/upgrade_proto.hpp create mode 100644 modules/dnn/src/precomp.hpp create mode 100644 modules/dnn/test/test_caffe_importer.cpp create mode 100644 modules/dnn/test/test_main.cpp create mode 100644 modules/dnn/test/test_precomp.hpp create mode 100644 modules/dnn/testdata/dnn/googlenet_deploy.prototxt create mode 100644 modules/dnn/testdata/dnn/gtsrb.prototxt diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 1bd753a45..c72b3fb79 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -21,5 +21,10 @@ ocv_add_module(dnn opencv_imgproc opencv_core opencv_highgui WRAP python matlab) ocv_glob_module_sources(${PROTO_SRCS} ${PROTO_HDRS}) ocv_source_group("Src\\protobuf" FILES ${PROTO_SRCS} ${PROTO_HDRS}) ocv_module_include_directories(${PROTOBUF_INCLUDE_DIR}) + ocv_create_module() -target_link_libraries(opencv_dnn ${PROTOBUF_LIBRARIES}) \ No newline at end of file + +target_link_libraries(opencv_dnn ${PROTOBUF_LIBRARIES}) +ocv_add_accuracy_tests() +ocv_add_perf_tests() +ocv_add_samples() \ No newline at end of file diff --git a/modules/dnn/include/opencv2/dict.hpp b/modules/dnn/include/opencv2/dict.hpp new file mode 100644 index 000000000..6740670ac --- /dev/null +++ b/modules/dnn/include/opencv2/dict.hpp @@ -0,0 +1,178 @@ +#pragma once +#include + +namespace cv +{ +namespace dnn +{ + +struct DictValue +{ + int type; + + union + { + int i; + unsigned u; + double d; + bool b; + String *s; + }; + + DictValue(const DictValue &r); + DictValue(int p = 0) : type(cv::Param::INT), i(p) {} + DictValue(unsigned p) : type(cv::Param::UNSIGNED_INT), u(p) {} + DictValue(double p) : type(cv::Param::REAL), d(p) {} + DictValue(bool p) : type(cv::Param::BOOLEAN), b(p) {} + DictValue(const String &p) : type(cv::Param::STRING), s(new String(p)) {} + + template + T get() const; + + template + const T &get() const; + + DictValue &operator=(const DictValue &r); + + ~DictValue(); + +private: + void release(); +}; + +class Dict +{ + //TODO: maybe this mechanism was realized somewhere in OpenCV? 
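+    //A minimal usage sketch of this class (illustration, not part of the patch):
+    //
+    //    Dict d;
+    //    d.set<int>("num_output", 64);
+    //    d.set<bool>("bias_term", true);
+    //    int n = d.get<int>("num_output");          //CV_Assert fires if the key is absent
+    //    double eps = d.get<double>("eps", 1e-5);   //falls back to the supplied default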
+ typedef std::map _Dict; + _Dict dict; + +public: + + template + const T &get(const String &name) const + { + _Dict::const_iterator i = dict.find(name); + CV_Assert(i != dict.end()); + return i->second.get(); + } + + template + const T &get(const String &name, const T &default) const + { + _Dict::const_iterator i = dict.find(name); + + if (i != dict.end()) + return i->second.get(); + else + return default; + } + + template + const T &set(const String &name, const T &value) + { + _Dict::iterator i = dict.find(name); + + if (i != dict.end()) + i->second = DictValue(value); + else + dict.insert(std::make_pair(name, DictValue(value))); + + return value; + } +}; + + +template<> +inline int DictValue::get() const +{ + CV_Assert(type == cv::ParamType::type || type == cv::ParamType::type && (int)u >= 0); + return i; +} + +template<> +inline unsigned DictValue::get() const +{ + CV_Assert(type == cv::ParamType::type || type == cv::ParamType::type && i >= 0); + return u; +} + +template<> +inline double DictValue::get() const +{ + CV_Assert(type == cv::ParamType::type); + return d; +} + +template<> +inline float DictValue::get() const +{ + CV_Assert(type == cv::ParamType::type); + return (float)d; +} + +template<> +inline bool DictValue::get() const +{ + if (type == cv::ParamType::type) + { + return b; + } + else if (type == cv::ParamType::type || type == cv::ParamType::type) + { + return i; + } + else + { + CV_Assert(type == cv::ParamType::type || type == cv::ParamType::type || type == cv::ParamType::type); + return 0; + } +} + +template<> +inline const String &DictValue::get() const +{ + CV_Assert(type == cv::ParamType::type); + return *s; +} + +inline void DictValue::release() +{ + if (type == cv::Param::STRING && s != NULL) + { + delete s; + s = NULL; + } + +} + +inline DictValue::~DictValue() +{ + release(); +} + +inline DictValue & DictValue::operator=(const DictValue &r) +{ + if (&r == this) + return *this; + + release(); + + //how to copy anonymous union without memcpy? + for (size_t i = 0; i < sizeof(*this); i++) + ((uchar*)this)[i] = ((uchar*)&r)[i]; + + if (r.type == cv::Param::STRING) + { + s = new String(*r.s); + } + + return *this; +} + +inline DictValue::DictValue(const DictValue &r) +{ + *this = r; +} + +} +} \ No newline at end of file diff --git a/modules/dnn/include/opencv2/dnn.hpp b/modules/dnn/include/opencv2/dnn.hpp index 7ed2cd553..572de8dd8 100644 --- a/modules/dnn/include/opencv2/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn.hpp @@ -4,6 +4,7 @@ #include #include #include +#include "dict.hpp" namespace cv { @@ -16,13 +17,15 @@ namespace dnn class LayerParams; //wrapper over cv::Mat and cv::UMat - class Blob : public _InputOutputArray + CV_EXPORTS class Blob { + Mat m; + public: - Blob(Mat &in); - Blob(const Mat &in); - Blob(UMat &in); - Blob(const UMat &in); + Blob(); + Blob(InputArray in); + + bool empty() const; int width() const; //cols int height() const; //rows @@ -32,34 +35,16 @@ namespace dnn Vec4i size() const; }; - class LayerParams + CV_EXPORTS class LayerParams : public Dict { - struct Value - { - int type; - - union - { - int i; - double d; - String *s; - Mat *m; - } data; - }; - - //TODO: maybe this mechanism was realized somewhere in OpenCV? 
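The byte-wise copy in dict.hpp's operator= above leaves its own question ("how to copy anonymous union without memcpy?") open; one answer is to switch on the tag and copy the active member explicitly. A sketch, equivalent to the byte loop for the members declared in DictValue:

    DictValue &DictValue::operator=(const DictValue &r)
    {
        if (&r == this)
            return *this;

        release();
        type = r.type;

        switch (r.type)
        {
        case cv::Param::STRING:       s = new String(*r.s); break; //deep-copy the only owning member
        case cv::Param::REAL:         d = r.d; break;
        case cv::Param::BOOLEAN:      b = r.b; break;
        case cv::Param::UNSIGNED_INT: u = r.u; break;
        default:                      i = r.i; break; //INT and any other scalar tag
        }

        return *this;
    }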
- std::map params; public: std::vector learnedWeights; - - template - T get(const String &name); }; //this class allows to build new Layers - class Layer : public Algorithm + CV_EXPORTS class Layer { public: //Layer registration routines @@ -104,11 +89,11 @@ namespace dnn //TODO: maybe eliminate all int ids and replace them by string names //Proxy class for different formats //Each format importer must populate it - class NetConfiguration + CV_EXPORTS class NetConfiguration { public: - static Ptr create(); + CV_EXPORTS static Ptr create(); int addLayer(const String &name, const String &type); @@ -141,24 +126,24 @@ namespace dnn }; - class Net + CV_EXPORTS class Net { public: - static Ptr create(Ptr config); + CV_EXPORTS static Ptr create(Ptr config); - virtual ~Net(); + virtual ~Net() = 0; - virtual int getBlobId(int layerId, int outputId); + virtual int getBlobId(int layerId, int outputId) = 0; - virtual int getBlobId(const String &blobName); + virtual int getBlobId(const String &blobName) = 0; - virtual void forward(std::vector< int, Ptr > &inputBlobs, std::vector > &outputBlobs); + virtual void forward(std::vector< int, Ptr > &inputBlobs, std::vector > &outputBlobs) = 0; - virtual void forward(int layer, std::vector > &layerOutputs); + virtual void forward(int layer, std::vector > &layerOutputs) = 0; }; - class Importer + CV_EXPORTS class Importer { public: @@ -167,7 +152,7 @@ namespace dnn virtual ~Importer(); }; - Ptr createCaffeImporter(const String &prototxt, const String &caffeModel); + CV_EXPORTS Ptr createCaffeImporter(const String &prototxt, const String &caffeModel); } } diff --git a/modules/dnn/src/caffe/common.hpp b/modules/dnn/src/caffe/common.hpp new file mode 100644 index 000000000..efe558ea5 --- /dev/null +++ b/modules/dnn/src/caffe/common.hpp @@ -0,0 +1,177 @@ +#ifndef CAFFE_COMMON_HPP_ +#define CAFFE_COMMON_HPP_ + +#include +#include +#define CHECK CV_Assert +#define LOG(WHERE) std::cerr + +//#include +//#include +//#include + +#include +#include +#include // NOLINT(readability/streams) +#include // NOLINT(readability/streams) +#include +#include +#include +#include +#include // pair +#include + +//#include "caffe/util/device_alternate.hpp" +// +//// gflags 2.1 issue: namespace google was changed to gflags without warning. +//// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version +//// 2.1. If yes, we will add a temporary solution to redirect the namespace. +//// TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's +//// remove the following hack. +//#ifndef GFLAGS_GFLAGS_H_ +//namespace gflags = google; +//#endif // GFLAGS_GFLAGS_H_ +// +//// Disable the copy and assignment operator for a class. +//#define DISABLE_COPY_AND_ASSIGN(classname) \ +//private:\ +// classname(const classname&);\ +// classname& operator=(const classname&) +// +//// Instantiate a class with float and double specifications. 
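Putting the public API above together, a hypothetical end-to-end load looks like the following sketch. The importer's population method is elided in the hunk above, so its name here (populateNetConfiguration) is an assumption, as are the file names:

    #include <opencv2/dnn.hpp>
    using namespace cv::dnn;

    void loadCaffeNet()
    {
        Ptr<Importer> importer = createCaffeImporter("googlenet_deploy.prototxt",
                                                     "googlenet.caffemodel");
        Ptr<NetConfiguration> config = NetConfiguration::create();
        importer->populateNetConfiguration(config); //hypothetical call; name elided in the patch
        Ptr<Net> net = Net::create(config);         //ready for forward()
    }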
+//#define INSTANTIATE_CLASS(classname) \ +// char gInstantiationGuard##classname; \ +// template class classname; \ +// template class classname +// +//#define INSTANTIATE_LAYER_GPU_FORWARD(classname) \ +// template void classname::Forward_gpu( \ +// const std::vector*>& bottom, \ +// const std::vector*>& top); \ +// template void classname::Forward_gpu( \ +// const std::vector*>& bottom, \ +// const std::vector*>& top); +// +//#define INSTANTIATE_LAYER_GPU_BACKWARD(classname) \ +// template void classname::Backward_gpu( \ +// const std::vector*>& top, \ +// const std::vector& propagate_down, \ +// const std::vector*>& bottom); \ +// template void classname::Backward_gpu( \ +// const std::vector*>& top, \ +// const std::vector& propagate_down, \ +// const std::vector*>& bottom) +// +//#define INSTANTIATE_LAYER_GPU_FUNCS(classname) \ +// INSTANTIATE_LAYER_GPU_FORWARD(classname); \ +// INSTANTIATE_LAYER_GPU_BACKWARD(classname) +// +//// A simple macro to mark codes that are not implemented, so that when the code +//// is executed we will see a fatal log. +//#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" +// +//// See PR #1236 +//namespace cv { class Mat; } +// +//namespace caffe { +// +//// We will use the boost shared_ptr instead of the new C++11 one mainly +//// because cuda does not work (at least now) well with C++11 features. +//using boost::shared_ptr; + +// Common functions and classes from std that caffe often uses. +using std::fstream; +using std::ios; +using std::isnan; +using std::isinf; +using std::iterator; +using std::make_pair; +using std::map; +using std::ostringstream; +using std::pair; +using std::set; +using std::string; +using std::stringstream; +using std::vector; + +//// A global initialization function that you should call in your main function. +//// Currently it initializes google flags and google logging. +//void GlobalInit(int* pargc, char*** pargv); +// +//// A singleton class to hold common caffe stuff, such as the handler that +//// caffe is going to use for cublas, curand, etc. +//class Caffe { +// public: +// ~Caffe(); +// inline static Caffe& Get() { +// if (!singleton_.get()) { +// singleton_.reset(new Caffe()); +// } +// return *singleton_; +// } +// enum Brew { CPU, GPU }; +// +// // This random number generator facade hides boost and CUDA rng +// // implementation from one another (for cross-platform compatibility). +// class RNG { +// public: +// RNG(); +// explicit RNG(unsigned int seed); +// explicit RNG(const RNG&); +// RNG& operator=(const RNG&); +// void* generator(); +// private: +// class Generator; +// shared_ptr generator_; +// }; +// +// // Getters for boost rng, curand, and cublas handles +// inline static RNG& rng_stream() { +// if (!Get().random_generator_) { +// Get().random_generator_.reset(new RNG()); +// } +// return *(Get().random_generator_); +// } +//#ifndef CPU_ONLY +// inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; } +// inline static curandGenerator_t curand_generator() { +// return Get().curand_generator_; +// } +//#endif +// +// // Returns the mode: running on CPU or GPU. +// inline static Brew mode() { return Get().mode_; } +// // The setters for the variables +// // Sets the mode. It is recommended that you don't change the mode halfway +// // into the program since that may cause allocation of pinned memory being +// // freed in a non-pinned way, which may cause problems - I haven't verified +// // it personally but better to note it here in the header file. 
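+// Note on the shims at the top of this header: with
+//     #define CHECK CV_Assert
+//     #define LOG(WHERE) std::cerr
+// a Caffe-style `CHECK(cond)` becomes `CV_Assert(cond)`, and
+// `LOG(ERROR) << "msg"` degrades to `std::cerr << "msg"`; the severity
+// argument is discarded, so LOG(FATAL) prints but no longer aborts.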
+// inline static void set_mode(Brew mode) { Get().mode_ = mode; } +// // Sets the random seed of both boost and curand +// static void set_random_seed(const unsigned int seed); +// // Sets the device. Since we have cublas and curand stuff, set device also +// // requires us to reset those values. +// static void SetDevice(const int device_id); +// // Prints the current GPU status. +// static void DeviceQuery(); +// +// protected: +//#ifndef CPU_ONLY +// cublasHandle_t cublas_handle_; +// curandGenerator_t curand_generator_; +//#endif +// shared_ptr random_generator_; +// +// Brew mode_; +// static shared_ptr singleton_; +// +// private: +// // The private constructor to avoid duplicate instantiation. +// Caffe(); +// +// DISABLE_COPY_AND_ASSIGN(Caffe); +//}; +// +//} // namespace caffe +// +#endif // CAFFE_COMMON_HPP_ diff --git a/modules/dnn/src/caffe/io.cpp b/modules/dnn/src/caffe/io.cpp new file mode 100644 index 000000000..f573b83c2 --- /dev/null +++ b/modules/dnn/src/caffe/io.cpp @@ -0,0 +1,232 @@ +//#include +#include +#include +#include +#include +//#include +//#include +//#include +//#include +// +//#include +#include // NOLINT(readability/streams) +//#include +//#include +// +//#include "caffe/common.hpp" +//#include "caffe.pb.h" +//#include "caffe/util/io.hpp" +// +const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. + +namespace caffe { + +using google::protobuf::io::FileInputStream; +using google::protobuf::io::FileOutputStream; +using google::protobuf::io::ZeroCopyInputStream; +using google::protobuf::io::CodedInputStream; +using google::protobuf::io::ZeroCopyOutputStream; +using google::protobuf::io::CodedOutputStream; +using google::protobuf::io::IstreamInputStream; +using google::protobuf::Message; + +bool ReadProtoFromTextFile(const char* filename, Message* proto) { + std::ifstream fs(filename, std::ifstream::in); + CV_Assert(fs.is_open()); + IstreamInputStream input(&fs); + bool success = google::protobuf::TextFormat::Parse(&input, proto); + fs.close(); + return success; +} + +// +//void WriteProtoToTextFile(const Message& proto, const char* filename) { +// int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); +// FileOutputStream* output = new FileOutputStream(fd); +// CHECK(google::protobuf::TextFormat::Print(proto, output)); +// delete output; +// close(fd); +//} +// +bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { + std::ifstream fs(filename, std::ifstream::in | std::ifstream::binary); + CV_Assert(fs.is_open()); + ZeroCopyInputStream* raw_input = new IstreamInputStream(&fs); + CodedInputStream* coded_input = new CodedInputStream(raw_input); + coded_input->SetTotalBytesLimit(kProtoReadBytesLimit, 536870912); + + bool success = proto->ParseFromCodedStream(coded_input); + + delete coded_input; + delete raw_input; + fs.close(); + return success; +} +// +//void WriteProtoToBinaryFile(const Message& proto, const char* filename) { +// fstream output(filename, ios::out | ios::trunc | ios::binary); +// CHECK(proto.SerializeToOstream(&output)); +//} +// +//cv::Mat ReadImageToCVMat(const string& filename, +// const int height, const int width, const bool is_color) { +// cv::Mat cv_img; +// int cv_read_flag = (is_color ? 
CV_LOAD_IMAGE_COLOR : +// CV_LOAD_IMAGE_GRAYSCALE); +// cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag); +// if (!cv_img_origin.data) { +// LOG(ERROR) << "Could not open or find file " << filename; +// return cv_img_origin; +// } +// if (height > 0 && width > 0) { +// cv::resize(cv_img_origin, cv_img, cv::Size(width, height)); +// } else { +// cv_img = cv_img_origin; +// } +// return cv_img; +//} +// +//cv::Mat ReadImageToCVMat(const string& filename, +// const int height, const int width) { +// return ReadImageToCVMat(filename, height, width, true); +//} +// +//cv::Mat ReadImageToCVMat(const string& filename, +// const bool is_color) { +// return ReadImageToCVMat(filename, 0, 0, is_color); +//} +// +//cv::Mat ReadImageToCVMat(const string& filename) { +// return ReadImageToCVMat(filename, 0, 0, true); +//} +//// Do the file extension and encoding match? +//static bool matchExt(const std::string & fn, +// std::string en) { +// size_t p = fn.rfind('.'); +// std::string ext = p != fn.npos ? fn.substr(p) : fn; +// std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); +// std::transform(en.begin(), en.end(), en.begin(), ::tolower); +// if ( ext == en ) +// return true; +// if ( en == "jpg" && ext == "jpeg" ) +// return true; +// return false; +//} +//bool ReadImageToDatum(const string& filename, const int label, +// const int height, const int width, const bool is_color, +// const std::string & encoding, Datum* datum) { +// cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); +// if (cv_img.data) { +// if (encoding.size()) { +// if ( (cv_img.channels() == 3) == is_color && !height && !width && +// matchExt(filename, encoding) ) +// return ReadFileToDatum(filename, label, datum); +// std::vector buf; +// cv::imencode("."+encoding, cv_img, buf); +// datum->set_data(std::string(reinterpret_cast(&buf[0]), +// buf.size())); +// datum->set_label(label); +// datum->set_encoded(true); +// return true; +// } +// CVMatToDatum(cv_img, datum); +// datum->set_label(label); +// return true; +// } else { +// return false; +// } +//} +// +//bool ReadFileToDatum(const string& filename, const int label, +// Datum* datum) { +// std::streampos size; +// +// fstream file(filename.c_str(), ios::in|ios::binary|ios::ate); +// if (file.is_open()) { +// size = file.tellg(); +// std::string buffer(size, ' '); +// file.seekg(0, ios::beg); +// file.read(&buffer[0], size); +// file.close(); +// datum->set_data(buffer); +// datum->set_label(label); +// datum->set_encoded(true); +// return true; +// } else { +// return false; +// } +//} +// +//cv::Mat DecodeDatumToCVMatNative(const Datum& datum) { +// cv::Mat cv_img; +// CHECK(datum.encoded()) << "Datum not encoded"; +// const string& data = datum.data(); +// std::vector vec_data(data.c_str(), data.c_str() + data.size()); +// cv_img = cv::imdecode(vec_data, -1); +// if (!cv_img.data) { +// LOG(ERROR) << "Could not decode datum "; +// } +// return cv_img; +//} +//cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color) { +// cv::Mat cv_img; +// CHECK(datum.encoded()) << "Datum not encoded"; +// const string& data = datum.data(); +// std::vector vec_data(data.c_str(), data.c_str() + data.size()); +// int cv_read_flag = (is_color ? 
CV_LOAD_IMAGE_COLOR : +// CV_LOAD_IMAGE_GRAYSCALE); +// cv_img = cv::imdecode(vec_data, cv_read_flag); +// if (!cv_img.data) { +// LOG(ERROR) << "Could not decode datum "; +// } +// return cv_img; +//} +// +//// If Datum is encoded will decoded using DecodeDatumToCVMat and CVMatToDatum +//// If Datum is not encoded will do nothing +//bool DecodeDatumNative(Datum* datum) { +// if (datum->encoded()) { +// cv::Mat cv_img = DecodeDatumToCVMatNative((*datum)); +// CVMatToDatum(cv_img, datum); +// return true; +// } else { +// return false; +// } +//} +//bool DecodeDatum(Datum* datum, bool is_color) { +// if (datum->encoded()) { +// cv::Mat cv_img = DecodeDatumToCVMat((*datum), is_color); +// CVMatToDatum(cv_img, datum); +// return true; +// } else { +// return false; +// } +//} +// +//void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) { +// CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; +// datum->set_channels(cv_img.channels()); +// datum->set_height(cv_img.rows); +// datum->set_width(cv_img.cols); +// datum->clear_data(); +// datum->clear_float_data(); +// datum->set_encoded(false); +// int datum_channels = datum->channels(); +// int datum_height = datum->height(); +// int datum_width = datum->width(); +// int datum_size = datum_channels * datum_height * datum_width; +// std::string buffer(datum_size, ' '); +// for (int h = 0; h < datum_height; ++h) { +// const uchar* ptr = cv_img.ptr(h); +// int img_index = 0; +// for (int w = 0; w < datum_width; ++w) { +// for (int c = 0; c < datum_channels; ++c) { +// int datum_index = (c * datum_height + h) * datum_width + w; +// buffer[datum_index] = static_cast(ptr[img_index++]); +// } +// } +// } +// datum->set_data(buffer); +//} + +} // namespace caffe diff --git a/modules/dnn/src/caffe/upgrade_proto.cpp b/modules/dnn/src/caffe/upgrade_proto.cpp new file mode 100644 index 000000000..32ba9020f --- /dev/null +++ b/modules/dnn/src/caffe/upgrade_proto.cpp @@ -0,0 +1,940 @@ +#include +#include +#include + +#include +#include + +#include "caffe/common.hpp" +#include "caffe.pb.h" +#include "caffe/util/io.hpp" +#include "caffe/util/upgrade_proto.hpp" + +namespace caffe { + +bool NetNeedsUpgrade(const NetParameter& net_param) { + return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param); +} + +bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) { + for (int i = 0; i < net_param.layers_size(); ++i) { + if (net_param.layers(i).has_layer()) { + return true; + } + } + return false; +} + +bool NetNeedsV1ToV2Upgrade(const NetParameter& net_param) { + return net_param.layers_size() > 0; +} + +bool UpgradeV0Net(const NetParameter& v0_net_param_padding_layers, + NetParameter* net_param) { + // First upgrade padding layers to padded conv layers. + NetParameter v0_net_param; + UpgradeV0PaddingLayers(v0_net_param_padding_layers, &v0_net_param); + // Now upgrade layer parameters. 
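+  // Version detection is purely structural: a V0 net nests a `layer`
+  // message inside each `layers` entry (NetNeedsV0ToV1Upgrade), while any
+  // use of top-level `layers` at all marks the net as V1 and triggers the
+  // V1->V2 upgrade (NetNeedsV1ToV2Upgrade). The loop below rewrites each
+  // V0 layer into the V1 schema via UpgradeV0LayerParameter.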
+ bool is_fully_compatible = true; + net_param->Clear(); + if (v0_net_param.has_name()) { + net_param->set_name(v0_net_param.name()); + } + for (int i = 0; i < v0_net_param.layers_size(); ++i) { + is_fully_compatible &= UpgradeV0LayerParameter(v0_net_param.layers(i), + net_param->add_layers()); + } + for (int i = 0; i < v0_net_param.input_size(); ++i) { + net_param->add_input(v0_net_param.input(i)); + } + for (int i = 0; i < v0_net_param.input_dim_size(); ++i) { + net_param->add_input_dim(v0_net_param.input_dim(i)); + } + if (v0_net_param.has_force_backward()) { + net_param->set_force_backward(v0_net_param.force_backward()); + } + return is_fully_compatible; +} + +void UpgradeV0PaddingLayers(const NetParameter& param, + NetParameter* param_upgraded_pad) { + // Copy everything other than the layers from the original param. + param_upgraded_pad->Clear(); + param_upgraded_pad->CopyFrom(param); + param_upgraded_pad->clear_layers(); + // Figure out which layer each bottom blob comes from. + map blob_name_to_last_top_idx; + for (int i = 0; i < param.input_size(); ++i) { + const string& blob_name = param.input(i); + blob_name_to_last_top_idx[blob_name] = -1; + } + for (int i = 0; i < param.layers_size(); ++i) { + const V1LayerParameter& layer_connection = param.layers(i); + const V0LayerParameter& layer_param = layer_connection.layer(); + // Add the layer to the new net, unless it's a padding layer. + if (layer_param.type() != "padding") { + param_upgraded_pad->add_layers()->CopyFrom(layer_connection); + } + for (int j = 0; j < layer_connection.bottom_size(); ++j) { + const string& blob_name = layer_connection.bottom(j); + if (blob_name_to_last_top_idx.find(blob_name) == + blob_name_to_last_top_idx.end()) { + LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; + } + const int top_idx = blob_name_to_last_top_idx[blob_name]; + if (top_idx == -1) { + continue; + } + const V1LayerParameter& source_layer = param.layers(top_idx); + if (source_layer.layer().type() == "padding") { + // This layer has a padding layer as input -- check that it is a conv + // layer or a pooling layer and takes only one input. Also check that + // the padding layer input has only one input and one output. Other + // cases have undefined behavior in Caffe. 
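+        // For example, the V0 pattern
+        //     layers { layer { name: "pad1"  type: "padding" pad: 2 } bottom: "x"  top: "xp" }
+        //     layers { layer { name: "conv1" type: "conv" ... }       bottom: "xp" top: "y" }
+        // is rewritten so that conv1 reads "x" directly with pad: 2, and the
+        // padding layer itself is dropped from the upgraded net.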
+ CHECK((layer_param.type() == "conv") || (layer_param.type() == "pool")) + << "Padding layer input to " + "non-convolutional / non-pooling layer type " + << layer_param.type(); + CHECK_EQ(layer_connection.bottom_size(), 1) + << "Conv Layer takes a single blob as input."; + CHECK_EQ(source_layer.bottom_size(), 1) + << "Padding Layer takes a single blob as input."; + CHECK_EQ(source_layer.top_size(), 1) + << "Padding Layer produces a single blob as output."; + int layer_index = param_upgraded_pad->layers_size() - 1; + param_upgraded_pad->mutable_layers(layer_index)->mutable_layer() + ->set_pad(source_layer.layer().pad()); + param_upgraded_pad->mutable_layers(layer_index) + ->set_bottom(j, source_layer.bottom(0)); + } + } + for (int j = 0; j < layer_connection.top_size(); ++j) { + const string& blob_name = layer_connection.top(j); + blob_name_to_last_top_idx[blob_name] = i; + } + } +} + +bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, + V1LayerParameter* layer_param) { + bool is_fully_compatible = true; + layer_param->Clear(); + for (int i = 0; i < v0_layer_connection.bottom_size(); ++i) { + layer_param->add_bottom(v0_layer_connection.bottom(i)); + } + for (int i = 0; i < v0_layer_connection.top_size(); ++i) { + layer_param->add_top(v0_layer_connection.top(i)); + } + if (v0_layer_connection.has_layer()) { + const V0LayerParameter& v0_layer_param = v0_layer_connection.layer(); + if (v0_layer_param.has_name()) { + layer_param->set_name(v0_layer_param.name()); + } + const string& type = v0_layer_param.type(); + if (v0_layer_param.has_type()) { + layer_param->set_type(UpgradeV0LayerType(type)); + } + for (int i = 0; i < v0_layer_param.blobs_size(); ++i) { + layer_param->add_blobs()->CopyFrom(v0_layer_param.blobs(i)); + } + for (int i = 0; i < v0_layer_param.blobs_lr_size(); ++i) { + layer_param->add_blobs_lr(v0_layer_param.blobs_lr(i)); + } + for (int i = 0; i < v0_layer_param.weight_decay_size(); ++i) { + layer_param->add_weight_decay(v0_layer_param.weight_decay(i)); + } + if (v0_layer_param.has_num_output()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_num_output( + v0_layer_param.num_output()); + } else if (type == "innerproduct") { + layer_param->mutable_inner_product_param()->set_num_output( + v0_layer_param.num_output()); + } else { + LOG(ERROR) << "Unknown parameter num_output for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_biasterm()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_bias_term( + v0_layer_param.biasterm()); + } else if (type == "innerproduct") { + layer_param->mutable_inner_product_param()->set_bias_term( + v0_layer_param.biasterm()); + } else { + LOG(ERROR) << "Unknown parameter biasterm for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_weight_filler()) { + if (type == "conv") { + layer_param->mutable_convolution_param()-> + mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler()); + } else if (type == "innerproduct") { + layer_param->mutable_inner_product_param()-> + mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler()); + } else { + LOG(ERROR) << "Unknown parameter weight_filler for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_bias_filler()) { + if (type == "conv") { + layer_param->mutable_convolution_param()-> + mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler()); + } else if (type == "innerproduct") { + 
layer_param->mutable_inner_product_param()-> + mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler()); + } else { + LOG(ERROR) << "Unknown parameter bias_filler for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_pad()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_pad(v0_layer_param.pad()); + } else if (type == "pool") { + layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad()); + } else { + LOG(ERROR) << "Unknown parameter pad for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_kernelsize()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_kernel_size( + v0_layer_param.kernelsize()); + } else if (type == "pool") { + layer_param->mutable_pooling_param()->set_kernel_size( + v0_layer_param.kernelsize()); + } else { + LOG(ERROR) << "Unknown parameter kernelsize for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_group()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_group( + v0_layer_param.group()); + } else { + LOG(ERROR) << "Unknown parameter group for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_stride()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_stride( + v0_layer_param.stride()); + } else if (type == "pool") { + layer_param->mutable_pooling_param()->set_stride( + v0_layer_param.stride()); + } else { + LOG(ERROR) << "Unknown parameter stride for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_pool()) { + if (type == "pool") { + V0LayerParameter_PoolMethod pool = v0_layer_param.pool(); + switch (pool) { + case V0LayerParameter_PoolMethod_MAX: + layer_param->mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_MAX); + break; + case V0LayerParameter_PoolMethod_AVE: + layer_param->mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_AVE); + break; + case V0LayerParameter_PoolMethod_STOCHASTIC: + layer_param->mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_STOCHASTIC); + break; + default: + LOG(ERROR) << "Unknown pool method " << pool; + is_fully_compatible = false; + } + } else { + LOG(ERROR) << "Unknown parameter pool for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_dropout_ratio()) { + if (type == "dropout") { + layer_param->mutable_dropout_param()->set_dropout_ratio( + v0_layer_param.dropout_ratio()); + } else { + LOG(ERROR) << "Unknown parameter dropout_ratio for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_local_size()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_local_size( + v0_layer_param.local_size()); + } else { + LOG(ERROR) << "Unknown parameter local_size for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_alpha()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_alpha(v0_layer_param.alpha()); + } else { + LOG(ERROR) << "Unknown parameter alpha for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_beta()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_beta(v0_layer_param.beta()); + } else { + LOG(ERROR) << "Unknown parameter beta for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_k()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_k(v0_layer_param.k()); + } 
else { + LOG(ERROR) << "Unknown parameter k for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_source()) { + if (type == "data") { + layer_param->mutable_data_param()->set_source(v0_layer_param.source()); + } else if (type == "hdf5_data") { + layer_param->mutable_hdf5_data_param()->set_source( + v0_layer_param.source()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_source( + v0_layer_param.source()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_source( + v0_layer_param.source()); + } else if (type == "infogain_loss") { + layer_param->mutable_infogain_loss_param()->set_source( + v0_layer_param.source()); + } else { + LOG(ERROR) << "Unknown parameter source for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_scale()) { + layer_param->mutable_transform_param()-> + set_scale(v0_layer_param.scale()); + } + if (v0_layer_param.has_meanfile()) { + layer_param->mutable_transform_param()-> + set_mean_file(v0_layer_param.meanfile()); + } + if (v0_layer_param.has_batchsize()) { + if (type == "data") { + layer_param->mutable_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else if (type == "hdf5_data") { + layer_param->mutable_hdf5_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else { + LOG(ERROR) << "Unknown parameter batchsize for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_cropsize()) { + layer_param->mutable_transform_param()-> + set_crop_size(v0_layer_param.cropsize()); + } + if (v0_layer_param.has_mirror()) { + layer_param->mutable_transform_param()-> + set_mirror(v0_layer_param.mirror()); + } + if (v0_layer_param.has_rand_skip()) { + if (type == "data") { + layer_param->mutable_data_param()->set_rand_skip( + v0_layer_param.rand_skip()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_rand_skip( + v0_layer_param.rand_skip()); + } else { + LOG(ERROR) << "Unknown parameter rand_skip for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_shuffle_images()) { + if (type == "images") { + layer_param->mutable_image_data_param()->set_shuffle( + v0_layer_param.shuffle_images()); + } else { + LOG(ERROR) << "Unknown parameter shuffle for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_new_height()) { + if (type == "images") { + layer_param->mutable_image_data_param()->set_new_height( + v0_layer_param.new_height()); + } else { + LOG(ERROR) << "Unknown parameter new_height for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_new_width()) { + if (type == "images") { + layer_param->mutable_image_data_param()->set_new_width( + v0_layer_param.new_width()); + } else { + LOG(ERROR) << "Unknown parameter new_width for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_concat_dim()) { + if (type == "concat") { + layer_param->mutable_concat_param()->set_concat_dim( + v0_layer_param.concat_dim()); + } else { + LOG(ERROR) << "Unknown parameter concat_dim for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_fg_threshold()) { + if (type 
== "window_data") { + layer_param->mutable_window_data_param()->set_fg_threshold( + v0_layer_param.det_fg_threshold()); + } else { + LOG(ERROR) << "Unknown parameter det_fg_threshold for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_bg_threshold()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_bg_threshold( + v0_layer_param.det_bg_threshold()); + } else { + LOG(ERROR) << "Unknown parameter det_bg_threshold for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_fg_fraction()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_fg_fraction( + v0_layer_param.det_fg_fraction()); + } else { + LOG(ERROR) << "Unknown parameter det_fg_fraction for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_context_pad()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_context_pad( + v0_layer_param.det_context_pad()); + } else { + LOG(ERROR) << "Unknown parameter det_context_pad for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_crop_mode()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_crop_mode( + v0_layer_param.det_crop_mode()); + } else { + LOG(ERROR) << "Unknown parameter det_crop_mode for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_hdf5_output_param()) { + if (type == "hdf5_output") { + layer_param->mutable_hdf5_output_param()->CopyFrom( + v0_layer_param.hdf5_output_param()); + } else { + LOG(ERROR) << "Unknown parameter hdf5_output_param for layer type " + << type; + is_fully_compatible = false; + } + } + } + return is_fully_compatible; +} + +V1LayerParameter_LayerType UpgradeV0LayerType(const string& type) { + if (type == "accuracy") { + return V1LayerParameter_LayerType_ACCURACY; + } else if (type == "bnll") { + return V1LayerParameter_LayerType_BNLL; + } else if (type == "concat") { + return V1LayerParameter_LayerType_CONCAT; + } else if (type == "conv") { + return V1LayerParameter_LayerType_CONVOLUTION; + } else if (type == "data") { + return V1LayerParameter_LayerType_DATA; + } else if (type == "dropout") { + return V1LayerParameter_LayerType_DROPOUT; + } else if (type == "euclidean_loss") { + return V1LayerParameter_LayerType_EUCLIDEAN_LOSS; + } else if (type == "flatten") { + return V1LayerParameter_LayerType_FLATTEN; + } else if (type == "hdf5_data") { + return V1LayerParameter_LayerType_HDF5_DATA; + } else if (type == "hdf5_output") { + return V1LayerParameter_LayerType_HDF5_OUTPUT; + } else if (type == "im2col") { + return V1LayerParameter_LayerType_IM2COL; + } else if (type == "images") { + return V1LayerParameter_LayerType_IMAGE_DATA; + } else if (type == "infogain_loss") { + return V1LayerParameter_LayerType_INFOGAIN_LOSS; + } else if (type == "innerproduct") { + return V1LayerParameter_LayerType_INNER_PRODUCT; + } else if (type == "lrn") { + return V1LayerParameter_LayerType_LRN; + } else if (type == "multinomial_logistic_loss") { + return V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS; + } else if (type == "pool") { + return V1LayerParameter_LayerType_POOLING; + } else if (type == "relu") { + return V1LayerParameter_LayerType_RELU; + } else if (type == "sigmoid") { + return V1LayerParameter_LayerType_SIGMOID; + } else if (type == "softmax") { + return V1LayerParameter_LayerType_SOFTMAX; + } else if (type == "softmax_loss") { + return 
V1LayerParameter_LayerType_SOFTMAX_LOSS; + } else if (type == "split") { + return V1LayerParameter_LayerType_SPLIT; + } else if (type == "tanh") { + return V1LayerParameter_LayerType_TANH; + } else if (type == "window_data") { + return V1LayerParameter_LayerType_WINDOW_DATA; + } else { + LOG(FATAL) << "Unknown layer name: " << type; + return V1LayerParameter_LayerType_NONE; + } +} + +bool NetNeedsDataUpgrade(const NetParameter& net_param) { + for (int i = 0; i < net_param.layers_size(); ++i) { + if (net_param.layers(i).type() == V1LayerParameter_LayerType_DATA) { + DataParameter layer_param = net_param.layers(i).data_param(); + if (layer_param.has_scale()) { return true; } + if (layer_param.has_mean_file()) { return true; } + if (layer_param.has_crop_size()) { return true; } + if (layer_param.has_mirror()) { return true; } + } + if (net_param.layers(i).type() == V1LayerParameter_LayerType_IMAGE_DATA) { + ImageDataParameter layer_param = net_param.layers(i).image_data_param(); + if (layer_param.has_scale()) { return true; } + if (layer_param.has_mean_file()) { return true; } + if (layer_param.has_crop_size()) { return true; } + if (layer_param.has_mirror()) { return true; } + } + if (net_param.layers(i).type() == V1LayerParameter_LayerType_WINDOW_DATA) { + WindowDataParameter layer_param = net_param.layers(i).window_data_param(); + if (layer_param.has_scale()) { return true; } + if (layer_param.has_mean_file()) { return true; } + if (layer_param.has_crop_size()) { return true; } + if (layer_param.has_mirror()) { return true; } + } + } + return false; +} + +#define CONVERT_LAYER_TRANSFORM_PARAM(TYPE, Name, param_name) \ + do { \ + if (net_param->layers(i).type() == V1LayerParameter_LayerType_##TYPE) { \ + Name##Parameter* layer_param = \ + net_param->mutable_layers(i)->mutable_##param_name##_param(); \ + TransformationParameter* transform_param = \ + net_param->mutable_layers(i)->mutable_transform_param(); \ + if (layer_param->has_scale()) { \ + transform_param->set_scale(layer_param->scale()); \ + layer_param->clear_scale(); \ + } \ + if (layer_param->has_mean_file()) { \ + transform_param->set_mean_file(layer_param->mean_file()); \ + layer_param->clear_mean_file(); \ + } \ + if (layer_param->has_crop_size()) { \ + transform_param->set_crop_size(layer_param->crop_size()); \ + layer_param->clear_crop_size(); \ + } \ + if (layer_param->has_mirror()) { \ + transform_param->set_mirror(layer_param->mirror()); \ + layer_param->clear_mirror(); \ + } \ + } \ + } while (0) + +void UpgradeNetDataTransformation(NetParameter* net_param) { + for (int i = 0; i < net_param->layers_size(); ++i) { + CONVERT_LAYER_TRANSFORM_PARAM(DATA, Data, data); + CONVERT_LAYER_TRANSFORM_PARAM(IMAGE_DATA, ImageData, image_data); + CONVERT_LAYER_TRANSFORM_PARAM(WINDOW_DATA, WindowData, window_data); + } +} + +bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { + bool success = true; + if (NetNeedsV0ToV1Upgrade(*param)) { + // NetParameter was specified using the old style (V0LayerParameter); try to + // upgrade it. 
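+  // The upgrade is staged: V0 nets are first rewritten to V1
+  // (UpgradeV0Net), then any deprecated in-layer transformation fields are
+  // moved into transform_param (UpgradeNetDataTransformation), and finally
+  // V1 layers are rewritten to the current LayerParameter schema
+  // (UpgradeV1Net). Each stage only reports partial compatibility via the
+  // return flag and log output; it never aborts.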
+ LOG(ERROR) << "Attempting to upgrade input file specified using deprecated " + << "V0LayerParameter: " << param_file; + NetParameter original_param(*param); + if (!UpgradeV0Net(original_param, param)) { + success = false; + LOG(ERROR) << "Warning: had one or more problems upgrading " + << "V0NetParameter to NetParameter (see above); continuing anyway."; + } else { + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "V0LayerParameter"; + } + LOG(ERROR) << "Note that future Caffe releases will not support " + << "V0NetParameter; use ./build/tools/upgrade_net_proto_text for " + << "prototxt and ./build/tools/upgrade_net_proto_binary for model " + << "weights upgrade this and any other net protos to the new format."; + } + // NetParameter uses old style data transformation fields; try to upgrade it. + if (NetNeedsDataUpgrade(*param)) { + LOG(ERROR) << "Attempting to upgrade input file specified using deprecated " + << "transformation parameters: " << param_file; + UpgradeNetDataTransformation(param); + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "data transformation parameters."; + LOG(ERROR) << "Note that future Caffe releases will only support " + << "transform_param messages for transformation fields."; + } + if (NetNeedsV1ToV2Upgrade(*param)) { + LOG(ERROR) << "Attempting to upgrade input file specified using deprecated " + << "V1LayerParameter: " << param_file; + NetParameter original_param(*param); + if (!UpgradeV1Net(original_param, param)) { + success = false; + LOG(ERROR) << "Warning: had one or more problems upgrading " + << "V1LayerParameter (see above); continuing anyway."; + } else { + LOG(INFO) << "Successfully upgraded file specified using deprecated " + << "V1LayerParameter"; + } + } + return success; +} + +bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { + bool is_fully_compatible = true; + if (v1_net_param.layer_size() > 0) { + LOG(ERROR) << "Input NetParameter to be upgraded already specifies 'layer' " + << "fields; these will be ignored for the upgrade."; + is_fully_compatible = false; + } + net_param->CopyFrom(v1_net_param); + net_param->clear_layers(); + net_param->clear_layer(); + for (int i = 0; i < v1_net_param.layers_size(); ++i) { + if (!UpgradeV1LayerParameter(v1_net_param.layers(i), + net_param->add_layer())) { + LOG(ERROR) << "Upgrade of input layer " << i << " failed."; + is_fully_compatible = false; + } + } + return is_fully_compatible; +} + +bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, + LayerParameter* layer_param) { + layer_param->Clear(); + bool is_fully_compatible = true; + for (int i = 0; i < v1_layer_param.bottom_size(); ++i) { + layer_param->add_bottom(v1_layer_param.bottom(i)); + } + for (int i = 0; i < v1_layer_param.top_size(); ++i) { + layer_param->add_top(v1_layer_param.top(i)); + } + if (v1_layer_param.has_name()) { + layer_param->set_name(v1_layer_param.name()); + } + for (int i = 0; i < v1_layer_param.include_size(); ++i) { + layer_param->add_include()->CopyFrom(v1_layer_param.include(i)); + } + for (int i = 0; i < v1_layer_param.exclude_size(); ++i) { + layer_param->add_exclude()->CopyFrom(v1_layer_param.exclude(i)); + } + if (v1_layer_param.has_type()) { + layer_param->set_type(UpgradeV1LayerType(v1_layer_param.type())); + } + for (int i = 0; i < v1_layer_param.blobs_size(); ++i) { + layer_param->add_blobs()->CopyFrom(v1_layer_param.blobs(i)); + } + for (int i = 0; i < v1_layer_param.param_size(); ++i) { + while 
(layer_param->param_size() <= i) { layer_param->add_param(); } + layer_param->mutable_param(i)->set_name(v1_layer_param.param(i)); + } + ParamSpec_DimCheckMode mode; + for (int i = 0; i < v1_layer_param.blob_share_mode_size(); ++i) { + while (layer_param->param_size() <= i) { layer_param->add_param(); } + switch (v1_layer_param.blob_share_mode(i)) { + case V1LayerParameter_DimCheckMode_STRICT: + mode = ParamSpec_DimCheckMode_STRICT; + break; + case V1LayerParameter_DimCheckMode_PERMISSIVE: + mode = ParamSpec_DimCheckMode_PERMISSIVE; + break; + default: + LOG(FATAL) << "Unknown blob_share_mode: " + << v1_layer_param.blob_share_mode(i); + break; + } + layer_param->mutable_param(i)->set_share_mode(mode); + } + for (int i = 0; i < v1_layer_param.blobs_lr_size(); ++i) { + while (layer_param->param_size() <= i) { layer_param->add_param(); } + layer_param->mutable_param(i)->set_lr_mult(v1_layer_param.blobs_lr(i)); + } + for (int i = 0; i < v1_layer_param.weight_decay_size(); ++i) { + while (layer_param->param_size() <= i) { layer_param->add_param(); } + layer_param->mutable_param(i)->set_decay_mult( + v1_layer_param.weight_decay(i)); + } + for (int i = 0; i < v1_layer_param.loss_weight_size(); ++i) { + layer_param->add_loss_weight(v1_layer_param.loss_weight(i)); + } + if (v1_layer_param.has_accuracy_param()) { + layer_param->mutable_accuracy_param()->CopyFrom( + v1_layer_param.accuracy_param()); + } + if (v1_layer_param.has_argmax_param()) { + layer_param->mutable_argmax_param()->CopyFrom( + v1_layer_param.argmax_param()); + } + if (v1_layer_param.has_concat_param()) { + layer_param->mutable_concat_param()->CopyFrom( + v1_layer_param.concat_param()); + } + if (v1_layer_param.has_contrastive_loss_param()) { + layer_param->mutable_contrastive_loss_param()->CopyFrom( + v1_layer_param.contrastive_loss_param()); + } + if (v1_layer_param.has_convolution_param()) { + layer_param->mutable_convolution_param()->CopyFrom( + v1_layer_param.convolution_param()); + } + if (v1_layer_param.has_data_param()) { + layer_param->mutable_data_param()->CopyFrom( + v1_layer_param.data_param()); + } + if (v1_layer_param.has_dropout_param()) { + layer_param->mutable_dropout_param()->CopyFrom( + v1_layer_param.dropout_param()); + } + if (v1_layer_param.has_dummy_data_param()) { + layer_param->mutable_dummy_data_param()->CopyFrom( + v1_layer_param.dummy_data_param()); + } + if (v1_layer_param.has_eltwise_param()) { + layer_param->mutable_eltwise_param()->CopyFrom( + v1_layer_param.eltwise_param()); + } + if (v1_layer_param.has_exp_param()) { + layer_param->mutable_exp_param()->CopyFrom( + v1_layer_param.exp_param()); + } + if (v1_layer_param.has_hdf5_data_param()) { + layer_param->mutable_hdf5_data_param()->CopyFrom( + v1_layer_param.hdf5_data_param()); + } + if (v1_layer_param.has_hdf5_output_param()) { + layer_param->mutable_hdf5_output_param()->CopyFrom( + v1_layer_param.hdf5_output_param()); + } + if (v1_layer_param.has_hinge_loss_param()) { + layer_param->mutable_hinge_loss_param()->CopyFrom( + v1_layer_param.hinge_loss_param()); + } + if (v1_layer_param.has_image_data_param()) { + layer_param->mutable_image_data_param()->CopyFrom( + v1_layer_param.image_data_param()); + } + if (v1_layer_param.has_infogain_loss_param()) { + layer_param->mutable_infogain_loss_param()->CopyFrom( + v1_layer_param.infogain_loss_param()); + } + if (v1_layer_param.has_inner_product_param()) { + layer_param->mutable_inner_product_param()->CopyFrom( + v1_layer_param.inner_product_param()); + } + if (v1_layer_param.has_lrn_param()) { + 
layer_param->mutable_lrn_param()->CopyFrom( + v1_layer_param.lrn_param()); + } + if (v1_layer_param.has_memory_data_param()) { + layer_param->mutable_memory_data_param()->CopyFrom( + v1_layer_param.memory_data_param()); + } + if (v1_layer_param.has_mvn_param()) { + layer_param->mutable_mvn_param()->CopyFrom( + v1_layer_param.mvn_param()); + } + if (v1_layer_param.has_pooling_param()) { + layer_param->mutable_pooling_param()->CopyFrom( + v1_layer_param.pooling_param()); + } + if (v1_layer_param.has_power_param()) { + layer_param->mutable_power_param()->CopyFrom( + v1_layer_param.power_param()); + } + if (v1_layer_param.has_relu_param()) { + layer_param->mutable_relu_param()->CopyFrom( + v1_layer_param.relu_param()); + } + if (v1_layer_param.has_sigmoid_param()) { + layer_param->mutable_sigmoid_param()->CopyFrom( + v1_layer_param.sigmoid_param()); + } + if (v1_layer_param.has_softmax_param()) { + layer_param->mutable_softmax_param()->CopyFrom( + v1_layer_param.softmax_param()); + } + if (v1_layer_param.has_slice_param()) { + layer_param->mutable_slice_param()->CopyFrom( + v1_layer_param.slice_param()); + } + if (v1_layer_param.has_tanh_param()) { + layer_param->mutable_tanh_param()->CopyFrom( + v1_layer_param.tanh_param()); + } + if (v1_layer_param.has_threshold_param()) { + layer_param->mutable_threshold_param()->CopyFrom( + v1_layer_param.threshold_param()); + } + if (v1_layer_param.has_window_data_param()) { + layer_param->mutable_window_data_param()->CopyFrom( + v1_layer_param.window_data_param()); + } + if (v1_layer_param.has_transform_param()) { + layer_param->mutable_transform_param()->CopyFrom( + v1_layer_param.transform_param()); + } + if (v1_layer_param.has_loss_param()) { + layer_param->mutable_loss_param()->CopyFrom( + v1_layer_param.loss_param()); + } + if (v1_layer_param.has_layer()) { + LOG(ERROR) << "Input NetParameter has V0 layer -- ignoring."; + is_fully_compatible = false; + } + return is_fully_compatible; +} + +const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) { + switch (type) { + case V1LayerParameter_LayerType_NONE: + return ""; + case V1LayerParameter_LayerType_ABSVAL: + return "AbsVal"; + case V1LayerParameter_LayerType_ACCURACY: + return "Accuracy"; + case V1LayerParameter_LayerType_ARGMAX: + return "ArgMax"; + case V1LayerParameter_LayerType_BNLL: + return "BNLL"; + case V1LayerParameter_LayerType_CONCAT: + return "Concat"; + case V1LayerParameter_LayerType_CONTRASTIVE_LOSS: + return "ContrastiveLoss"; + case V1LayerParameter_LayerType_CONVOLUTION: + return "Convolution"; + case V1LayerParameter_LayerType_DECONVOLUTION: + return "Deconvolution"; + case V1LayerParameter_LayerType_DATA: + return "Data"; + case V1LayerParameter_LayerType_DROPOUT: + return "Dropout"; + case V1LayerParameter_LayerType_DUMMY_DATA: + return "DummyData"; + case V1LayerParameter_LayerType_EUCLIDEAN_LOSS: + return "EuclideanLoss"; + case V1LayerParameter_LayerType_ELTWISE: + return "Eltwise"; + case V1LayerParameter_LayerType_EXP: + return "Exp"; + case V1LayerParameter_LayerType_FLATTEN: + return "Flatten"; + case V1LayerParameter_LayerType_HDF5_DATA: + return "HDF5Data"; + case V1LayerParameter_LayerType_HDF5_OUTPUT: + return "HDF5Output"; + case V1LayerParameter_LayerType_HINGE_LOSS: + return "HingeLoss"; + case V1LayerParameter_LayerType_IM2COL: + return "Im2col"; + case V1LayerParameter_LayerType_IMAGE_DATA: + return "ImageData"; + case V1LayerParameter_LayerType_INFOGAIN_LOSS: + return "InfogainLoss"; + case V1LayerParameter_LayerType_INNER_PRODUCT: + return 
"InnerProduct"; + case V1LayerParameter_LayerType_LRN: + return "LRN"; + case V1LayerParameter_LayerType_MEMORY_DATA: + return "MemoryData"; + case V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS: + return "MultinomialLogisticLoss"; + case V1LayerParameter_LayerType_MVN: + return "MVN"; + case V1LayerParameter_LayerType_POOLING: + return "Pooling"; + case V1LayerParameter_LayerType_POWER: + return "Power"; + case V1LayerParameter_LayerType_RELU: + return "ReLU"; + case V1LayerParameter_LayerType_SIGMOID: + return "Sigmoid"; + case V1LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS: + return "SigmoidCrossEntropyLoss"; + case V1LayerParameter_LayerType_SILENCE: + return "Silence"; + case V1LayerParameter_LayerType_SOFTMAX: + return "Softmax"; + case V1LayerParameter_LayerType_SOFTMAX_LOSS: + return "SoftmaxWithLoss"; + case V1LayerParameter_LayerType_SPLIT: + return "Split"; + case V1LayerParameter_LayerType_SLICE: + return "Slice"; + case V1LayerParameter_LayerType_TANH: + return "TanH"; + case V1LayerParameter_LayerType_WINDOW_DATA: + return "WindowData"; + case V1LayerParameter_LayerType_THRESHOLD: + return "Threshold"; + default: + LOG(FATAL) << "Unknown V1LayerParameter layer type: " << type; + return ""; + } +} + +void ReadNetParamsFromTextFileOrDie(const string& param_file, + NetParameter* param) { + CHECK(ReadProtoFromTextFile(param_file, param)) + << "Failed to parse NetParameter file: " << param_file; + UpgradeNetAsNeeded(param_file, param); +} + +void ReadNetParamsFromBinaryFileOrDie(const string& param_file, + NetParameter* param) { + CHECK(ReadProtoFromBinaryFile(param_file, param)) + << "Failed to parse NetParameter file: " << param_file; + UpgradeNetAsNeeded(param_file, param); +} + +} // namespace caffe diff --git a/modules/dnn/src/caffe/util/io.hpp b/modules/dnn/src/caffe/util/io.hpp new file mode 100644 index 000000000..6b3a0c418 --- /dev/null +++ b/modules/dnn/src/caffe/util/io.hpp @@ -0,0 +1,152 @@ +#ifndef CAFFE_UTIL_IO_H_ +#define CAFFE_UTIL_IO_H_ + +#include +#include +#define CHECK(cond) if (!(cond)) std::cerr << #cond +#define CHECK_EQ(a, b) if (!((a) == (b))) std::cerr << #a << "!=" << #b +#define LOG(WHERE) std::cerr + +//#include +#include +#include + +#include +//#include "hdf5.h" +//#include "hdf5_hl.h" + +//#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe.pb.h" + +#define HDF5_NUM_DIMS 4 + +namespace caffe { + +using ::google::protobuf::Message; + +//inline void MakeTempFilename(string* temp_filename) { +// temp_filename->clear(); +// *temp_filename = "/tmp/caffe_test.XXXXXX"; +// char* temp_filename_cstr = new char[temp_filename->size() + 1]; +// // NOLINT_NEXT_LINE(runtime/printf) +// strcpy(temp_filename_cstr, temp_filename->c_str()); +// int fd = mkstemp(temp_filename_cstr); +// CHECK_GE(fd, 0) << "Failed to open a temporary file at: " << *temp_filename; +// close(fd); +// *temp_filename = temp_filename_cstr; +// delete[] temp_filename_cstr; +//} +// +//inline void MakeTempDir(string* temp_dirname) { +// temp_dirname->clear(); +// *temp_dirname = "/tmp/caffe_test.XXXXXX"; +// char* temp_dirname_cstr = new char[temp_dirname->size() + 1]; +// // NOLINT_NEXT_LINE(runtime/printf) +// strcpy(temp_dirname_cstr, temp_dirname->c_str()); +// char* mkdtemp_result = mkdtemp(temp_dirname_cstr); +// CHECK(mkdtemp_result != NULL) +// << "Failed to create a temporary directory at: " << *temp_dirname; +// *temp_dirname = temp_dirname_cstr; +// delete[] temp_dirname_cstr; +//} + +bool ReadProtoFromTextFile(const char* filename, Message* 
proto); + +inline bool ReadProtoFromTextFile(const string& filename, Message* proto) { + return ReadProtoFromTextFile(filename.c_str(), proto); +} + +inline void ReadProtoFromTextFileOrDie(const char* filename, Message* proto) { + CHECK(ReadProtoFromTextFile(filename, proto)); +} + +inline void ReadProtoFromTextFileOrDie(const string& filename, Message* proto) { + ReadProtoFromTextFileOrDie(filename.c_str(), proto); +} + +void WriteProtoToTextFile(const Message& proto, const char* filename); +inline void WriteProtoToTextFile(const Message& proto, const string& filename) { + WriteProtoToTextFile(proto, filename.c_str()); +} + +bool ReadProtoFromBinaryFile(const char* filename, Message* proto); + +inline bool ReadProtoFromBinaryFile(const string& filename, Message* proto) { + return ReadProtoFromBinaryFile(filename.c_str(), proto); +} + +inline void ReadProtoFromBinaryFileOrDie(const char* filename, Message* proto) { + CHECK(ReadProtoFromBinaryFile(filename, proto)); +} + +inline void ReadProtoFromBinaryFileOrDie(const string& filename, + Message* proto) { + ReadProtoFromBinaryFileOrDie(filename.c_str(), proto); +} + + +void WriteProtoToBinaryFile(const Message& proto, const char* filename); +inline void WriteProtoToBinaryFile( + const Message& proto, const string& filename) { + WriteProtoToBinaryFile(proto, filename.c_str()); +} + +bool ReadFileToDatum(const string& filename, const int label, Datum* datum); + +inline bool ReadFileToDatum(const string& filename, Datum* datum) { + return ReadFileToDatum(filename, -1, datum); +} + +bool ReadImageToDatum(const string& filename, const int label, + const int height, const int width, const bool is_color, + const std::string & encoding, Datum* datum); + +inline bool ReadImageToDatum(const string& filename, const int label, + const int height, const int width, const bool is_color, Datum* datum) { + return ReadImageToDatum(filename, label, height, width, is_color, + "", datum); +} + +inline bool ReadImageToDatum(const string& filename, const int label, + const int height, const int width, Datum* datum) { + return ReadImageToDatum(filename, label, height, width, true, datum); +} + +inline bool ReadImageToDatum(const string& filename, const int label, + const bool is_color, Datum* datum) { + return ReadImageToDatum(filename, label, 0, 0, is_color, datum); +} + +inline bool ReadImageToDatum(const string& filename, const int label, + Datum* datum) { + return ReadImageToDatum(filename, label, 0, 0, true, datum); +} + +inline bool ReadImageToDatum(const string& filename, const int label, + const std::string & encoding, Datum* datum) { + return ReadImageToDatum(filename, label, 0, 0, true, encoding, datum); +} + +bool DecodeDatumNative(Datum* datum); +bool DecodeDatum(Datum* datum, bool is_color); + +cv::Mat ReadImageToCVMat(const string& filename, + const int height, const int width, const bool is_color); + +cv::Mat ReadImageToCVMat(const string& filename, + const int height, const int width); + +cv::Mat ReadImageToCVMat(const string& filename, + const bool is_color); + +cv::Mat ReadImageToCVMat(const string& filename); + +cv::Mat DecodeDatumToCVMatNative(const Datum& datum); +cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color); + +void CVMatToDatum(const cv::Mat& cv_img, Datum* datum); + +} // namespace caffe + +#endif // CAFFE_UTIL_IO_H_ diff --git a/modules/dnn/src/caffe/util/upgrade_proto.hpp b/modules/dnn/src/caffe/util/upgrade_proto.hpp new file mode 100644 index 000000000..ace27f25e --- /dev/null +++ 
b/modules/dnn/src/caffe/util/upgrade_proto.hpp @@ -0,0 +1,64 @@ +#ifndef CAFFE_UTIL_UPGRADE_PROTO_H_ +#define CAFFE_UTIL_UPGRADE_PROTO_H_ + +#include <string> +#include "caffe/common.hpp" +#include "caffe.pb.h" + +namespace caffe { + +// Return true iff the net is not the current version. +bool NetNeedsUpgrade(const NetParameter& net_param); + +// Return true iff any layer contains parameters specified using +// deprecated V0LayerParameter. +bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param); + +// Perform all necessary transformations to upgrade a V0NetParameter into a +// NetParameter (including upgrading padding layers and LayerParameters). +bool UpgradeV0Net(const NetParameter& v0_net_param, NetParameter* net_param); + +// Upgrade NetParameter with padding layers to pad-aware conv layers. +// For any padding layer, remove it and put its pad parameter in any layers +// taking its top blob as input. +// Error if any of these above layers are not-conv layers. +void UpgradeV0PaddingLayers(const NetParameter& param, + NetParameter* param_upgraded_pad); + +// Upgrade a single V0LayerConnection to the V1LayerParameter format. +bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, + V1LayerParameter* layer_param); + +V1LayerParameter_LayerType UpgradeV0LayerType(const string& type); + +// Return true iff any layer contains deprecated data transformation parameters. +bool NetNeedsDataUpgrade(const NetParameter& net_param); + +// Perform all necessary transformations to upgrade old transformation fields +// into a TransformationParameter. +void UpgradeNetDataTransformation(NetParameter* net_param); + +// Return true iff the Net contains any layers specified as V1LayerParameters. +bool NetNeedsV1ToV2Upgrade(const NetParameter& net_param); + +// Perform all necessary transformations to upgrade a NetParameter with +// deprecated V1LayerParameters. +bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param); + +bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, + LayerParameter* layer_param); + +const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type); + +// Check for deprecations and upgrade the NetParameter as needed. +bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param); + +// Read parameters from a file into a NetParameter proto message. +void ReadNetParamsFromTextFileOrDie(const string& param_file, + NetParameter* param); +void ReadNetParamsFromBinaryFileOrDie(const string& param_file, + NetParameter* param); + +} // namespace caffe + +#endif // CAFFE_UTIL_UPGRADE_PROTO_H_
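The two Read*OrDie entry points declared above are the importer's way into this upgrade machinery: each one parses the file and then runs UpgradeNetAsNeeded() on the result. A minimal sketch of the intended call chain (the file name below is a placeholder; note that in this port CHECK() merely logs to std::cerr instead of aborting):

    caffe::NetParameter net;
    caffe::ReadNetParamsFromTextFileOrDie("deploy.prototxt", &net);
    // UpgradeNetAsNeeded() has already rewritten deprecated V0/V1 layer
    // definitions into the current format, so layers enumerate uniformly:
    for (int i = 0; i < net.layer_size(); i++)
        std::cout << net.layer(i).name() << " (" << net.layer(i).type() << ")" << std::endl;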
diff --git a/modules/dnn/src/caffe_importer.cpp b/modules/dnn/src/caffe_importer.cpp index d9359a31d..4334346a0 100644 --- a/modules/dnn/src/caffe_importer.cpp +++ b/modules/dnn/src/caffe_importer.cpp @@ -6,41 +6,167 @@ #include <fstream> #include <google/protobuf/text_format.h> #include <google/protobuf/io/zero_copy_stream_impl.h> +#include "caffe/util/upgrade_proto.hpp" using namespace cv; using namespace cv::dnn; +using ::google::protobuf::RepeatedField; +using ::google::protobuf::RepeatedPtrField; +using ::google::protobuf::Message; +using ::google::protobuf::Descriptor; +using ::google::protobuf::FieldDescriptor; +using ::google::protobuf::Reflection; + namespace { + void walk(const Descriptor *desc) + { + if (desc == NULL) + return; + + std::cout << "* " << desc->full_name() << std::endl; + + for (int i = 0; i < desc->field_count(); i++) + { + const FieldDescriptor *fdesc = desc->field(i); + + if (fdesc->message_type()) + walk(fdesc->message_type()); + else; + //std::cout << "f " << desc->field(i)->full_name() << std::endl; + } + } + class CaffeImporter : public Importer { + caffe::NetParameter net; + cv::dnn::LayerParams params; public: CaffeImporter(const char *pototxt, const char *caffeModel) { - std::ifstream proto_ifs(pototxt, std::ifstream::in); - std::ifstream model_ifs(caffeModel, std::ifstream::in); - - CV_Assert(proto_ifs.is_open() && model_ifs.is_open()); - - google::protobuf::io::IstreamInputStream proto_zcs(&proto_ifs); - google::protobuf::io::IstreamInputStream model_zcs(&model_ifs); + ReadNetParamsFromTextFileOrDie(std::string(pototxt), &net); + } - //google::protobuf::Message msg_weights; - caffe::NetParameter msg_arch; + inline bool skipCaffeLayerParam(const FieldDescriptor *fd) + { + const std::string &name = fd->name(); + + if (fd->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) + { + static const char *SKIP_FIELDS[] = { "type", "name", "top", "bottom", NULL }; + + for (int i = 0; SKIP_FIELDS[i]; i++) + { + if (name == SKIP_FIELDS[i]) + return true; + } + + return false; + } + else + { + static const std::string _param("_param"); + bool endsWith_param = (name.size() >= _param.size()) && name.compare(name.size() - _param.size(), _param.size(), _param) == 0; + return !endsWith_param; + } + } - CV_Assert(google::protobuf::TextFormat::Parse(&proto_zcs, &msg_arch)); - //CV_Assert( msg_weights.ParseFromZeroCopyStream(model_zcs) ); + void addParam(const Message &msg, const FieldDescriptor *field, cv::dnn::LayerParams &params) + { + const Reflection *msgRefl = msg.GetReflection(); + int type = field->cpp_type(); + bool isRepeated = field->is_repeated(); + const std::string &name = field->name(); + + std::cout << field->type_name() << " " << name << ":"; + + #define GET_FIRST(Type) (isRepeated ? msgRefl->GetRepeated##Type(msg, field, 0) : msgRefl->Get##Type(msg, field)) + + switch (type) + { + case FieldDescriptor::CPPTYPE_INT32: + std::cout << params.set(name, GET_FIRST(Int32)); + break; + case FieldDescriptor::CPPTYPE_UINT32: + std::cout << params.set(name, GET_FIRST(UInt32)); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + std::cout << params.set(name, GET_FIRST(Double)); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + std::cout << params.set(name, GET_FIRST(Float)); + break; + case FieldDescriptor::CPPTYPE_ENUM: + std::cout << params.set(name, GET_FIRST(Enum)->name()); + break; + case FieldDescriptor::CPPTYPE_BOOL: + std::cout << params.set(name, GET_FIRST(Bool)); + break; + default: + std::cout << "unknown"; + break; + } + + std::cout << std::endl; + } - const google::protobuf::Descriptor *desc_arch = msg_arch.GetDescriptor(); - CV_Assert(desc_arch); + void extractLayerParams(const Message &msg, cv::dnn::LayerParams &params) + { + const Descriptor *msgDesc = msg.GetDescriptor(); + const Reflection *msgRefl = msg.GetReflection(); + + for (int fieldId = 0; fieldId < msgDesc->field_count(); fieldId++) + { + const FieldDescriptor *fd = msgDesc->field(fieldId); + + bool hasData = fd->is_required() || + (fd->is_optional() && (msgRefl->HasField(msg, fd) || fd->has_default_value())) || + (fd->is_repeated() && msgRefl->FieldSize(msg, fd) > 0); + + if ( !hasData || skipCaffeLayerParam(fd) ) + continue; + + if (fd->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) + { + if (fd->is_repeated()) //Extract only first item! + extractLayerParams(msgRefl->GetRepeatedMessage(msg, fd, 0), params); + else + extractLayerParams(msgRefl->GetMessage(msg, fd), params); + } + else + { + addParam(msg, fd, params); + } + } } void populateNetConfiguration(Ptr<NetConfiguration> config) { + const Descriptor *layerDescriptor = caffe::LayerParameter::descriptor(); + + for (int li = 0; li < net.layer_size(); li++) + { + const caffe::LayerParameter &layer = net.layer(li); + String name = layer.name(); + String type = layer.type(); + + std::vector<String> bottoms, tops; + bottoms.assign(layer.bottom().begin(), layer.bottom().end()); + tops.assign(layer.top().begin(), layer.top().end()); + + std::cout << std::endl << "LAYER: " << name << std::endl; + + cv::dnn::LayerParams params; + extractLayerParams(layer, params); + //SetUp + //int id = config->addLayer(name, type); + //config->setLayerOutputLabels(id, bottoms); + } } ~CaffeImporter()
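extractLayerParams() above flattens a caffe::LayerParameter into the Dict-backed LayerParams via protobuf reflection: nested messages are entered only when the field name ends in "_param", the name/type/top/bottom bookkeeping fields are skipped, only the first element of a repeated field is kept, and optional fields with proto defaults are captured even when unset. For the "conv1/7x7_s2" layer of the GoogleNet prototxt below, the dictionary therefore ends up with num_output, pad, kernel_size and stride (plus defaulted fields such as group), while the weight_filler/bias_filler sub-messages are skipped because their names do not end in "_param". A hypothetical read-back through the Dict getters, for illustration only:

    cv::dnn::LayerParams params;
    extractLayerParams(layer, params);              // layer == "conv1/7x7_s2"
    int numOutput = params.get<int>("num_output");  // 64
    int pad       = params.get<int>("pad");         // 3
    int kernel    = params.get<int>("kernel_size"); // 7
    int stride    = params.get<int>("stride");      // 2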
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 026214329..ceb29133a 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -7,22 +7,18 @@ namespace cv namespace dnn { -Blob::Blob(Mat &in) : _InputOutputArray(in) +Blob::Blob() { } -Blob::Blob(const Mat &in) : _InputOutputArray(in) +Blob::Blob(InputArray in) { - -} - -Blob::Blob(UMat &in) : _InputOutputArray(in) -{ - + CV_Assert(in.isMat()); + m = in.getMat(); } -Blob::Blob(const UMat &in) : _InputOutputArray(in) +Net::~Net() { } @@ -34,9 +30,9 @@ Importer::~Importer() } -Net::~Net() +Ptr<NetConfiguration> NetConfiguration::create() { - + return Ptr<NetConfiguration>(new NetConfiguration()); } } diff --git a/modules/dnn/src/precomp.hpp b/modules/dnn/src/precomp.hpp new file mode 100644 index 000000000..630af81d2 --- /dev/null +++ b/modules/dnn/src/precomp.hpp @@ -0,0 +1,2 @@ +#include <opencv2/core.hpp> +#include <opencv2/dnn.hpp> \ No newline at end of file diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp new file mode 100644 index 000000000..6bd94e77d --- /dev/null +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -0,0 +1,31 @@ +#include 
"test_precomp.hpp" + +namespace cvtest +{ + +using namespace std; +using namespace std::tr1; +using namespace testing; +using namespace cv; +using namespace cv::dnn; + +static std::string getOpenCVExtraDir() +{ + return cvtest::TS::ptr()->get_data_path(); +} + +TEST(ReadCaffePrototxt_gtsrb, Accuracy) +{ + Ptr importer = createCaffeImporter(getOpenCVExtraDir() + "/dnn/gtsrb.prototxt", ""); + Ptr config = NetConfiguration::create(); + importer->populateNetConfiguration(config); +} + +TEST(ReadCaffePrototxt_GoogleNet, Accuracy) +{ + Ptr importer = createCaffeImporter(getOpenCVExtraDir() + "/dnn/googlenet_deploy.prototxt", ""); + Ptr config = NetConfiguration::create(); + importer->populateNetConfiguration(config); +} + +} \ No newline at end of file diff --git a/modules/dnn/test/test_main.cpp b/modules/dnn/test/test_main.cpp new file mode 100644 index 000000000..6f9ac2e0d --- /dev/null +++ b/modules/dnn/test/test_main.cpp @@ -0,0 +1,3 @@ +#include "test_precomp.hpp" + +CV_TEST_MAIN("") diff --git a/modules/dnn/test/test_precomp.hpp b/modules/dnn/test/test_precomp.hpp new file mode 100644 index 000000000..98aa6238d --- /dev/null +++ b/modules/dnn/test/test_precomp.hpp @@ -0,0 +1,19 @@ +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + +#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ + +#include "opencv2/core.hpp" +#include "opencv2/dnn.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/ts.hpp" +#include +#include + +#endif diff --git a/modules/dnn/testdata/dnn/googlenet_deploy.prototxt b/modules/dnn/testdata/dnn/googlenet_deploy.prototxt new file mode 100644 index 000000000..4648bf26e --- /dev/null +++ b/modules/dnn/testdata/dnn/googlenet_deploy.prototxt @@ -0,0 +1,2156 @@ +name: "GoogleNet" +input: "data" +input_dim: 10 +input_dim: 3 +input_dim: 224 +input_dim: 224 +layer { + name: "conv1/7x7_s2" + type: "Convolution" + bottom: "data" + top: "conv1/7x7_s2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 3 + kernel_size: 7 + stride: 2 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "conv1/relu_7x7" + type: "ReLU" + bottom: "conv1/7x7_s2" + top: "conv1/7x7_s2" +} +layer { + name: "pool1/3x3_s2" + type: "Pooling" + bottom: "conv1/7x7_s2" + top: "pool1/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "pool1/norm1" + type: "LRN" + bottom: "pool1/3x3_s2" + top: "pool1/norm1" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "conv2/3x3_reduce" + type: "Convolution" + bottom: "pool1/norm1" + top: "conv2/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "conv2/relu_3x3_reduce" + type: "ReLU" + bottom: "conv2/3x3_reduce" + top: "conv2/3x3_reduce" +} +layer { + name: "conv2/3x3" + type: "Convolution" + bottom: "conv2/3x3_reduce" + top: "conv2/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + 
std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "conv2/relu_3x3" + type: "ReLU" + bottom: "conv2/3x3" + top: "conv2/3x3" +} +layer { + name: "conv2/norm2" + type: "LRN" + bottom: "conv2/3x3" + top: "conv2/norm2" + lrn_param { + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } +} +layer { + name: "pool2/3x3_s2" + type: "Pooling" + bottom: "conv2/norm2" + top: "pool2/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "inception_3a/1x1" + type: "Convolution" + bottom: "pool2/3x3_s2" + top: "inception_3a/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_1x1" + type: "ReLU" + bottom: "inception_3a/1x1" + top: "inception_3a/1x1" +} +layer { + name: "inception_3a/3x3_reduce" + type: "Convolution" + bottom: "pool2/3x3_s2" + top: "inception_3a/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_3a/3x3_reduce" + top: "inception_3a/3x3_reduce" +} +layer { + name: "inception_3a/3x3" + type: "Convolution" + bottom: "inception_3a/3x3_reduce" + top: "inception_3a/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_3x3" + type: "ReLU" + bottom: "inception_3a/3x3" + top: "inception_3a/3x3" +} +layer { + name: "inception_3a/5x5_reduce" + type: "Convolution" + bottom: "pool2/3x3_s2" + top: "inception_3a/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_3a/5x5_reduce" + top: "inception_3a/5x5_reduce" +} +layer { + name: "inception_3a/5x5" + type: "Convolution" + bottom: "inception_3a/5x5_reduce" + top: "inception_3a/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3a/relu_5x5" + type: "ReLU" + bottom: "inception_3a/5x5" + top: "inception_3a/5x5" +} +layer { + name: "inception_3a/pool" + type: "Pooling" + bottom: "pool2/3x3_s2" + top: "inception_3a/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_3a/pool_proj" + type: "Convolution" + bottom: "inception_3a/pool" + top: "inception_3a/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: 
"inception_3a/relu_pool_proj" + type: "ReLU" + bottom: "inception_3a/pool_proj" + top: "inception_3a/pool_proj" +} +layer { + name: "inception_3a/output" + type: "Concat" + bottom: "inception_3a/1x1" + bottom: "inception_3a/3x3" + bottom: "inception_3a/5x5" + bottom: "inception_3a/pool_proj" + top: "inception_3a/output" +} +layer { + name: "inception_3b/1x1" + type: "Convolution" + bottom: "inception_3a/output" + top: "inception_3b/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_1x1" + type: "ReLU" + bottom: "inception_3b/1x1" + top: "inception_3b/1x1" +} +layer { + name: "inception_3b/3x3_reduce" + type: "Convolution" + bottom: "inception_3a/output" + top: "inception_3b/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_3b/3x3_reduce" + top: "inception_3b/3x3_reduce" +} +layer { + name: "inception_3b/3x3" + type: "Convolution" + bottom: "inception_3b/3x3_reduce" + top: "inception_3b/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_3x3" + type: "ReLU" + bottom: "inception_3b/3x3" + top: "inception_3b/3x3" +} +layer { + name: "inception_3b/5x5_reduce" + type: "Convolution" + bottom: "inception_3a/output" + top: "inception_3b/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_3b/5x5_reduce" + top: "inception_3b/5x5_reduce" +} +layer { + name: "inception_3b/5x5" + type: "Convolution" + bottom: "inception_3b/5x5_reduce" + top: "inception_3b/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_5x5" + type: "ReLU" + bottom: "inception_3b/5x5" + top: "inception_3b/5x5" +} +layer { + name: "inception_3b/pool" + type: "Pooling" + bottom: "inception_3a/output" + top: "inception_3b/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_3b/pool_proj" + type: "Convolution" + bottom: "inception_3b/pool" + top: "inception_3b/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_3b/relu_pool_proj" + type: "ReLU" + bottom: "inception_3b/pool_proj" + top: "inception_3b/pool_proj" +} +layer { + name: 
"inception_3b/output" + type: "Concat" + bottom: "inception_3b/1x1" + bottom: "inception_3b/3x3" + bottom: "inception_3b/5x5" + bottom: "inception_3b/pool_proj" + top: "inception_3b/output" +} +layer { + name: "pool3/3x3_s2" + type: "Pooling" + bottom: "inception_3b/output" + top: "pool3/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "inception_4a/1x1" + type: "Convolution" + bottom: "pool3/3x3_s2" + top: "inception_4a/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_1x1" + type: "ReLU" + bottom: "inception_4a/1x1" + top: "inception_4a/1x1" +} +layer { + name: "inception_4a/3x3_reduce" + type: "Convolution" + bottom: "pool3/3x3_s2" + top: "inception_4a/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4a/3x3_reduce" + top: "inception_4a/3x3_reduce" +} +layer { + name: "inception_4a/3x3" + type: "Convolution" + bottom: "inception_4a/3x3_reduce" + top: "inception_4a/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 208 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_3x3" + type: "ReLU" + bottom: "inception_4a/3x3" + top: "inception_4a/3x3" +} +layer { + name: "inception_4a/5x5_reduce" + type: "Convolution" + bottom: "pool3/3x3_s2" + top: "inception_4a/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4a/5x5_reduce" + top: "inception_4a/5x5_reduce" +} +layer { + name: "inception_4a/5x5" + type: "Convolution" + bottom: "inception_4a/5x5_reduce" + top: "inception_4a/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 48 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_5x5" + type: "ReLU" + bottom: "inception_4a/5x5" + top: "inception_4a/5x5" +} +layer { + name: "inception_4a/pool" + type: "Pooling" + bottom: "pool3/3x3_s2" + top: "inception_4a/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4a/pool_proj" + type: "Convolution" + bottom: "inception_4a/pool" + top: "inception_4a/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4a/relu_pool_proj" + type: "ReLU" + bottom: "inception_4a/pool_proj" + top: "inception_4a/pool_proj" +} +layer { + 
name: "inception_4a/output" + type: "Concat" + bottom: "inception_4a/1x1" + bottom: "inception_4a/3x3" + bottom: "inception_4a/5x5" + bottom: "inception_4a/pool_proj" + top: "inception_4a/output" +} +layer { + name: "inception_4b/1x1" + type: "Convolution" + bottom: "inception_4a/output" + top: "inception_4b/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 160 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_1x1" + type: "ReLU" + bottom: "inception_4b/1x1" + top: "inception_4b/1x1" +} +layer { + name: "inception_4b/3x3_reduce" + type: "Convolution" + bottom: "inception_4a/output" + top: "inception_4b/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 112 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4b/3x3_reduce" + top: "inception_4b/3x3_reduce" +} +layer { + name: "inception_4b/3x3" + type: "Convolution" + bottom: "inception_4b/3x3_reduce" + top: "inception_4b/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 224 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_3x3" + type: "ReLU" + bottom: "inception_4b/3x3" + top: "inception_4b/3x3" +} +layer { + name: "inception_4b/5x5_reduce" + type: "Convolution" + bottom: "inception_4a/output" + top: "inception_4b/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4b/5x5_reduce" + top: "inception_4b/5x5_reduce" +} +layer { + name: "inception_4b/5x5" + type: "Convolution" + bottom: "inception_4b/5x5_reduce" + top: "inception_4b/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_5x5" + type: "ReLU" + bottom: "inception_4b/5x5" + top: "inception_4b/5x5" +} +layer { + name: "inception_4b/pool" + type: "Pooling" + bottom: "inception_4a/output" + top: "inception_4b/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4b/pool_proj" + type: "Convolution" + bottom: "inception_4b/pool" + top: "inception_4b/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4b/relu_pool_proj" + type: "ReLU" + bottom: "inception_4b/pool_proj" + top: "inception_4b/pool_proj" +} +layer { + name: "inception_4b/output" + type: "Concat" + bottom: "inception_4b/1x1" + bottom: "inception_4b/3x3" + bottom: "inception_4b/5x5" + 
bottom: "inception_4b/pool_proj" + top: "inception_4b/output" +} +layer { + name: "inception_4c/1x1" + type: "Convolution" + bottom: "inception_4b/output" + top: "inception_4c/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_1x1" + type: "ReLU" + bottom: "inception_4c/1x1" + top: "inception_4c/1x1" +} +layer { + name: "inception_4c/3x3_reduce" + type: "Convolution" + bottom: "inception_4b/output" + top: "inception_4c/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4c/3x3_reduce" + top: "inception_4c/3x3_reduce" +} +layer { + name: "inception_4c/3x3" + type: "Convolution" + bottom: "inception_4c/3x3_reduce" + top: "inception_4c/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_3x3" + type: "ReLU" + bottom: "inception_4c/3x3" + top: "inception_4c/3x3" +} +layer { + name: "inception_4c/5x5_reduce" + type: "Convolution" + bottom: "inception_4b/output" + top: "inception_4c/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4c/5x5_reduce" + top: "inception_4c/5x5_reduce" +} +layer { + name: "inception_4c/5x5" + type: "Convolution" + bottom: "inception_4c/5x5_reduce" + top: "inception_4c/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_5x5" + type: "ReLU" + bottom: "inception_4c/5x5" + top: "inception_4c/5x5" +} +layer { + name: "inception_4c/pool" + type: "Pooling" + bottom: "inception_4b/output" + top: "inception_4c/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4c/pool_proj" + type: "Convolution" + bottom: "inception_4c/pool" + top: "inception_4c/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4c/relu_pool_proj" + type: "ReLU" + bottom: "inception_4c/pool_proj" + top: "inception_4c/pool_proj" +} +layer { + name: "inception_4c/output" + type: "Concat" + bottom: "inception_4c/1x1" + bottom: "inception_4c/3x3" + bottom: "inception_4c/5x5" + bottom: "inception_4c/pool_proj" + top: "inception_4c/output" +} +layer { + name: "inception_4d/1x1" + type: "Convolution" + bottom: 
"inception_4c/output" + top: "inception_4d/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 112 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_1x1" + type: "ReLU" + bottom: "inception_4d/1x1" + top: "inception_4d/1x1" +} +layer { + name: "inception_4d/3x3_reduce" + type: "Convolution" + bottom: "inception_4c/output" + top: "inception_4d/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 144 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4d/3x3_reduce" + top: "inception_4d/3x3_reduce" +} +layer { + name: "inception_4d/3x3" + type: "Convolution" + bottom: "inception_4d/3x3_reduce" + top: "inception_4d/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 288 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_3x3" + type: "ReLU" + bottom: "inception_4d/3x3" + top: "inception_4d/3x3" +} +layer { + name: "inception_4d/5x5_reduce" + type: "Convolution" + bottom: "inception_4c/output" + top: "inception_4d/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4d/5x5_reduce" + top: "inception_4d/5x5_reduce" +} +layer { + name: "inception_4d/5x5" + type: "Convolution" + bottom: "inception_4d/5x5_reduce" + top: "inception_4d/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_5x5" + type: "ReLU" + bottom: "inception_4d/5x5" + top: "inception_4d/5x5" +} +layer { + name: "inception_4d/pool" + type: "Pooling" + bottom: "inception_4c/output" + top: "inception_4d/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4d/pool_proj" + type: "Convolution" + bottom: "inception_4d/pool" + top: "inception_4d/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4d/relu_pool_proj" + type: "ReLU" + bottom: "inception_4d/pool_proj" + top: "inception_4d/pool_proj" +} +layer { + name: "inception_4d/output" + type: "Concat" + bottom: "inception_4d/1x1" + bottom: "inception_4d/3x3" + bottom: "inception_4d/5x5" + bottom: "inception_4d/pool_proj" + top: "inception_4d/output" +} +layer { + name: "inception_4e/1x1" + type: "Convolution" + bottom: "inception_4d/output" + top: "inception_4e/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + 
convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_1x1" + type: "ReLU" + bottom: "inception_4e/1x1" + top: "inception_4e/1x1" +} +layer { + name: "inception_4e/3x3_reduce" + type: "Convolution" + bottom: "inception_4d/output" + top: "inception_4e/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 160 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_4e/3x3_reduce" + top: "inception_4e/3x3_reduce" +} +layer { + name: "inception_4e/3x3" + type: "Convolution" + bottom: "inception_4e/3x3_reduce" + top: "inception_4e/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_3x3" + type: "ReLU" + bottom: "inception_4e/3x3" + top: "inception_4e/3x3" +} +layer { + name: "inception_4e/5x5_reduce" + type: "Convolution" + bottom: "inception_4d/output" + top: "inception_4e/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_4e/5x5_reduce" + top: "inception_4e/5x5_reduce" +} +layer { + name: "inception_4e/5x5" + type: "Convolution" + bottom: "inception_4e/5x5_reduce" + top: "inception_4e/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_5x5" + type: "ReLU" + bottom: "inception_4e/5x5" + top: "inception_4e/5x5" +} +layer { + name: "inception_4e/pool" + type: "Pooling" + bottom: "inception_4d/output" + top: "inception_4e/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_4e/pool_proj" + type: "Convolution" + bottom: "inception_4e/pool" + top: "inception_4e/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_4e/relu_pool_proj" + type: "ReLU" + bottom: "inception_4e/pool_proj" + top: "inception_4e/pool_proj" +} +layer { + name: "inception_4e/output" + type: "Concat" + bottom: "inception_4e/1x1" + bottom: "inception_4e/3x3" + bottom: "inception_4e/5x5" + bottom: "inception_4e/pool_proj" + top: "inception_4e/output" +} +layer { + name: "pool4/3x3_s2" + type: "Pooling" + bottom: "inception_4e/output" + top: "pool4/3x3_s2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "inception_5a/1x1" + type: "Convolution" + bottom: "pool4/3x3_s2" + top: "inception_5a/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + 
lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_1x1" + type: "ReLU" + bottom: "inception_5a/1x1" + top: "inception_5a/1x1" +} +layer { + name: "inception_5a/3x3_reduce" + type: "Convolution" + bottom: "pool4/3x3_s2" + top: "inception_5a/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 160 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_5a/3x3_reduce" + top: "inception_5a/3x3_reduce" +} +layer { + name: "inception_5a/3x3" + type: "Convolution" + bottom: "inception_5a/3x3_reduce" + top: "inception_5a/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 320 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_3x3" + type: "ReLU" + bottom: "inception_5a/3x3" + top: "inception_5a/3x3" +} +layer { + name: "inception_5a/5x5_reduce" + type: "Convolution" + bottom: "pool4/3x3_s2" + top: "inception_5a/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_5a/5x5_reduce" + top: "inception_5a/5x5_reduce" +} +layer { + name: "inception_5a/5x5" + type: "Convolution" + bottom: "inception_5a/5x5_reduce" + top: "inception_5a/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_5x5" + type: "ReLU" + bottom: "inception_5a/5x5" + top: "inception_5a/5x5" +} +layer { + name: "inception_5a/pool" + type: "Pooling" + bottom: "pool4/3x3_s2" + top: "inception_5a/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_5a/pool_proj" + type: "Convolution" + bottom: "inception_5a/pool" + top: "inception_5a/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5a/relu_pool_proj" + type: "ReLU" + bottom: "inception_5a/pool_proj" + top: "inception_5a/pool_proj" +} +layer { + name: "inception_5a/output" + type: "Concat" + bottom: "inception_5a/1x1" + bottom: "inception_5a/3x3" + bottom: "inception_5a/5x5" + bottom: "inception_5a/pool_proj" + top: "inception_5a/output" +} +layer { + name: "inception_5b/1x1" + type: "Convolution" + bottom: "inception_5a/output" + top: "inception_5b/1x1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 384 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + 
type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_1x1" + type: "ReLU" + bottom: "inception_5b/1x1" + top: "inception_5b/1x1" +} +layer { + name: "inception_5b/3x3_reduce" + type: "Convolution" + bottom: "inception_5a/output" + top: "inception_5b/3x3_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 192 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.09 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_3x3_reduce" + type: "ReLU" + bottom: "inception_5b/3x3_reduce" + top: "inception_5b/3x3_reduce" +} +layer { + name: "inception_5b/3x3" + type: "Convolution" + bottom: "inception_5b/3x3_reduce" + top: "inception_5b/3x3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 384 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_3x3" + type: "ReLU" + bottom: "inception_5b/3x3" + top: "inception_5b/3x3" +} +layer { + name: "inception_5b/5x5_reduce" + type: "Convolution" + bottom: "inception_5a/output" + top: "inception_5b/5x5_reduce" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 48 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.2 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_5x5_reduce" + type: "ReLU" + bottom: "inception_5b/5x5_reduce" + top: "inception_5b/5x5_reduce" +} +layer { + name: "inception_5b/5x5" + type: "Convolution" + bottom: "inception_5b/5x5_reduce" + top: "inception_5b/5x5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 2 + kernel_size: 5 + weight_filler { + type: "xavier" + std: 0.03 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_5x5" + type: "ReLU" + bottom: "inception_5b/5x5" + top: "inception_5b/5x5" +} +layer { + name: "inception_5b/pool" + type: "Pooling" + bottom: "inception_5a/output" + top: "inception_5b/pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "inception_5b/pool_proj" + type: "Convolution" + bottom: "inception_5b/pool" + top: "inception_5b/pool_proj" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "xavier" + std: 0.1 + } + bias_filler { + type: "constant" + value: 0.2 + } + } +} +layer { + name: "inception_5b/relu_pool_proj" + type: "ReLU" + bottom: "inception_5b/pool_proj" + top: "inception_5b/pool_proj" +} +layer { + name: "inception_5b/output" + type: "Concat" + bottom: "inception_5b/1x1" + bottom: "inception_5b/3x3" + bottom: "inception_5b/5x5" + bottom: "inception_5b/pool_proj" + top: "inception_5b/output" +} +layer { + name: "pool5/7x7_s1" + type: "Pooling" + bottom: "inception_5b/output" + top: "pool5/7x7_s1" + pooling_param { + pool: AVE + kernel_size: 7 + stride: 1 + } +} +layer { + name: "pool5/drop_7x7_s1" + type: "Dropout" + bottom: "pool5/7x7_s1" + top: "pool5/7x7_s1" + dropout_param { + dropout_ratio: 0.4 + } +} +layer { + name: "loss3/classifier" + type: "InnerProduct" + bottom: "pool5/7x7_s1" + top: "loss3/classifier" + param { + lr_mult: 1 + decay_mult: 
1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 1000 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "prob" + type: "Softmax" + bottom: "loss3/classifier" + top: "prob" +} diff --git a/modules/dnn/testdata/dnn/gtsrb.prototxt b/modules/dnn/testdata/dnn/gtsrb.prototxt new file mode 100644 index 000000000..eecc471a4 --- /dev/null +++ b/modules/dnn/testdata/dnn/gtsrb.prototxt @@ -0,0 +1,167 @@ +name: "gtsrb" +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 48 +input_dim: 48 + + +layers { + bottom: "data" + top: "layer1" + name: "layer1" + type: CONVOLUTION + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 100 + kernel_size: 7 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "tanh1" + bottom: "layer1" + top: "layer1" + type: TANH +} +layers { + bottom: "layer1" + top: "layer2" + name: "layer2" + type: POOLING + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} + +layers { + bottom: "layer2" + top: "layer3" + name: "layer3" + type: CONVOLUTION + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 150 + kernel_size: 4 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "tanh3" + bottom: "layer3" + top: "layer3" + type: TANH +} +layers { + bottom: "layer3" + top: "layer4" + name: "layer4" + type: POOLING + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} + +layers { + bottom: "layer4" + top: "layer5" + name: "layer5" + type: CONVOLUTION + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 250 + kernel_size: 4 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "tanh5" + bottom: "layer5" + top: "layer5" + type: TANH +} +layers { + bottom: "layer5" + top: "layer6" + name: "layer6" + type: POOLING + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} + +layers { + bottom: "layer6" + top: "layer7" + name: "layer7" + type: INNER_PRODUCT + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 300 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} +layers { + name: "tanh7" + bottom: "layer7" + top: "layer7" + type: TANH +} + +layers { + bottom: "layer7" + top: "layer8" + name: "layer8" + type: INNER_PRODUCT + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 43 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } +} + +layers { + name: "loss" + top: "loss" + bottom: "layer8" + type: SOFTMAX +}
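Note that gtsrb.prototxt deliberately keeps the deprecated V1 syntax (a repeated "layers" field, enum layer types such as CONVOLUTION, blobs_lr instead of param/lr_mult), so importing it exercises the UpgradeV1Net() path, while googlenet_deploy.prototxt is already in the current format and passes through the upgrade untouched. Driving the importer by hand mirrors the two accuracy tests; a sketch with a placeholder path:

    using namespace cv::dnn;

    Ptr<Importer> importer = createCaffeImporter("testdata/dnn/gtsrb.prototxt", "");
    Ptr<NetConfiguration> config = NetConfiguration::create();
    // For now this only walks the parsed layers and prints their parameters;
    // the addLayer()/setLayerOutputLabels() wiring is still commented out.
    importer->populateNetConfiguration(config);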