Added LRN and SoftMax layers, some fixes

pull/265/head
Vitaliy Lyudvichenko 10 years ago
parent 09ffc43ce6
commit eef4d1dda9
  1. modules/dnn/include/opencv2/dnn/dict.hpp (4 lines changed)
  2. modules/dnn/include/opencv2/dnn/dnn.hpp (18 lines changed)
  3. modules/dnn/include/opencv2/dnn/dnn.inl.hpp (49 lines changed)
  4. modules/dnn/src/dnn.cpp (22 lines changed)
  5. modules/dnn/src/layers/blank_layer.cpp (39 lines changed)
  6. modules/dnn/src/layers/lrn_layer.cpp (138 lines changed)
  7. modules/dnn/src/layers/softmax_layer.cpp (116 lines changed)

@@ -122,13 +122,13 @@ inline unsigned DictValue::get<unsigned>() const
template<>
inline double DictValue::get<double>() const
{
if (type == cv::Param::FLOAT)
if (type == cv::Param::REAL)
return d;
else if (type == cv::Param::INT)
return i;
else
{
CV_Assert(type == cv::Param::FLOAT || type == cv::Param::INT);
CV_Assert(type == cv::Param::REAL || type == cv::Param::INT);
return 0;
}
}
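Note: with the rename to cv::Param::REAL, get<double>() also accepts integer-typed entries. A minimal usage sketch, assuming DictValue is constructible from an int as elsewhere in this header:

    DictValue v(5);              // stored with type == cv::Param::INT
    double d = v.get<double>();  // taken from the INT branch, yields 5.0
    CV_Assert(d == 5.0);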

@@ -32,8 +32,6 @@ namespace dnn
void fill(InputArray in);
void fill(int ndims, const int *sizes, int type, void *data, bool deepCopy = true);
bool empty() const;
Mat& getMatRef();
const Mat& getMatRef() const;
Mat getMat();
@@ -42,18 +40,26 @@ namespace dnn
//shape getters
int cols() const;
int rows() const;
Size size() const;
int channels() const;
int num() const;
Size size2() const;
Vec4i shape() const;
size_t total() const;
int size(int index) const;
size_t total(int startAxis = 0, int endAxis = -1) const;
uchar *rawPtr(int num = 0, int cn = 0, int row = 0, int col = 0);
template<typename TFloat>
TFloat *ptr(int num = 0, int cn = 0, int row = 0, int col = 0);
int type() const;
bool isFloat() const;
bool isDouble() const;
private:
const int *sizes() const;
int dims() const;
Mat m;
};
@@ -179,8 +185,8 @@ namespace dnn
};
//registers layer on module load time
#define REGISTER_LAYER(type, constructorFunc) \
static _LayerRegisterer __layerRegisterer_##type(#type, func);
#define REGISTER_LAYER_FUNC(type, constructorFunc) \
static _LayerRegisterer __layerRegisterer_##type(#type, constructorFunc);
#define REGISTER_LAYER_CLASS(type, class) \
Ptr<Layer> __layerRegisterer_func_##type(LayerParams &params) \
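For reference, a hedged sketch of how a layer would be registered with the corrected macros (MyLayer and createMyLayer are illustrative names, not part of this commit):

    static Ptr<Layer> createMyLayer(LayerParams &params)
    {
        return Ptr<Layer>(new MyLayer(params)); // MyLayer: a hypothetical Layer subclass
    }
    REGISTER_LAYER_FUNC(MyLayer, createMyLayer) // registers a factory function
    // or, for a class exposing a MyLayer(LayerParams&) constructor:
    // REGISTER_LAYER_CLASS(MyLayer, MyLayer)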

@@ -40,7 +40,7 @@ namespace dnn
return m.size[m.dims-2];
}
inline Size Blob::size() const
inline Size Blob::size2() const
{
return Size(cols(), rows());
}
@@ -63,10 +63,24 @@ namespace dnn
return Vec4i(m.size.p);
}
inline size_t Blob::total() const
inline int Blob::size(int index) const
{
CV_DbgAssert(m.dims == 4);
return (size_t) m.size[0] * m.size[1] * m.size[2] * m.size[3];
CV_Assert(index >= 0 && index < dims());
return sizes()[index];
}
inline size_t Blob::total(int startAxis, int endAxis) const
{
if (endAxis == -1)
endAxis = dims();
CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
size_t size = 1; //assume that blob isn't empty
for (int i = startAxis; i < endAxis; i++)
size *= (size_t) sizes()[i];
return size;
}
inline uchar* Blob::rawPtr(int num, int cn, int row, int col)
@@ -82,6 +96,33 @@ namespace dnn
return (TFloat*) rawPtr(num, cn, row, col);
}
inline int Blob::type() const
{
return m.depth();
}
inline bool Blob::isFloat() const
{
return (type() == CV_32F);
}
inline bool Blob::isDouble() const
{
return (type() == CV_64F);
}
inline const int * Blob::sizes() const
{
return &m.size[0];
}
inline int Blob::dims() const
{
return m.dims;
}
}
}
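A short usage sketch of the new shape getters (blob contents are illustrative; create(Vec4i) is assumed available, as used by the layers below):

    Blob blob;
    blob.create(Vec4i(2, 3, 4, 5));      // num=2, channels=3, rows=4, cols=5
    CV_Assert(blob.size(1) == 3);        // size of a single axis
    CV_Assert(blob.total() == 2*3*4*5);  // product over all axes
    CV_Assert(blob.total(1) == 3*4*5);   // axes [1, dims)
    CV_Assert(blob.total(2, 4) == 4*5);  // half-open range [2, 4)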

@@ -133,13 +133,6 @@ struct LayerOutId
LayerOutId() {}
LayerOutId(int layerId, int outputId, const String &outputName = String())
: lid(layerId), oid(outputId), name(outputName) {}
struct UnaryMatchName
{
const String &name;
UnaryMatchName(const String &_name) : name(_name) {}
bool operator()(const String &other) { return name == other; }
};
};
struct LayerData
@@ -247,7 +240,7 @@ struct Net::Impl
for (size_t oi = 0; oi < ld.outputNames.size() && count < maxCount; oi++)
{
if (ld.outputNames[oi] == name)
found[count++] = LayerOutId(lid, oi);
found[count++] = LayerOutId(lid, (int)oi);
}
}
@@ -261,7 +254,6 @@ struct Net::Impl
MapIdToLayerData::iterator it;
for (it = layers.begin(); it != layers.end(); it++)
{
int lid = it->first;
LayerData &ld = it->second;
ld.inputBlobs.resize(ld.inputNames.size());
@@ -402,6 +394,8 @@ struct Net::Impl
}
LayerData &ld = layers[layerId];
//forward parents
for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
{
LayerData &ild = layers[*i];
@@ -410,9 +404,17 @@ struct Net::Impl
{
if (ild.layerInstance)
ild.layerInstance->forward(ild.inputBlobs, ild.outputBlobs);
ild.flag = true;
ild.flag = 1;
}
}
//forward itself
if (!ld.flag)
{
if (ld.layerInstance)
ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs);
ld.flag = 1;
}
}
void forwardAll()
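The evaluation order here is pull-style: a layer first ensures its parents have run, then runs itself, and the flag field prevents re-running shared ancestors. A simplified standalone sketch of the idea (Node and forwardNode are illustrative, not the actual Net::Impl types):

    struct Node
    {
        std::vector<int> parents;
        bool done;
        Node() : done(false) {}
    };

    static void forwardNode(std::vector<Node> &graph, int id)
    {
        if (graph[id].done)
            return;
        for (size_t i = 0; i < graph[id].parents.size(); i++)
            forwardNode(graph, graph[id].parents[i]); // parents first
        // ...run this layer's forward() here...
        graph[id].done = true;
    }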

@@ -0,0 +1,39 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
namespace cv
{
namespace dnn
{
class BlankLayer : public Layer
{
public:
BlankLayer(LayerParams &params)
{
}
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
outputs.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); i++)
outputs[i] = *inputs[i];
}
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
for (size_t i = 0; i < inputs.size(); i++)
outputs[i] = *inputs[i];
}
};
static Ptr<Layer> blankLayerRegisterer(LayerParams &params)
{
return Ptr<Layer>(new BlankLayer(params));
}
REGISTER_LAYER_FUNC(Dropout, blankLayerRegisterer)
}
}
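Mapping Dropout onto this pass-through layer is sound for inference: with inverted dropout the 1/(1-p) scaling happens during training, so at test time the layer reduces to the identity. A hedged usage sketch (the input blob is illustrative):

    Blob input;                      // illustrative input
    input.create(Vec4i(1, 3, 8, 8));
    std::vector<Blob*> inputs(1, &input);
    std::vector<Blob> outputs;
    LayerParams params;              // BlankLayer ignores its params
    BlankLayer layer(params);
    layer.allocate(inputs, outputs);
    layer.forward(inputs, outputs);  // outputs[0] shares data with input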

@@ -0,0 +1,138 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <opencv2/imgproc.hpp>
namespace cv
{
namespace dnn
{
class LRNLayer : public Layer
{
enum
{
CHANNEL_NRM,
SPATIAL_NRM,
SPATIAL_CONTRAST_NRM //cuda-convnet feature
} type;
int size;
double alpha, beta;
Blob bufBlob;
void channelNormalization(Blob &src, Blob &dst);
void spatialNormalization(Blob &src, Blob &dst);
public:
LRNLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
REGISTER_LAYER_CLASS(LRN, LRNLayer)
LRNLayer::LRNLayer(LayerParams &params)
{
String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
if (nrmType == "ACROSS_CHANNELS")
type = CHANNEL_NRM;
else if (nrmType == "WITHIN_CHANNEL")
type = SPATIAL_NRM;
else
CV_Error(cv::Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
size = params.get<int>("local_size", 5);
if (size % 2 != 1)
CV_Error(cv::Error::StsBadArg, "LRN layer only supports odd values for local_size");
alpha = params.get<double>("alpha", 1);
beta = params.get<double>("beta", 0.75);
}
void LRNLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
outputs.resize(1);
Vec4i shape = inputs[0]->shape();
outputs[0].create(shape);
shape[1] = 1; //maybe make shape[0] = 1 too
bufBlob.create(shape);
}
void LRNLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
switch (type)
{
case CHANNEL_NRM:
channelNormalization(src, dst);
break;
case SPATIAL_NRM:
spatialNormalization(src, dst);
break;
default:
CV_Error(cv::Error::StsNotImplemented, "Unimplemented mode of LRN layer");
break;
}
}
void LRNLayer::channelNormalization(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
for (int n = 0; n < num; n++)
{
Mat buf = bufBlob.getMat(n, 0); //scratch buffer; unused here, since accum below reuses dst memory
Mat accum = dstBlob.getMat(n, 0); //memory saving
accum.setTo(0);
for (int cn = 0; cn < channels; cn++)
{
cv::accumulateSquare(srcBlob.getMat(n, cn), accum);
}
accum.convertTo(accum, accum.type(), alpha/channels, 1);
cv::pow(accum, beta, accum);
for (int cn = channels - 1; cn >= 0; cn--)
{
cv::divide(srcBlob.getMat(n, cn), accum, dstBlob.getMat(n, cn));
}
}
}
void LRNLayer::spatialNormalization(Blob &srcBlob, Blob &dstBlob)
{
int num = srcBlob.num();
int channels = srcBlob.channels();
for (int n = 0; n < num; n++)
{
for (int cn = 0; cn < channels; cn++)
{
Mat src = srcBlob.getMat(n, cn);
Mat dst = dstBlob.getMat(n, cn);
uchar *dataDst0 = dst.data;
cv::pow(src, 2, dst);
//TODO: check border type
cv::boxFilter(dst, dst, dst.depth(), cv::Size(size, size), cv::Point(-1, -1), false, cv::BORDER_CONSTANT);
dst.convertTo(dst, dst.type(), alpha/(size*size), 1);
cv::pow(dst, beta, dst);
cv::divide(src, dst, dst);
CV_DbgAssert(dataDst0 == dst.data);
}
}
}
}
}
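As written, the ACROSS_CHANNELS branch computes, per pixel, dst_c = src_c / (1 + (alpha/C) * sum over c' of src_c'^2)^beta, where the sum runs over all C channels (note that local_size does not enter this branch as implemented). A self-contained worked example of that formula for a single pixel:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double alpha = 1.0, beta = 0.75;  // illustrative parameters
        const double x[3] = { 0.5, -1.0, 2.0 }; // one pixel across C = 3 channels
        double sumsq = 0;
        for (int c = 0; c < 3; c++)
            sumsq += x[c] * x[c];               // 0.25 + 1.0 + 4.0 = 5.25
        double scale = std::pow(1.0 + alpha / 3 * sumsq, beta);
        for (int c = 0; c < 3; c++)
            std::printf("%f\n", x[c] / scale);  // normalized responses
        return 0;
    }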

@@ -0,0 +1,116 @@
#include "../precomp.hpp"
#include "layers_common.hpp"
#include <algorithm>
#include <stdlib.h>
using std::max;
namespace cv
{
namespace dnn
{
class SoftMaxLayer : public Layer
{
int axis;
Blob maxAggregator;
public:
SoftMaxLayer(LayerParams &params);
void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
};
REGISTER_LAYER_CLASS(Softmax, SoftMaxLayer)
SoftMaxLayer::SoftMaxLayer(LayerParams &params)
{
axis = params.get<int>("axis", 1);
CV_Assert(0 <= axis && axis < 4);
}
void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
CV_Assert(inputs.size() == 1);
Vec4i shape = inputs[0]->shape();
outputs.resize(1);
outputs[0].create(shape);
shape[axis] = 1;
maxAggregator.create(shape);
}
void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
{
Blob &src = *inputs[0];
Blob &dst = outputs[0];
float *srcPtr = src.ptr<float>();
float *dstPtr = dst.ptr<float>();
float *bufPtr = maxAggregator.ptr<float>();
size_t outerSize = src.total(0, axis);
size_t channels = src.size(axis);
size_t innerSize = src.total(axis + 1, -1);
size_t outerStep = src.total(axis);
size_t cnStep = src.total(axis + 1);
//compute max along axis
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
memcpy(bufPtr + bufOffset, srcPtr + srcOffset, innerSize * sizeof(float));
for (size_t cnDim = 1; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] = std::max(bufPtr[bufOffset + i], srcPtr[srcOffset + cnDim * cnStep + i]);
}
}
//subtract max
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
dstPtr[srcOffset + cnDim * cnStep + i] = srcPtr[srcOffset + cnDim * cnStep + i] - bufPtr[bufOffset + i];
}
}
cv::exp(dst.getMat(), dst.getMat());
for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
{
size_t srcOffset = outerDim * outerStep;
size_t bufOffset = outerDim * cnStep;
//sum exp along axis
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] = 0.f;
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
bufPtr[bufOffset + i] += dstPtr[srcOffset + cnDim * cnStep + i];
}
//divide by computed sum
for (size_t cnDim = 0; cnDim < channels; cnDim++)
{
for (size_t i = 0; i < innerSize; i++)
dstPtr[srcOffset + cnDim * cnStep + i] /= bufPtr[bufOffset + i];
}
}
}
}
}
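The max-subtraction pass above is the standard numerical-stability trick: softmax is invariant to shifting the logits by a constant, and subtracting the per-slice maximum keeps every exp() argument non-positive. A self-contained sketch of the same computation on a single vector:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<float> x;                  // large logits that would overflow a naive exp()
        x.push_back(1000.f); x.push_back(1001.f); x.push_back(1002.f);
        float mx = *std::max_element(x.begin(), x.end());
        float sum = 0.f;
        for (size_t i = 0; i < x.size(); i++)
            sum += x[i] = std::exp(x[i] - mx); // exp of shifted logits, all <= 1
        for (size_t i = 0; i < x.size(); i++)
            std::printf("%f\n", x[i] / sum);   // ~0.090, 0.245, 0.665
        return 0;
    }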