Added LRN and SoftMax layers, some fixes

10 years ago · eef4d1dda9
parent 09ffc43ce6
commit eef4d1dda9
7 changed files with 364 additions and 22 deletions
--- a/modules/dnn/include/opencv2/dnn/dict.hpp
+++ b/modules/dnn/include/opencv2/dnn/dict.hpp
@ -122,13 +122,13 @@ inline unsigned DictValue::get<unsigned>() const
 // Reads the stored value as a double: a REAL value is returned as stored and an INT
 // value is converted; for any other stored type the assertion in the last branch fires.
 template<>
 inline double DictValue::get<double>() const
 {
-    if (type == cv::Param::FLOAT)
+    if (type == cv::Param::REAL)
        return d;
    else if (type == cv::Param::INT)
        return i;
    else
    {
        // Reached only for unsupported types, so this condition is false here and reports the error.
-        CV_Assert(type == cv::Param::FLOAT || type == cv::Param::INT);
+        CV_Assert(type == cv::Param::REAL || type == cv::Param::INT);
        return 0;
    }
 }
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@ -32,8 +32,6 @@ namespace dnn
        void fill(InputArray in);
        void fill(int ndims, const int *sizes, int type, void *data, bool deepCopy = true);
        bool empty() const;
        Mat& getMatRef();
        const Mat& getMatRef() const;
        Mat getMat();
@ -42,18 +40,26 @@ namespace dnn
        //shape getters
        int cols() const;
        int rows() const;
        Size size() const;
        int channels() const;
        int num() const;
        Size size2() const;
        Vec4i shape() const;
-        size_t total() const;
+        int size(int index) const;
        size_t total(int startAxis = 0, int endAxis = -1) const;
        uchar *rawPtr(int num = 0, int cn = 0, int row = 0, int col = 0);
        template<typename TFloat>
        TFloat *ptr(int num = 0, int cn = 0, int row = 0, int col = 0);
        int type() const;
        bool isFloat() const;
        bool isDouble() const;
    private:
        const int *sizes() const;
        int dims() const;
        Mat m;
    };
@ -179,8 +185,8 @@ namespace dnn
    };
    //registers layer on module load time
-    #define REGISTER_LAYER(type, constuctorFunc) \
+    #define REGISTER_LAYER_FUNC(type, constuctorFunc) \
-    static _LayerRegisterer __layerRegisterer_##type(#type, func);
+    static _LayerRegisterer __layerRegisterer_##type(#type, constuctorFunc);
    #define REGISTER_LAYER_CLASS(type, class)                       \
    Ptr<Layer> __layerRegisterer_func_##type(LayerParams &params)   \
--- a/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.inl.hpp
@ -40,7 +40,7 @@ namespace dnn
        return m.size[m.dims-2];
    }
-    inline Size Blob::size() const
+    inline Size Blob::size2() const
    {
        // Size is built as (width, height) from the cols() and rows() getters.
        return Size(cols(), rows());
    }
@ -63,10 +63,24 @@ namespace dnn
        return Vec4i(m.size.p);
    }
-    inline size_t Blob::total() const
+    inline int Blob::size(int index) const
    {
        // index must name an existing axis: 0 <= index < dims().
-        CV_DbgAssert(m.dims == 4);
+        CV_Assert(index >= 0 && index < dims());
        // Extent of the blob along the requested axis.
-        return (size_t) m.size[0] * m.size[1] * m.size[2] * m.size[3];
+        return sizes()[index];
    }
    inline size_t Blob::total(int startAxis, int endAxis) const
    {
        if (endAxis == -1)
            endAxis = dims();
        CV_Assert(0 <= startAxis && startAxis <= endAxis && endAxis <= dims());
        size_t size = 1; //assume that blob isn't empty
        for (int i = startAxis; i < endAxis; i++)
            size *= (size_t) sizes()[i];
        return size;
    }
    inline uchar* Blob::rawPtr(int num, int cn, int row, int col)
@ -82,6 +96,33 @@ namespace dnn
        return (TFloat*) rawPtr(num, cn, row, col);
    }
    /** Depth code of the wrapped matrix, as reported by m.depth(). */
    inline int Blob::type() const
    {
        const int depthCode = m.depth();
        return depthCode;
    }
    /** True when type() reports CV_32F. */
    inline bool Blob::isFloat() const
    {
        const bool singlePrecision = (CV_32F == type());
        return singlePrecision;
    }
    inline bool Blob::isDouble() const
    {
        return (type() == CV_32F);
    }
    /** Pointer to the first entry of the wrapped matrix's per-axis size array. */
    inline const int * Blob::sizes() const
    {
        const int *firstExtent = m.size.p;
        return firstExtent;
    }
    /** Number of axes of the wrapped matrix (m.dims). */
    inline int Blob::dims() const
    {
        const int axisCount = m.dims;
        return axisCount;
    }
 }
 }
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@ -133,13 +133,6 @@ struct LayerOutId
    LayerOutId() {}
    LayerOutId(int layerId, int outputId, const String &outputName = String()) 
        : lid(layerId), oid(outputId), name(outputName) {}
    struct UnaryMatchName
    {   
        const String &name;
        UnaryMatchName(const String &_name) : name(_name) {}
        bool operator()(const String &other) { return name == other; }
    };
 };
 struct LayerData
@ -247,7 +240,7 @@ struct Net::Impl
            for (size_t oi = 0; oi < ld.outputNames.size() && count < maxCount; oi++)
            {
                if (ld.outputNames[oi] == name)
-                    found[count++] = LayerOutId(lid, oi);
+                    found[count++] = LayerOutId(lid, (int)oi);
            }
        }
@ -261,7 +254,6 @@ struct Net::Impl
        MapIdToLayerData::iterator it;
        for (it = layers.begin(); it != layers.end(); it++)
        {
            int lid = it->first;
            LayerData &ld = it->second;
            ld.inputBlobs.resize(ld.inputNames.size());
@ -402,6 +394,8 @@ struct Net::Impl
        }
        LayerData &ld = layers[layerId];
        //forward parents
        for (set<int>::iterator i = ld.inputLayersId.begin(); i != ld.inputLayersId.end(); i++)
        {
            LayerData &ild = layers[*i];
@ -410,9 +404,17 @@ struct Net::Impl
            {
                if (ild.layerInstance)
                    ild.layerInstance->forward(ild.inputBlobs, ild.outputBlobs);
-                ild.flag = true;
+                ild.flag = 1;
            }
        }
        //forward itself
        if (!ld.flag)
        {
            if (ld.layerInstance)
                ld.layerInstance->forward(ld.inputBlobs, ld.outputBlobs);
            ld.flag = 1;
        }
    }
    void forwardAll()
--- a/modules/dnn/src/layers/blank_layer.cpp
+++ b/modules/dnn/src/layers/blank_layer.cpp
@ -0,0 +1,39 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 namespace cv
 {
 namespace dnn
 {
    /** Pass-through layer: every output blob is assigned from the input blob with the same index. */
    class BlankLayer : public Layer
    {
        //assigns *inputs[k] to outputs[k] for each input; outputs must already hold inputs.size() entries
        static void assignInputs(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            const size_t count = inputs.size();
            for (size_t k = 0; k != count; ++k)
                outputs[k] = *inputs[k];
        }

    public:

        //the layer has no settings, so params is not read
        BlankLayer(LayerParams &params)
        {
        }

        //sizes the output list to match the inputs, then fills it
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            outputs.resize(inputs.size());
            assignInputs(inputs, outputs);
        }

        //re-assigns every output from its input; does not resize outputs
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
        {
            assignInputs(inputs, outputs);
        }
    };
    //factory passed to the layer registerer below: builds a BlankLayer from the given params
    static Ptr<Layer> blankLayerRegisterer(LayerParams &params)
    {
        Ptr<Layer> layer(new BlankLayer(params));
        return layer;
    }

    //the "Dropout" layer type is served by the pass-through BlankLayer
    REGISTER_LAYER_FUNC(Dropout, blankLayerRegisterer)
 }
 }
--- a/modules/dnn/src/layers/lrn_layer.cpp
+++ b/modules/dnn/src/layers/lrn_layer.cpp
@ -0,0 +1,138 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include <opencv2/imgproc.hpp>
 namespace cv
 {
 namespace dnn
 {
    // Local response normalization layer. The constructor reads "norm_region",
    // "local_size", "alpha" and "beta" from the layer params; forward() dispatches
    // to one of the two private normalization routines according to `type`.
    class LRNLayer : public Layer
    {
        // Normalization mode, selected in the constructor from "norm_region".
        enum
        {
            CHANNEL_NRM,          // chosen for "ACROSS_CHANNELS" (the default)
            SPATIAL_NRM,          // chosen for "WITHIN_CHANNEL"
            SPATIAL_CONTRAST_NRM //cuda-convnet feature
                                  // never selected by the constructor; forward() reports it as unimplemented
        } type;
        // "local_size" parameter (default 5); the constructor rejects values for which size % 2 != 1.
        int size;
        // "alpha" (default 1) and "beta" (default 0.75) parameters of the normalization formula.
        double alpha, beta;
        // Scratch blob created in allocate() with the input shape but its second extent set to 1.
        Blob bufBlob;
        // Normalizes each sample using squares accumulated over its channels.
        void channelNoramlization(Blob &src, Blob &dst);
        // Normalizes each channel plane using a size x size box sum of squares.
        void spatialNormalization(Blob &src, Blob &dst);
    public:
        // Parses the parameters; raises StsBadArg for an unknown region or a rejected local_size.
        LRNLayer(LayerParams &params);
        // Requires exactly one input; creates outputs[0] with the input's shape and sizes bufBlob.
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        // Normalizes *inputs[0] into outputs[0] using the configured mode.
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    REGISTER_LAYER_CLASS(LRN, LRNLayer)
    /** Reads the layer settings from params.
     *  "norm_region": "ACROSS_CHANNELS" (default) or "WITHIN_CHANNEL"; anything else raises StsBadArg.
     *  "local_size": default 5; values with size % 2 != 1 raise StsBadArg.
     *  "alpha": default 1. "beta": default 0.75. */
    LRNLayer::LRNLayer(LayerParams &params)
    {
        const String region = params.get<String>("norm_region", "ACROSS_CHANNELS");
        if (region == "WITHIN_CHANNEL")
        {
            type = SPATIAL_NRM;
        }
        else if (region == "ACROSS_CHANNELS")
        {
            type = CHANNEL_NRM;
        }
        else
        {
            CV_Error(cv::Error::StsBadArg, "Unknown region type \"" + region + "\"");
        }

        size = params.get<int>("local_size", 5);
        const bool acceptedSize = (size % 2 == 1);
        if (!acceptedSize)
        {
            CV_Error(cv::Error::StsBadArg, "LRN layer only supports odd values for local_size");
        }

        alpha = params.get<double>("alpha", 1);
        beta = params.get<double>("beta", 0.75);
    }
    /** Prepares the single output blob and the scratch blob.
     *  Exactly one input is required; outputs[0] is created with the input's shape and
     *  bufBlob with the same shape except that its second extent is 1. */
    void LRNLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() == 1);

        outputs.resize(1);
        const Vec4i inputShape = inputs[0]->shape();
        outputs[0].create(inputShape);

        Vec4i scratchShape = inputShape;
        scratchShape[1] = 1; //maybe make shape[0] = 1 too
        bufBlob.create(scratchShape);
    }
    /** Normalizes *inputs[0] into outputs[0] with the routine matching `type`.
     *  Any mode other than CHANNEL_NRM or SPATIAL_NRM raises StsNotImplemented. */
    void LRNLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        Blob &input = *inputs[0];
        Blob &output = outputs[0];

        if (type == CHANNEL_NRM)
        {
            channelNoramlization(input, output);
        }
        else if (type == SPATIAL_NRM)
        {
            spatialNormalization(input, output);
        }
        else
        {
            CV_Error(cv::Error::StsNotImplemented, "Unimplemented mode of LRN layer");
        }
    }
    /** Across-channel normalization.
     *  For every sample n:
     *      dst(n, c) = src(n, c) / (1 + alpha/channels * sum_k src(n, k)^2) ^ beta
     *  where the sum runs over all channels k of that sample.
     *
     *  NOTE(review): the sum covers every channel and `size` is not used here — confirm
     *  that a window of `size` neighbouring channels is not what was intended.
     *  NOTE(review): assumes getMat(n, cn) returns the plane of sample n, channel cn as a
     *  header sharing the blob's memory — verify against Blob::getMat. */
    void LRNLayer::channelNoramlization(Blob &srcBlob, Blob &dstBlob)
    {
        int num = srcBlob.num();
        int channels = srcBlob.channels();

        for (int n = 0; n < num; n++)
        {
            //channel 0 of the destination doubles as the accumulator
            Mat accum = dstBlob.getMat(n, 0); //memory saving
            accum.setTo(0);

            //sum of squares over the channels of sample n
            //(was srcBlob.getMat(), which ignored both n and cn)
            for (int cn = 0; cn < channels; cn++)
            {
                cv::accumulateSquare(srcBlob.getMat(n, cn), accum);
            }

            //accum = (1 + alpha/channels * accum) ^ beta
            accum.convertTo(accum, accum.type(), alpha/channels, 1);
            cv::pow(accum, beta, accum);

            //walk the channels downwards so that channel 0, which holds the divisor,
            //is overwritten last
            for (int cn = channels - 1; cn >= 0; cn--)
            {
                cv::divide(srcBlob.getMat(n, cn), accum, dstBlob.getMat(n, cn));
            }
        }
    }
    /** Within-channel normalization. For every sample/channel plane:
     *      dst = src / (1 + alpha/(size*size) * boxsum(src^2)) ^ beta
     *  where boxsum is an unnormalized size x size box filter with BORDER_CONSTANT.
     *  All intermediate results are computed in place in the destination plane. */
    void LRNLayer::spatialNormalization(Blob &srcBlob, Blob &dstBlob)
    {
        const int sampleCount = srcBlob.num();
        const int planeCount = srcBlob.channels();
        const cv::Size window(size, size);
        const cv::Point centeredAnchor(-1, -1);
        const double scale = alpha/(size*size);

        for (int n = 0; n < sampleCount; n++)
        {
            for (int cn = 0; cn < planeCount; cn++)
            {
                Mat src = srcBlob.getMat(n, cn);
                Mat dst = dstBlob.getMat(n, cn);
                uchar *dataDst0 = dst.data;

                //squares, then their sum over the window around each pixel
                cv::pow(src, 2, dst);
                //TODO: check border type
                cv::boxFilter(dst, dst, dst.depth(), window, centeredAnchor, false, cv::BORDER_CONSTANT);

                //(1 + scale * sum) ^ beta, then the final division
                dst.convertTo(dst, dst.type(), scale, 1);
                cv::pow(dst, beta, dst);
                cv::divide(src, dst, dst);

                //debug check that dst still points at the same buffer after the in-place calls
                CV_DbgAssert(dataDst0 == dst.data);
            }
        }
    }
 }
 }
--- a/modules/dnn/src/layers/softmax_layer.cpp
+++ b/modules/dnn/src/layers/softmax_layer.cpp
@ -0,0 +1,116 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
 namespace cv
 {
 namespace dnn
 {
    // Softmax layer: normalizes the input along one axis so that the exponentials of the
    // max-shifted values are divided by their sum along that axis (see forward()).
    class SoftMaxLayer : public Layer
    {
        // Axis to normalize along; read from the "axis" parameter (default 1) and asserted to be in [0, 4).
        int axis;
        // Scratch blob created in allocate() with the input shape except extent 1 along `axis`;
        // forward() stores the per-position maximum in it and later reuses it for the sum.
        Blob maxAggregator;
    public:
        // Reads and validates "axis".
        SoftMaxLayer(LayerParams &params);
        // Requires exactly one input; creates outputs[0] with the input's shape and sizes maxAggregator.
        void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        // Computes the softmax of *inputs[0] into outputs[0].
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
    };
    //no trailing ';' here: the other layer registrations in this module invoke the macro
    //without one, and the extra ';' is only an empty declaration at namespace scope
    REGISTER_LAYER_CLASS(Softmax, SoftMaxLayer)
    /** Reads the normalization axis from the "axis" parameter (default 1).
     *  The axis must lie in [0, 4); otherwise the assertion fails. */
    SoftMaxLayer::SoftMaxLayer(LayerParams &params)
        : axis(params.get<int>("axis", 1))
    {
        CV_Assert(0 <= axis && axis < 4);
    }
    /** Prepares the single output blob and the reduction scratch blob.
     *  Exactly one input is required; outputs[0] gets the input's shape and maxAggregator
     *  the same shape with extent 1 along `axis`. */
    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        CV_Assert(inputs.size() == 1);

        const Vec4i inputShape = inputs[0]->shape();
        outputs.resize(1);
        outputs[0].create(inputShape);

        Vec4i reducedShape = inputShape;
        reducedShape[axis] = 1;
        maxAggregator.create(reducedShape);
    }
    /** Computes the softmax of *inputs[0] along `axis` into outputs[0].
     *
     *  The blob is viewed as [outerSize][channels][innerSize], where `channels` is the
     *  extent along `axis`. For every (outer, inner) position:
     *    1. the maximum over the channels is stored in maxAggregator,
     *    2. dst = exp(src - max),
     *    3. dst is divided by the sum of dst over the channels (maxAggregator is reused for the sum).
     *
     *  NOTE(review): the data is accessed through ptr<float>() with dense offsets, so all
     *  three blobs are assumed to hold contiguous CV_32F data — this is not checked here. */
    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
    {
        Blob &src = *inputs[0];
        Blob &dst = outputs[0];

        float *srcPtr = src.ptr<float>();
        float *dstPtr = dst.ptr<float>();
        float *bufPtr = maxAggregator.ptr<float>();

        size_t outerSize = src.total(0, axis);
        size_t channels = (size_t) src.size(axis);
        size_t innerSize = src.total(axis + 1);  //elements per channel slice == stride between channels
        size_t outerStep = src.total(axis);      //stride between consecutive outer slices

        //compute max along axis
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            const float *srcBase = srcPtr + outerDim * outerStep;
            float *bufBase = bufPtr + outerDim * innerSize;

            //seed with channel 0 (plain loop: no header declaring memcpy is included by this file)
            for (size_t i = 0; i < innerSize; i++)
                bufBase[i] = srcBase[i];

            for (size_t cnDim = 1; cnDim < channels; cnDim++)
            {
                const float *srcSlice = srcBase + cnDim * innerSize;
                for (size_t i = 0; i < innerSize; i++)
                    bufBase[i] = std::max(bufBase[i], srcSlice[i]);
            }
        }

        //subtract max
        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            const float *srcBase = srcPtr + outerDim * outerStep;
            float *dstBase = dstPtr + outerDim * outerStep;
            const float *bufBase = bufPtr + outerDim * innerSize;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const float *srcSlice = srcBase + cnDim * innerSize;
                float *dstSlice = dstBase + cnDim * innerSize;
                for (size_t i = 0; i < innerSize; i++)
                    dstSlice[i] = srcSlice[i] - bufBase[i];
            }
        }

        //exponentiate the whole shifted blob in place
        cv::exp(dst.getMat(), dst.getMat());

        for (size_t outerDim = 0; outerDim < outerSize; outerDim++)
        {
            float *dstBase = dstPtr + outerDim * outerStep;
            float *bufBase = bufPtr + outerDim * innerSize;

            //sum exp along axis
            for (size_t i = 0; i < innerSize; i++)
                bufBase[i] = 0.f;

            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                const float *dstSlice = dstBase + cnDim * innerSize;
                for (size_t i = 0; i < innerSize; i++)
                    bufBase[i] += dstSlice[i];
            }

            //divide by computed sum
            for (size_t cnDim = 0; cnDim < channels; cnDim++)
            {
                float *dstSlice = dstBase + cnDim * innerSize;
                for (size_t i = 0; i < innerSize; i++)
                    dstSlice[i] /= bufBase[i];
            }
        }
    }
 }
 }