Updated Halide tests. Simplified batch norm using Halide. Bug fix in convolutional layer.

pull/1249/head
dkurt 8 years ago
parent e551d15c2b
commit 37cf4974a4
  1. modules/dnn/perf/perf_halide_net.cpp (4 lines changed)
  2. modules/dnn/src/layers/batch_norm_layer.cpp (29 lines changed)
  3. modules/dnn/src/layers/convolution_layer.cpp (2 lines changed)
  4. modules/dnn/test/test_halide_nets.cpp (20 lines changed)

@@ -39,7 +39,7 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
else
CV_Error(Error::StsNotImplemented, "Unknown framework " + framework);
net->setInput(blobFromImage(input, 1.0, false));
net->setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
net->setPreferableBackend(DNN_BACKEND_HALIDE);
net->setPreferableTarget(targetId);
net->setHalideScheduler(scheduler);
@@ -52,7 +52,7 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
PERF_TEST(GoogLeNet, HalidePerfTest)
{
Net net;
loadNet("dnn/bvlc_googlenet2.caffemodel", "dnn/bvlc_googlenet.prototxt",
loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
"", 227, 227, "prob", "caffe", DNN_TARGET_CPU, &net);
TEST_CYCLE() net.forward();
SANITY_CHECK_NOTHING();

@@ -167,32 +167,9 @@ public:
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Var x("x"), y("y"), c("c"), n("n");
const int weightsBlobIndex = 2;
const int biasBlobIndex = weightsBlobIndex + hasWeights;
const int numChannels = blobs[0].total();
float* meanData = (float*)blobs[0].data;
float* stdData = (float*)blobs[1].data;
float* weightsData = (hasWeights ? (float*)blobs[weightsBlobIndex].data : NULL);
float* biasData = (hasBias ? (float*)blobs[biasBlobIndex].data : NULL);
float varMeanScale = 1.f;
if (!hasWeights && !hasBias) {
varMeanScale = *blobs[2].ptr<float>();
if (varMeanScale != 0)
varMeanScale = 1/varMeanScale;
}
Halide::Buffer<float> weights(numChannels);
Halide::Buffer<float> bias(numChannels);
for (int i = 0; i < numChannels; ++i)
{
weights(i) = (hasWeights ? weightsData[i] : 1.0f) /
sqrt(stdData[i] * varMeanScale + epsilon);
bias(i) = (hasBias ? biasData[i] : 0.0f) -
weights(i) * meanData[i] * varMeanScale;
}
weights.set_host_dirty();
bias.set_host_dirty();
const int numChannels = weights_.total();
auto weights = wrapToHalideBuffer(weights_, {numChannels});
auto bias = wrapToHalideBuffer(bias_, {numChannels});
top(x, y, c, n) = input * weights(c) + bias(c);
return top;
}

@@ -625,7 +625,7 @@ public:
{
// prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing
Mat wm = blobs[0].reshape(1, outCn);
Mat wm = blobs[0].reshape(1, outCn).clone();
if( wm.step1() % VEC_ALIGN != 0 )
{
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);

@@ -34,7 +34,7 @@ static void loadNet(const std::string& weights, const std::string& proto,
static void test(const std::string& weights, const std::string& proto,
const std::string& scheduler, int inWidth, int inHeight,
const std::string& outputLayer, const std::string& framework,
int targetId)
int targetId, double l1 = 1e-5, double lInf = 1e-4)
{
Mat input(inHeight, inWidth, CV_32FC3), outputDefault, outputHalide;
randu(input, 0.0f, 1.0f);
@@ -43,23 +43,23 @@ static void test(const std::string& weights, const std::string& proto,
loadNet(weights, proto, framework, &netDefault);
loadNet(weights, proto, framework, &netHalide);
netDefault.setInput(blobFromImage(input.clone(), 1.0f, false));
netDefault.setInput(blobFromImage(input.clone(), 1.0f, Size(), Scalar(), false));
outputDefault = netDefault.forward(outputLayer).clone();
netHalide.setInput(blobFromImage(input.clone(), 1.0f, false));
netHalide.setInput(blobFromImage(input.clone(), 1.0f, Size(), Scalar(), false));
netHalide.setPreferableBackend(DNN_BACKEND_HALIDE);
netHalide.setPreferableTarget(targetId);
netHalide.setHalideScheduler(scheduler);
outputHalide = netHalide.forward(outputLayer).clone();
normAssert(outputDefault, outputHalide);
normAssert(outputDefault, outputHalide, "First run", l1, lInf);
// An extra test: change input.
input *= 0.1f;
netDefault.setInput(blobFromImage(input.clone(), 1.0, false));
netHalide.setInput(blobFromImage(input.clone(), 1.0, false));
netDefault.setInput(blobFromImage(input.clone(), 1.0, Size(), Scalar(), false));
netHalide.setInput(blobFromImage(input.clone(), 1.0, Size(), Scalar(), false));
normAssert(outputDefault, outputHalide);
normAssert(outputDefault, outputHalide, "Second run", l1, lInf);
// Swap backends.
netHalide.setPreferableBackend(DNN_BACKEND_DEFAULT);
@@ -71,7 +71,7 @@ static void test(const std::string& weights, const std::string& proto,
netDefault.setHalideScheduler(scheduler);
outputHalide = netDefault.forward(outputLayer).clone();
normAssert(outputDefault, outputHalide);
normAssert(outputDefault, outputHalide, "Swap backends", l1, lInf);
}
////////////////////////////////////////////////////////////////////////////////
@@ -119,7 +119,7 @@ TEST(Reproducibility_ENet_Halide, Accuracy)
{
test(findDataFile("dnn/Enet-model-best.net", false), "",
findDataFile("dnn/halide_scheduler_enet.yml", false),
512, 512, "l367_Deconvolution", "torch", DNN_TARGET_CPU);
512, 512, "l367_Deconvolution", "torch", DNN_TARGET_CPU, 2e-5, 0.15);
};
////////////////////////////////////////////////////////////////////////////////
// OpenCL target
@@ -166,7 +166,7 @@ TEST(Reproducibility_ENet_Halide_opencl, Accuracy)
{
test(findDataFile("dnn/Enet-model-best.net", false), "",
findDataFile("dnn/halide_scheduler_opencl_enet.yml", false),
512, 512, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL);
512, 512, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL, 2e-5, 0.14);
};
#endif // HAVE_HALIDE

Loading…
Cancel
Save