Add googlenet tutorial

10 years ago · 3154fc0dba
parent e52a7eeedb
commit 3154fc0dba
4 changed files with 129 additions and 64 deletions
--- a/modules/dnn/samples/caffe_googlenet.cpp
+++ b/modules/dnn/samples/caffe_googlenet.cpp
@ -51,18 +51,14 @@ using namespace cv::dnn;
 #include <cstdlib>
 using namespace std;

-/* It contains class number and probability of this class */
-typedef std::pair<int, double> ClassProb;
-
 /* Find best class for the blob (i. e. class with maximal probability) */
-ClassProb getMaxClass(dnn::Blob &probBlob)
+void getMaxClass(dnn::Blob &probBlob, int *classId, double *classProb)
 {
-    Mat probMat = probBlob.matRefConst().reshape(1, 1);
-    double classProb;
+    Mat probMat = probBlob.matRefConst().reshape(1, 1); //reshape the blob to 1x1000 matrix
    Point classNumber;
-    minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber);

-    return std::make_pair(classNumber.x, classProb);
+    minMaxLoc(probMat, NULL, classProb, NULL, &classNumber);
+    *classId = classNumber.x;
 }

 std::vector<String> readClassNames(const char *filename = "synset_words.txt")
@ -89,75 +85,80 @@ std::vector<String> readClassNames(const char *filename = "synset_words.txt")
    return classNames;
 }

-/* Create batch from the image */
-dnn::Blob makeInputBlob(const String &imagefile)
+int main(int argc, char **argv)
 {
-    Mat img = imread(imagefile);
-    if (img.empty())
+    String modelTxt = "bvlc_googlenet.prototxt";
+    String modelBin = "bvlc_googlenet.caffemodel";
+
+    //! [importer_creation]
+    Ptr<dnn::Importer> importer;
+    try //Try to import Caffe GoogleNet model
+    {
+        importer = dnn::createCaffeImporter(modelTxt, modelBin);
+    }
+    catch (const cv::Exception &err) //importer can throw errors, we will catch them
+    {
+        std::cerr << err.msg << std::endl;
+        importer = Ptr<Importer>(); //NULL
+    }
+    //! [importer_creation]
+
+    if (!importer)
    {
-        std::cerr << "Can't read image from file:" << std::endl;
-        std::cerr << imagefile << std::endl;
+        std::cerr << "Can't load network by using the following files: " << std::endl;
+        std::cerr << "prototxt:   " << modelTxt << std::endl;
+        std::cerr << "caffemodel: " << modelBin << std::endl;
+        std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
+        std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
        exit(-1);
    }

-    cvtColor(img, img, COLOR_BGR2RGB);
-    resize(img, img, Size(227, 227));
+    //! [network_initialization]
+    dnn::Net net;
+    importer->populateNet(net);

-    return dnn::Blob(img); //construct 4-dim Blob (i. e. batch)
-}
+    delete importer;
+    //! [network_initialization]

-int main(int argc, char **argv)
-{
-    /* Initialize network */
-    dnn::Net net;
+
+    String imagefile = (argc > 1) ? argv[1] : "space_shuttle.jpg";
+
+    //! [input_blob_preparation]
+    Mat img = imread(imagefile);
+    if (img.empty())
    {
-        String modelTxt = "bvlc_googlenet.prototxt";
-        String modelBin = "bvlc_googlenet.caffemodel";
-
-        Ptr<dnn::Importer> importer; //Try to import Caffe GoogleNet model
-        try
-        {
-            importer = dnn::createCaffeImporter(modelTxt, modelBin);
-        }
-        catch(const cv::Exception &er) //importer can throw errors, we will catch them
-        {
-            std::cerr << er.msg << std::endl;
-            importer = Ptr<Importer>(); //NULL
-        }
-
-        if (!importer)
-        {
-            std::cerr << "Can't load network by using the following files: " << std::endl;
-            std::cerr << "prototxt:   " << modelTxt << std::endl;
-            std::cerr << "caffemodel: " << modelBin << std::endl;
-            std::cerr << "Please, check them." << std::endl;
-            std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl;
-            std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl;
-            exit(-1);
-        }
-
-        importer->populateNet(net);
+        std::cerr << "Can't read image from the file: " << imagefile << std::endl;
+        exit(-1);
    }

-    std::vector<String> classNames = readClassNames();
+    //GoogLeNet accepts only 224x224 RGB-images
+    cvtColor(img, img, COLOR_BGR2RGB);
+    resize(img, img, Size(224, 224));

-    String filename = (argc > 1) ? argv[1] : "space_shuttle.jpg";
+    dnn::Blob inputBlob = dnn::Blob(img);
+    //! [input_blob_preparation]

-    Blob inputBlob = makeInputBlob(filename);   //make batch
-    net.setBlob(".data", inputBlob);            //set this blob to the network input
+    //! [setup_blob]
+    net.setBlob(".data", inputBlob);            //set the network input
+    //! [setup_blob]
+
+    //! [make_forward]
    net.forward();                              //compute output
+    //! [make_forward]

-    dnn::Blob prob = net.getBlob("prob");       //gather output of prob layer
-    ClassProb bc = getMaxClass(prob);           //find best class
+    //! [get_output]
+    dnn::Blob prob = net.getBlob("prob");       //gather output of "prob" layer

-    String className = classNames.at(bc.first);
+    int classId;
+    double classProb;
+    getMaxClass(prob, &classId, &classProb);    //find the best class
+    //! [get_output]

-    std::cout << "Best class:";
-    std::cout << " #" << bc.first;
-    std::cout << " (from " << prob.total(1) << ")";
-    std::cout << " \"" + className << "\"";
-    std::cout <<  std::endl;
-    std::cout << "Prob: " << bc.second * 100 << "%" << std::endl;
+    //! [print_info]
+    std::vector<String> classNames = readClassNames();
+    std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl;
+    std::cout << "Probability: " << classProb * 100 << "%" << std::endl;
+    //! [print_info]

    return 0;
 }
--- a/modules/dnn/tutorials/images/space_shuttle.jpg
+++ b/modules/dnn/tutorials/images/space_shuttle.jpg
--- a/modules/dnn/tutorials/tutorial_dnn_build.markdown
+++ b/modules/dnn/tutorials/tutorial_dnn_build.markdown
@ -32,18 +32,18 @@ git clone https://github.com/Itseez/opencv_contrib
       - choose the preffered project generator (Makefiles for Linux, MS Visual Studio for Windows);
       - also you can set many opencv build options, for more details see @ref tutorial_linux_install.

-# In the appeared list of variables find variable `OPENCV_EXTRA_MODULES_PATH` and set it to the **opencv_root**/opencv_contrib.
+-# In the list of build parameters that appears, find the `OPENCV_EXTRA_MODULES_PATH` parameter and set it to **opencv_root**/opencv_contrib.

 -# *Configure* the project again, and set build options of dnn module:

-    - `BUILD_opencv_dnn` variable must exist and be checked.
+    - `BUILD_opencv_dnn` parameter must exist and be checked.

    - dnn module covers waste part of [Caffe](http://caffe.berkeleyvision.org) framework functionality.
      However, to load Caffe networks libprotobuf is required.
      You you can uncheck `BUILD_LIBPROTOBUF_FROM_SOURCES` flag to try use protobuf installed in your system.
      Elsewise libpotobuf will be built from opencv sources.

-    - You can additionaly check `opencv_dnn_BUILD_TORCH_IMPORTER` variable to build [Torch7](http://torch.ch) importer.
+    - You can additionally check the `opencv_dnn_BUILD_TORCH_IMPORTER` parameter to build the [Torch7](http://torch.ch) importer.
       It allows you to use networks, generated by Torch7 [nn](https://github.com/torch/nn/blob/master/README.md) module.

 -# *Generate* the project and build it:
--- a/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown
+++ b/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown
@ -0,0 +1,64 @@
+Load Caffe framework models  {#tutorial_dnn_googlenet}
+===========================
+
+Introduction
+------------
+
+In this tutorial you will learn how to use the opencv_dnn module for image classification with the
+trained GoogLeNet network from the [Caffe model zoo](http://caffe.berkeleyvision.org/model_zoo.html).
+
+We will demonstrate the results of this example on the following picture.
+![Buran space shuttle](images/space_shuttle.jpg)
+
+Source Code
+-----------
+
+We will be using snippets from the example application, which can be downloaded [here](https://github.com/ludv1x/opencv_contrib/blob/master/modules/dnn/samples/caffe_googlenet.cpp).
+
+Explanation
+-----------
+
+-# Firstly, download GoogLeNet model files:
+   [bvlc_googlenet.prototxt  ](https://raw.githubusercontent.com/ludv1x/opencv_contrib/master/modules/dnn/samples/bvlc_googlenet.prototxt) and
+   [bvlc_googlenet.caffemodel](http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel)
+
+   You also need a file with the names of the [ILSVRC2012](http://image-net.org/challenges/LSVRC/2012/browse-synsets) classes:
+   [synset_words.txt](https://raw.githubusercontent.com/ludv1x/opencv_contrib/master/modules/dnn/samples/synset_words.txt).
+
+   Put these files into the working directory of the example program.
+
+-# Create the importer of Caffe models
+   @snippet dnn/samples/caffe_googlenet.cpp importer_creation
+
+-# Create the network and initialize it by using the created importer
+   @snippet dnn/samples/caffe_googlenet.cpp network_initialization
+
+-# Read the input image and convert it to a blob acceptable to GoogLeNet
+   @snippet dnn/samples/caffe_googlenet.cpp input_blob_preparation
+   Firstly, we resize the image and change its channel sequence order.
+
+   Now image is actually a 3-dimensional array with 224x224x3 shape.
+
+   Next, we convert the image to a 4-dimensional blob (a so-called batch) with 1x3x224x224 shape by using the special @ref cv::dnn::Blob constructor.
+
+-# Pass the blob to the network
+   @snippet dnn/samples/caffe_googlenet.cpp setup_blob
+   In bvlc_googlenet.prototxt the network input blob is named "data", therefore this blob is labeled as ".data" in the opencv_dnn API.
+
+   Other blobs are labeled as "name_of_layer.name_of_layer_output".
+
+-# Make forward pass
+   @snippet dnn/samples/caffe_googlenet.cpp make_forward
+   During the forward pass, the output of each network layer is computed, but in this example we need only the output of the "prob" layer.
+
+-# Determine the best class
+   @snippet dnn/samples/caffe_googlenet.cpp get_output
+   We put the output of the "prob" layer, which contains probabilities for each of the 1000 ILSVRC2012 image classes, into the `prob` blob.
+   Then we find the index of the element with the maximal value in it. This index corresponds to the class of the image.
+
+-# Print the results
+   @snippet dnn/samples/caffe_googlenet.cpp print_info
+   For our image we get:
+> Best class: #812 'space shuttle'
+>
+> Probability: 99.6378%