diff --git a/modules/dnn/samples/caffe_googlenet.cpp b/modules/dnn/samples/caffe_googlenet.cpp index 72c1db174..66e383162 100644 --- a/modules/dnn/samples/caffe_googlenet.cpp +++ b/modules/dnn/samples/caffe_googlenet.cpp @@ -51,18 +51,14 @@ using namespace cv::dnn; #include using namespace std; -/* It contains class number and probability of this class */ -typedef std::pair ClassProb; - /* Find best class for the blob (i. e. class with maximal probability) */ -ClassProb getMaxClass(dnn::Blob &probBlob) +void getMaxClass(dnn::Blob &probBlob, int *classId, double *classProb) { - Mat probMat = probBlob.matRefConst().reshape(1, 1); - double classProb; + Mat probMat = probBlob.matRefConst().reshape(1, 1); //reshape the blob to 1x1000 matrix Point classNumber; - minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber); - return std::make_pair(classNumber.x, classProb); + minMaxLoc(probMat, NULL, classProb, NULL, &classNumber); + *classId = classNumber.x; } std::vector readClassNames(const char *filename = "synset_words.txt") @@ -89,75 +85,80 @@ std::vector readClassNames(const char *filename = "synset_words.txt") return classNames; } -/* Create batch from the image */ -dnn::Blob makeInputBlob(const String &imagefile) +int main(int argc, char **argv) { - Mat img = imread(imagefile); - if (img.empty()) + String modelTxt = "bvlc_googlenet.prototxt"; + String modelBin = "bvlc_googlenet.caffemodel"; + + //! [importer_creation] + Ptr importer; + try //Try to import Caffe GoogleNet model + { + importer = dnn::createCaffeImporter(modelTxt, modelBin); + } + catch (const cv::Exception &err) //importer can throw errors, we will catch them + { + std::cerr << err.msg << std::endl; + importer = Ptr(); //NULL + } + //! 
[importer_creation] + + if (!importer) { - std::cerr << "Can't read image from file:" << std::endl; - std::cerr << imagefile << std::endl; + std::cerr << "Can't load network by using the following files: " << std::endl; + std::cerr << "prototxt: " << modelTxt << std::endl; + std::cerr << "caffemodel: " << modelBin << std::endl; + std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; + std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; exit(-1); } - cvtColor(img, img, COLOR_BGR2RGB); - resize(img, img, Size(227, 227)); + //! [network_initialization] + dnn::Net net; + importer->populateNet(net); - return dnn::Blob(img); //construct 4-dim Blob (i. e. batch) -} + delete importer; + //! [network_initialization] -int main(int argc, char **argv) -{ - /* Initialize network */ - dnn::Net net; + + String imagefile = (argc > 1) ? argv[1] : "space_shuttle.jpg"; + + //! [input_blob_preparation] + Mat img = imread(imagefile); + if (img.empty()) { - String modelTxt = "bvlc_googlenet.prototxt"; - String modelBin = "bvlc_googlenet.caffemodel"; - - Ptr importer; //Try to import Caffe GoogleNet model - try - { - importer = dnn::createCaffeImporter(modelTxt, modelBin); - } - catch(const cv::Exception &er) //importer can throw errors, we will catch them - { - std::cerr << er.msg << std::endl; - importer = Ptr(); //NULL - } - - if (!importer) - { - std::cerr << "Can't load network by using the following files: " << std::endl; - std::cerr << "prototxt: " << modelTxt << std::endl; - std::cerr << "caffemodel: " << modelBin << std::endl; - std::cerr << "Please, check them." 
<< std::endl; - std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; - std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; - exit(-1); - } - - importer->populateNet(net); + std::cerr << "Can't read image from the file: " << imagefile << std::endl; + exit(-1); } - std::vector classNames = readClassNames(); + //GoogLeNet accepts only 224x224 RGB-images + cvtColor(img, img, COLOR_BGR2RGB); + resize(img, img, Size(224, 224)); - String filename = (argc > 1) ? argv[1] : "space_shuttle.jpg"; + dnn::Blob inputBlob = dnn::Blob(img); + //! [input_blob_preparation] - Blob inputBlob = makeInputBlob(filename); //make batch - net.setBlob(".data", inputBlob); //set this blob to the network input + //! [setup_blob] + net.setBlob(".data", inputBlob); //set the network input + //! [setup_blob] + + //! [make_forward] net.forward(); //compute output + //! [make_forward] - dnn::Blob prob = net.getBlob("prob"); //gather output of prob layer - ClassProb bc = getMaxClass(prob); //find best class + //! [get_output] + dnn::Blob prob = net.getBlob("prob"); //gather output of "prob" layer - String className = classNames.at(bc.first); + int classId; + double classProb; + getMaxClass(prob, &classId, &classProb); //find the best class + //! [get_output] - std::cout << "Best class:"; - std::cout << " #" << bc.first; - std::cout << " (from " << prob.total(1) << ")"; - std::cout << " \"" + className << "\""; - std::cout << std::endl; - std::cout << "Prob: " << bc.second * 100 << "%" << std::endl; + //! [print_info] + std::vector classNames = readClassNames(); + std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl; + std::cout << "Probability: " << classProb * 100 << "%" << std::endl; + //! 
[print_info] return 0; } diff --git a/modules/dnn/tutorials/images/space_shuttle.jpg b/modules/dnn/tutorials/images/space_shuttle.jpg new file mode 100644 index 000000000..412a91969 Binary files /dev/null and b/modules/dnn/tutorials/images/space_shuttle.jpg differ diff --git a/modules/dnn/tutorials/tutorial_dnn_build.markdown b/modules/dnn/tutorials/tutorial_dnn_build.markdown index 14ba758a7..32bf3194c 100644 --- a/modules/dnn/tutorials/tutorial_dnn_build.markdown +++ b/modules/dnn/tutorials/tutorial_dnn_build.markdown @@ -32,18 +32,18 @@ git clone https://github.com/Itseez/opencv_contrib - choose the preffered project generator (Makefiles for Linux, MS Visual Studio for Windows); - also you can set many opencv build options, for more details see @ref tutorial_linux_install. --# In the appeared list of variables find variable `OPENCV_EXTRA_MODULES_PATH` and set it to the **opencv_root**/opencv_contrib. +-# In the appeared list of build parameters find parameter `OPENCV_EXTRA_MODULES_PATH` and set it to the **opencv_root**/opencv_contrib. -# *Configure* the project again, and set build options of dnn module: - - `BUILD_opencv_dnn` variable must exist and be checked. + - `BUILD_opencv_dnn` parameter must exist and be checked. - dnn module covers waste part of [Caffe](http://caffe.berkeleyvision.org) framework functionality. However, to load Caffe networks libprotobuf is required. You you can uncheck `BUILD_LIBPROTOBUF_FROM_SOURCES` flag to try use protobuf installed in your system. Elsewise libpotobuf will be built from opencv sources. - - You can additionaly check `opencv_dnn_BUILD_TORCH_IMPORTER` variable to build [Torch7](http://torch.ch) importer. + - You can additionaly check `opencv_dnn_BUILD_TORCH_IMPORTER` parameter to build [Torch7](http://torch.ch) importer. It allows you to use networks, generated by Torch7 [nn](https://github.com/torch/nn/blob/master/README.md) module. 
-# *Generate* the project and build it: diff --git a/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown b/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown new file mode 100644 index 000000000..cc03acbb1 --- /dev/null +++ b/modules/dnn/tutorials/tutorial_dnn_googlenet.markdown @@ -0,0 +1,64 @@ +Load Caffe framework models {#tutorial_dnn_googlenet} +=========================== + +Introduction +------------ + +In this tutorial you will learn how to use the opencv_dnn module for image classification by using +the trained GoogLeNet network from [Caffe model zoo](http://caffe.berkeleyvision.org/model_zoo.html). + +We will demonstrate the results of this example on the following picture. +![Buran space shuttle](images/space_shuttle.jpg) + +Source Code +----------- + +We will be using snippets from the example application, which can be downloaded [here](https://github.com/ludv1x/opencv_contrib/blob/master/modules/dnn/samples/caffe_googlenet.cpp). + +Explanation +----------- + +-# Firstly, download GoogLeNet model files: + [bvlc_googlenet.prototxt](https://raw.githubusercontent.com/ludv1x/opencv_contrib/master/modules/dnn/samples/bvlc_googlenet.prototxt) and + [bvlc_googlenet.caffemodel](http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel) + + You also need a file with the names of [ILSVRC2012](http://image-net.org/challenges/LSVRC/2012/browse-synsets) classes: + [synset_words.txt](https://raw.githubusercontent.com/ludv1x/opencv_contrib/master/modules/dnn/samples/synset_words.txt). + + Put these files into the working directory of this example program. 
+ +-# Create the importer of Caffe models + @snippet dnn/samples/caffe_googlenet.cpp importer_creation + +-# Create the network and initialize it by using the created importer + @snippet dnn/samples/caffe_googlenet.cpp network_initialization + +-# Read the input image and convert it to a blob acceptable by GoogLeNet + @snippet dnn/samples/caffe_googlenet.cpp input_blob_preparation + Firstly, we resize the image and change its channel sequence order. + + Now the image is actually a 3-dimensional array with 224x224x3 shape. + + Next, we convert the image to a 4-dimensional blob (so-called batch) with 1x3x224x224 shape by using the special @ref cv::dnn::Blob constructor. + +-# Pass the blob to the network + @snippet dnn/samples/caffe_googlenet.cpp setup_blob + In bvlc_googlenet.prototxt the network input blob is named "data", therefore this blob is labeled as ".data" in the opencv_dnn API. + + Other blobs are labeled as "name_of_layer.name_of_layer_output". + +-# Make forward pass + @snippet dnn/samples/caffe_googlenet.cpp make_forward + During the forward pass the output of each network layer is computed, but in this example we need the output from the "prob" layer only. + +-# Determine the best class + @snippet dnn/samples/caffe_googlenet.cpp get_output + We put the output of the "prob" layer, which contains probabilities for each of 1000 ILSVRC2012 image classes, into the `prob` blob. + Then we find the index of the element with the maximal value in it. This index corresponds to the class of the image. + +-# Print the results + @snippet dnn/samples/caffe_googlenet.cpp print_info + For our image we get: +> Best class: #812 'space shuttle' +> +> Probability: 99.6378% \ No newline at end of file