opencv_contrib/modules/cnn_3dobj/samples/classifyIMG_demo.cpp

/*
 * Software License Agreement (BSD License)
 *
 *  Copyright (c) 2009, Willow Garage, Inc.
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *   * Neither the name of Willow Garage, Inc. nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 *
 */
#define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp>
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;

/**
 * Return the indices of the N smallest values of v, ordered by ascending value.
 *
 * Despite its name this selects minima: the (value, index) pairs are sorted in
 * ascending order, which is what the caller needs when v holds distances.
 * Equal values are ordered by ascending index, because std::pair compares
 * lexicographically.
 *
 * @param v  Values to rank.
 * @param N  Number of indices requested. Values outside [0, v.size()] are
 *           clamped into that range.
 * @return   Indices into v of the smallest values, smallest first. Holds
 *           min(max(N, 0), v.size()) entries.
 */
std::vector<int> argmax(const std::vector<float>& v, int N)
{
    // Clamp the request: handing std::partial_sort a "middle" iterator outside
    // [begin, end] is undefined behaviour, as is indexing past the end below.
    int count = N < 0 ? 0 : N;
    if (static_cast<size_t>(count) > v.size())
        count = static_cast<int>(v.size());

    std::vector<std::pair<float, int> > pairs;
    pairs.reserve(v.size());
    for (size_t i = 0; i < v.size(); ++i)
        pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

    // Only the first `count` entries need to be in order.
    std::partial_sort(pairs.begin(), pairs.begin() + count, pairs.end());

    std::vector<int> result;
    result.reserve(static_cast<size_t>(count));
    for (int i = 0; i < count; ++i)
        result.push_back(pairs[i].second);
    return result;
}

/**
 * Rank the rows of a gallery descriptor matrix by distance to a target descriptor.
 *
 * For every row of `reference`, the difference (row - target) is formed and its
 * norm taken with cv::norm's default norm type. The N rows with the smallest
 * distance are reported, closest first.
 *
 * @param reference  Gallery descriptors, one per row.
 * @param target     Descriptor to match; it is subtracted from each single row
 *                   of `reference`, so it must be compatible with one row.
 * @param N          Number of candidates requested. Values outside
 *                   [0, reference.rows] are clamped into that range.
 * @param labels_    Labels indexed by row number of `reference`.
 * @return           (label, distance) pairs in ascending order of distance.
 * @throws std::out_of_range if a selected row has no entry in `labels_`.
 */
std::vector<std::pair<string, float> > classify(const cv::Mat& reference, const cv::Mat& target, int N, std::vector<string> labels_)
{
    std::vector<float> output;
    if (reference.rows > 0)
        output.reserve(static_cast<size_t>(reference.rows));
    for (int i = 0; i < reference.rows; i++)
    {
        const cv::Mat diff = reference.row(i) - target;
        // cv::norm returns double; the distances are stored as float.
        output.push_back(static_cast<float>(cv::norm(diff)));
    }

    // Never ask for more candidates than there are gallery rows (or a negative
    // number of them); doing so would index past the end of the ranking.
    int count = N < 0 ? 0 : N;
    if (static_cast<size_t>(count) > output.size())
        count = static_cast<int>(output.size());

    const std::vector<int> nearest = argmax(output, count);

    std::vector<std::pair<string, float> > predictions;
    predictions.reserve(nearest.size());
    for (size_t i = 0; i < nearest.size(); ++i)
    {
        const size_t idx = static_cast<size_t>(nearest[i]);
        // at() turns a label/row count mismatch into an exception rather than
        // an out-of-bounds read.
        predictions.push_back(std::make_pair(labels_.at(idx), output[idx]));
    }
    return predictions;
}

int main(int argc, char** argv)
{
    const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe. If there little variance in data such as human faces, you can add a mean_file, otherwise it is not so useful}"
"{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}"
"{caffemodel | ../data/3d_triplet_iter_20000.caffemodel | caffe model for feature exrtaction.}"
"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
"{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}"
"{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}"
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
    cv::CommandLineParser parser(argc, argv, keys);
    parser.about("Demo for object data classification and pose estimation");
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    string src_dir = parser.get<string>("src_dir");
    string caffemodel = parser.get<string>("caffemodel");
    string network_forIMG   = parser.get<string>("network_forIMG");
    string mean_file    = parser.get<string>("mean_file");
    string target_img   = parser.get<string>("target_img");
    string feature_blob = parser.get<string>("feature_blob");
    int num_candidate = parser.get<int>("num_candidate");
    string device = parser.get<string>("device");
    int dev_id = parser.get<int>("dev_id");

    cv::cnn_3dobj::descriptorExtractor descriptor;
    bool set_succeed = descriptor.setNet(device, dev_id);
    int net_ready;
    if (strcmp(mean_file.c_str(), "no") == 0)
        net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel);
    else
        net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file);
    std::vector<string> name_gallery;
    descriptor.listDir(src_dir.c_str(), name_gallery, false);
    descriptor.getLabellist(name_gallery);
    for (unsigned int i = 0; i < name_gallery.size(); i++) {
        name_gallery[i] = src_dir + name_gallery[i];
    }
    std::vector<cv::Mat> img_gallery;
    cv::Mat feature_reference;
    for (unsigned int i = 0; i < name_gallery.size(); i++) {
        img_gallery.push_back(cv::imread(name_gallery[i], -1));
    }
    descriptor.extract(net_ready, img_gallery, feature_reference, feature_blob);

    std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;

    cv::Mat img = cv::imread(target_img, -1);
    // CHECK(!img.empty()) << "Unable to decode image " << target_img;
    std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl;
    std::vector<std::pair<string, float> > prediction;
    for (unsigned int i = 0; i < feature_reference.rows; i++)
        std::cout << feature_reference.row(i) << endl;
    cv::Mat feature_test;
    descriptor.extract(net_ready, img, feature_test, feature_blob);
    std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl;
    prediction = classify(feature_reference, feature_test, num_candidate, descriptor.labels_);
    // Print the top N prediction.
    std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
    for (size_t i = 0; i < prediction.size(); ++i) {
    std::pair<string, float> p = prediction[i];
    std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" << p.first << "\"" << std::endl;
    }
    return 0;
}