Add C++ tutorial samples for data generation and classification

pull/276/head
Wangyida 9 years ago
parent 197fba68f4
commit 97d49a8834
  1. modules/cnn_3dobj/samples/CMakeLists.txt (6 changes)
  2. modules/cnn_3dobj/samples/demo_classify.cpp (40 changes)
  3. modules/cnn_3dobj/samples/demo_model_analysis.cpp (22 changes)
  4. modules/cnn_3dobj/samples/demo_sphereview_data.cpp (49 changes)
  5. modules/cnn_3dobj/tutorials/data_generation/data_generation.markdown (75 changes)
  6. modules/cnn_3dobj/tutorials/feature_classification/classify.markdown (66 changes)
  7. modules/cnn_3dobj/tutorials/model_analysis/model_analysis.markdown (60 changes)
  8. modules/cnn_3dobj/tutorials/table_of_content_cnn_3dobj.markdown (26 changes)

@@ -3,15 +3,15 @@ SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb ")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
project(sphereview_test)
find_package(OpenCV REQUIRED)
-set(SOURCES_generator sphereview_3dobj_demo.cpp)
+set(SOURCES_generator demo_sphereview_data.cpp)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(sphereview_test ${SOURCES_generator})
target_link_libraries(sphereview_test ${OpenCV_LIBS})
-set(SOURCES_classifier classifyIMG_demo.cpp)
+set(SOURCES_classifier demo_classify.cpp)
add_executable(classify_test ${SOURCES_classifier})
target_link_libraries(classify_test ${OpenCV_LIBS})
-set(SOURCES_modelanalysis model_analysis_demo.cpp)
+set(SOURCES_modelanalysis demo_model_analysis.cpp)
add_executable(model_test ${SOURCES_modelanalysis})
target_link_libraries(model_test ${OpenCV_LIBS})
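With OpenCV built together with the cnn_3dobj module, these targets produce the three demo binaries used by the tutorials below: sphereview_test, classify_test and model_test.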

@@ -32,6 +32,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*/
/**
* @file demo_classify.cpp
* @brief Feature extraction and classification.
* @author Yida Wang
*/
#define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d/features2d.hpp>
@@ -40,7 +45,10 @@ using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
-/* Get the file name from a root dictionary. */
+/**
+* @function listDir
+* @brief Make a list of all file names under a directory
+*/
void listDir(const char *path, std::vector<string>& files, bool r)
{
DIR *pDir;
@@ -70,9 +78,12 @@ void listDir(const char *path, std::vector<string>& files, bool r)
sort(files.begin(),files.end());
};
/**
* @function main
*/
int main(int argc, char** argv)
{
const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe. If there little variance in data such as human faces, you can add a mean_file, otherwise it is not so useful}"
const String keys = "{help | | This sample will extract featrues from reference images and target image for classification. You can add a mean_file if there little variance in data such as human faces, otherwise it is not so useful}"
"{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}"
"{caffemodel | ../../testdata/cv/3d_triplet_iter_30000.caffemodel | caffe model for feature exrtaction.}"
"{network_forIMG | ../../testdata/cv/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
@@ -80,10 +91,12 @@ int main(int argc, char** argv)
"{target_img | ../data/images_all/1_8.png | Path of image waiting to be classified.}"
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{num_candidate | 15 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
"{device | CPU | Device type: CPU or GPU}"
"{dev_id | 0 | Device id}";
/* Get parameters from the command line. */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for object data classification and pose estimation");
parser.about("Feature extraction and classification");
if (parser.has("help"))
{
parser.printMessage();
@@ -99,13 +112,18 @@ int main(int argc, char** argv)
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
/* Initialize a network with the device. */
cv::cnn_3dobj::descriptorExtractor descriptor(device);
std::cout << "Using " << descriptor.getDeviceType() << std::endl;
/* Load the net with the trained Caffe network parameters and structure. */
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
std::vector<string> name_gallery;
/* List the file names under a given path */
listDir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++)
{
@@ -117,23 +135,31 @@ int main(int argc, char** argv)
{
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
/* Extract features from a set of images. */
descriptor.extract(img_gallery, feature_reference, feature_blob);
std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
cv::Mat img = cv::imread(target_img, -1);
// CHECK(!img.empty()) << "Unable to decode image " << target_img;
std::cout << std::endl << "---------- Features of gallery images ----------" << std::endl;
std::vector<std::pair<string, float> > prediction;
/* Print features of the reference images. */
for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
/* Initialize a matcher using L2 distance. */
cv::BFMatcher matcher(NORM_L2);
std::vector<std::vector<cv::DMatch> > matches;
/* Run a KNN match between the target and reference images. */
matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
/* Print the feature of the target image to be classified. */
std::cout << std::endl << "---------- Features of target image: " << target_img << " ----------" << endl << feature_test << std::endl;
-// Print the top N prediction.
+/* Print the top N predictions. */
std::cout << std::endl << "---------- Prediction result (Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < matches[0].size(); ++i)
{

@@ -32,6 +32,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*/
/**
* @file demo_model_analysis.cpp
* @brief Analysis of the performance of the trained model.
* @author Yida Wang
*/
#define HAVE_CAFFE
#include <iostream>
#include "opencv2/imgproc.hpp"
@@ -52,6 +57,7 @@ int main(int argc, char** argv)
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
/* Get parameters from the command line. */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for object data classification and pose estimation");
if (parser.has("help"))
@@ -70,13 +76,23 @@ int main(int argc, char** argv)
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
std::vector<string> ref_img;
/* Sample closest in pose to the reference image
* and of the same class.
*/
ref_img.push_back(ref_img1);
/* Sample less close in pose to the reference image
* but still of the same class.
*/
ref_img.push_back(ref_img2);
/* Sample very close in pose to the reference image
* but not of the same class.
*/
ref_img.push_back(ref_img3);
/* Initialize a network with the device. */
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
/* Load the net with the trained Caffe network parameters and structure. */
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
@@ -116,6 +132,10 @@ int main(int argc, char** argv)
}
bool pose_pass = false;
bool class_pass = false;
/* Compare the distances between the reference image and the 3 other images:
* the distance to the closest sample should be the smallest, and
* the distance to the sample from another class should be the largest.
*/
if (matches[0] < matches[1] && matches[0] < matches[2])
pose_pass = true;
if (matches[1] < matches[2])

@@ -32,6 +32,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*/
/**
* @file demo_sphereview_data.cpp
* @brief Generating training data for CNN with triplet loss.
* @author Yida Wang
*/
#define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/viz/vizcore.hpp>
@@ -44,14 +49,15 @@ int main(int argc, char *argv[])
{
const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../data/3Dmodel/ape.ply -imagedir=../data/images_all/ -labeldir=../data/label_all.txt -num_class=4 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}"
"{ite_depth | 2 | Iteration of sphere generation.}"
"{plymodel | ../data/3Dmodel/ape.ply | path of the '.ply' file for image rendering. }"
"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
"{num_class | 4 | total number of classes of models}"
"{label_class | 0 | class label of current .ply model}"
"{rgb_use | 0 | use RGB image or grayscale}";
"{plymodel | ../data/3Dmodel/ape.ply | Path of the '.ply' file for image rendering. }"
"{imagedir | ../data/images_all/ | Path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | Path of the generated images for one particular .ply model. }"
"{num_class | 4 | Total number of classes of models}"
"{label_class | 0 | Class label of current .ply model}"
"{rgb_use | 0 | Use RGB image or grayscale}";
/* Get parameters from the command line. */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
parser.about("Generating training data for CNN with triplet loss");
if (parser.has("help"))
{
parser.printMessage();
@@ -70,23 +76,25 @@ int main(int argc, char *argv[])
char* p=(char*)labeldir.data();
imglabel.open(p, fstream::app|fstream::out);
bool camera_pov = (true);
-/// Create a window
+/* Create a window using viz. */
viz::Viz3d myWindow("Coordinate Frame");
/* Set the window size to 64x64; this scale is used as the default. */
myWindow.setWindowSize(Size(64,64));
-/// Add coordinate axes
+/* Add coordinate axes. */
myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
/* Set background color. */
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
-/// Set background color
-/// Let's assume camera has the following properties
-/// Create a cloud widget.
+/* Create a Mesh widget, loading .ply models. */
viz::Mesh objmesh = viz::Mesh::load(plymodel);
/* Get the center of the loaded mesh, since some .ply models are not centered at the origin. */
Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
Point3d cam_y_dir(0.0f,0.0f,1.0f);
const char* headerPath = "../data/header_for_";
const char* binaryPath = "../data/binary_";
ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath);
/* Images will be saved as .png files. */
for(int pose = 0; pose < (int)campos.size(); pose++){
char temp[16];
sprintf(temp, "%d", label_class);
@@ -97,17 +105,16 @@ int main(int argc, char *argv[])
filename += ".png";
imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl;
filename = imagedir + filename;
-/// We can get the pose of the cam using makeCameraPoses
+/* Get the pose of the camera using makeCameraPose. */
Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
-/// We can get the transformation matrix from camera coordinate system to global using
-/// - makeTransformToGlobal. We need the axes of the camera
+/* Get the transformation matrix from the camera coordinate system to the global frame. */
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
viz::WMesh mesh_widget(objmesh);
-/// Pose of the widget in camera frame
+/* Pose of the widget in camera frame. */
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
-/// Pose of the widget in global frame
+/* Pose of the widget in global frame. */
Affine3f cloud_pose_global = transform * cloud_pose;
-/// Visualize camera frame
+/* Visualize camera frame. */
if (!camera_pov)
{
viz::WCameraPosition cpw(1); // Coordinate axes
@@ -116,14 +123,16 @@ int main(int argc, char *argv[])
myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
}
-/// Visualize widget
+/* Visualize widget. */
mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0);
myWindow.showWidget("ape", mesh_widget, cloud_pose_global);
-/// Set the viewer pose to that of camera
+/* Set the viewer pose to that of camera. */
if (camera_pov)
myWindow.setViewerPose(cam_pose);
/* Save the screenshot as an image. */
myWindow.saveScreenshot(filename);
/* Write images into binary files for later use in CNN training. */
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
}
imglabel.close();

@@ -0,0 +1,75 @@
Training data generation using Icosphere {#tutorial_data_generation}
=============
Goal
----
In this tutorial you will learn how to
- Construct a point cloud of camera views on a sphere.
- Generate training images from a 3D model.
Code
----
You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_sphereview_data.cpp).
@include cnn_3dobj/samples/demo_sphereview_data.cpp
Explanation
-----------
Here is the general structure of the program:
- Create a window.
@code{.cpp}
viz::Viz3d myWindow("Coordinate Frame");
@endcode
- Set the window size to 64x64; this scale is used as the default.
@code{.cpp}
myWindow.setWindowSize(Size(64,64));
@endcode
- Add coordinate axes.
@code{.cpp}
myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
@endcode
- Create a Mesh widget, loading .ply models.
@code{.cpp}
viz::Mesh objmesh = viz::Mesh::load(plymodel);
@endcode
- Get the center of the loaded mesh, since some .ply models are not centered at the origin.
@code{.cpp}
Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
@endcode
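- Get the radius for the camera orbit from the same point cloud; this call comes from the sample code above.
@code{.cpp}
float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
@endcode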
- Get the pose of the camera using makeCameraPose.
@code{.cpp}
Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
@endcode
- Get the transformation matrix from the camera coordinate system to the global frame.
@code{.cpp}
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
viz::WMesh mesh_widget(objmesh);
@endcode
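- Place the widget: its pose in the camera frame is lifted into the global frame with this transform (taken from the sample code above).
@code{.cpp}
/* Pose of the widget in camera frame. */
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
/* Pose of the widget in global frame. */
Affine3f cloud_pose_global = transform * cloud_pose;
@endcode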
- Save the screenshot as an image.
@code{.cpp}
myWindow.saveScreenshot(filename);
@endcode
- Write images into binary files for later use in CNN training.
@code{.cpp}
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
@endcode
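The demo can then be run as suggested by its help string, for example: `./sphereview_test -ite_depth=2 -plymodel=../data/3Dmodel/ape.ply -imagedir=../data/images_all/ -labeldir=../data/label_all.txt -num_class=4 -label_class=0`, pressing 'q' once the gray background and coordinate axes appear.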
Results
-------
Here is a collection of images created by this demo using 4 models.
![](images_all/1_8.png)

@@ -0,0 +1,66 @@
Classify {#tutorial_classify}
===============
Goal
----
In this tutorial you will learn how to
- Extract features from an image
- Extract features from images under a given root path
- Make a prediction using reference images and a target image
Code
----
You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_classify.cpp).
@include cnn_3dobj/samples/demo_classify.cpp
Explanation
-----------
Here is the general structure of the program:
- Initialize a network with the device.
@code{.cpp}
cv::cnn_3dobj::descriptorExtractor descriptor(device);
@endcode
- Load the net with the trained Caffe network parameters and structure.
@code{.cpp}
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
@endcode
- List the file names under a given path.
@code{.cpp}
listDir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++)
{
name_gallery[i] = src_dir + name_gallery[i];
}
@endcode
- Extract features from a set of images.
@code{.cpp}
descriptor.extract(img_gallery, feature_reference, feature_blob);
@endcode
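- The same call also works on a single image; the target image's feature is extracted this way (taken from the sample code above).
@code{.cpp}
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
@endcode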
- Initialize a matcher using L2 distance.
@code{.cpp}
cv::BFMatcher matcher(NORM_L2);
std::vector<std::vector<cv::DMatch> > matches;
@endcode
- Run a KNN match between the target and reference images.
@code{.cpp}
matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
@endcode
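- Read the top candidates off `matches[0]`. A minimal sketch, assuming `matches` and `name_gallery` are filled as above; `trainIdx` indexes the gallery and `distance` is the L2 distance.
@code{.cpp}
/* Collect (file name, distance) pairs for the top N candidates. */
std::vector<std::pair<string, float> > prediction;
for (size_t i = 0; i < matches[0].size(); ++i)
{
    prediction.push_back(std::make_pair(name_gallery[matches[0][i].trainIdx], matches[0][i].distance));
    std::cout << prediction[i].second << " - " << prediction[i].first << std::endl;
}
@endcode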
- Print the feature of the target image.
@code{.cpp}
std::cout << std::endl << "---------- Features of target image: " << target_img << " ----------" << endl << feature_test << std::endl;
@endcode
Results
-------

@@ -0,0 +1,60 @@
Model analysis {#tutorial_model_analysis}
=============
Goal
----
In this tutorial you will learn how to
- Extract features from particular images.
- Make a meaningful comparison of the extracted features.
Code
----
You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_model_analysis.cpp).
@include cnn_3dobj/samples/demo_model_analysis.cpp
Explanation
-----------
Here is the general structure of the program:
- Sample closest in pose to the reference image and of the same class.
@code{.cpp}
ref_img.push_back(ref_img1);
@endcode
- Sample less close in pose to the reference image but still of the same class.
@code{.cpp}
ref_img.push_back(ref_img2);
@endcode
- Sample very close in pose to the reference image but not of the same class.
@code{.cpp}
ref_img.push_back(ref_img3);
@endcode
- Initialize a network with the device.
@code{.cpp}
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
@endcode
- Load the net with the trained Caffe network parameters and structure.
@code{.cpp}
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
@endcode
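- The reference and sample images then go through the same feature extraction as in the classification demo. A sketch under that assumption; the variable names here (`img_ref`, `feature_ref`, `feature_samples`) are illustrative, not from the sample.
@code{.cpp}
/* Extract the feature of the reference image and of each of the 3 samples. */
cv::Mat feature_ref;
descriptor.extract(img_ref, feature_ref, feature_blob);
std::vector<cv::Mat> feature_samples(ref_img.size());
for (size_t i = 0; i < ref_img.size(); ++i)
    descriptor.extract(cv::imread(ref_img[i], -1), feature_samples[i], feature_blob);
@endcode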
- Compare the distances between the reference image and the 3 other images:
the distance to the closest sample should be the smallest, and
the distance to the sample from another class should be the largest.
@code{.cpp}
if (matches[0] < matches[1] && matches[0] < matches[2])
pose_pass = true;
if (matches[1] < matches[2])
class_pass = true;
@endcode
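- The `matches` values compared here are plain distances between those features. A hypothetical sketch of how they could be computed with cv::norm, using the illustrative names from the sketch above.
@code{.cpp}
/* L2 distance between the reference feature and each sample feature. */
std::vector<float> matches;
for (size_t i = 0; i < feature_samples.size(); ++i)
    matches.push_back((float)cv::norm(feature_ref, feature_samples[i], cv::NORM_L2));
@endcode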
Results
-------

@@ -0,0 +1,26 @@
CNN for 3D Object Classification and Pose Estimation {#tutorial_table_of_content_cnn_3dobj}
==========
- @subpage tutorial_data_generation
*Compatibility:* \> OpenCV 3.0.0
*Author:* Yida Wang
You will learn how to generate training images from 3D models with proper poses for CNN training.
- @subpage tutorial_feature_classification
*Compatibility:* \> OpenCV 3.0.0
*Author:* Yida Wang
You will learn how to extract features from images and make a prediction using the descriptor.
- @subpage tutorial_model_analysis
*Compatibility:* \> OpenCV 3.0.0
*Author:* Yida Wang
You will learn how to analyze the performance of the trained model.