From 97d49a8834737b02f129cc4005e5b49a2194d8a9 Mon Sep 17 00:00:00 2001
From: Wangyida
Date: Fri, 21 Aug 2015 20:17:14 +0800
Subject: [PATCH] add C++ tutorial samples about data generation and classifier

---
 modules/cnn_3dobj/samples/CMakeLists.txt      |  6 +-
 ...classifyIMG_demo.cpp => demo_classify.cpp} | 40 ++++++++--
 ...lysis_demo.cpp => demo_model_analysis.cpp} | 22 +++++-
 ...dobj_demo.cpp => demo_sphereview_data.cpp} | 49 +++++++-----
 .../data_generation/data_generation.markdown  | 75 +++++++++++++++++++
 .../feature_classification/classify.markdown  | 66 ++++++++++++++++
 .../model_analysis/model_analysis.markdown    | 60 +++++++++++++++
 .../table_of_content_cnn_3dobj.markdown       | 26 +++++++
 8 files changed, 313 insertions(+), 31 deletions(-)
 rename modules/cnn_3dobj/samples/{classifyIMG_demo.cpp => demo_classify.cpp} (84%)
 rename modules/cnn_3dobj/samples/{model_analysis_demo.cpp => demo_model_analysis.cpp} (88%)
 rename modules/cnn_3dobj/samples/{sphereview_3dobj_demo.cpp => demo_sphereview_data.cpp} (79%)
 create mode 100644 modules/cnn_3dobj/tutorials/data_generation/data_generation.markdown
 create mode 100644 modules/cnn_3dobj/tutorials/feature_classification/classify.markdown
 create mode 100644 modules/cnn_3dobj/tutorials/model_analysis/model_analysis.markdown
 create mode 100644 modules/cnn_3dobj/tutorials/table_of_content_cnn_3dobj.markdown

diff --git a/modules/cnn_3dobj/samples/CMakeLists.txt b/modules/cnn_3dobj/samples/CMakeLists.txt
index 21e2b4d34..65a2f3609 100644
--- a/modules/cnn_3dobj/samples/CMakeLists.txt
+++ b/modules/cnn_3dobj/samples/CMakeLists.txt
@@ -3,15 +3,15 @@ SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb ")
 SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
 project(sphereview_test)
 find_package(OpenCV REQUIRED)
-set(SOURCES_generator sphereview_3dobj_demo.cpp)
+set(SOURCES_generator demo_sphereview_data.cpp)
 include_directories(${OpenCV_INCLUDE_DIRS})
 add_executable(sphereview_test ${SOURCES_generator})
 target_link_libraries(sphereview_test ${OpenCV_LIBS})
 
-set(SOURCES_classifier classifyIMG_demo.cpp)
+set(SOURCES_classifier demo_classify.cpp)
 add_executable(classify_test ${SOURCES_classifier})
 target_link_libraries(classify_test ${OpenCV_LIBS})
 
-set(SOURCES_modelanalysis model_analysis_demo.cpp)
+set(SOURCES_modelanalysis demo_model_analysis.cpp)
 add_executable(model_test ${SOURCES_modelanalysis})
 target_link_libraries(model_test ${OpenCV_LIBS})
diff --git a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp b/modules/cnn_3dobj/samples/demo_classify.cpp
similarity index 84%
rename from modules/cnn_3dobj/samples/classifyIMG_demo.cpp
rename to modules/cnn_3dobj/samples/demo_classify.cpp
index 2c0e1ebb1..955cb374a 100644
--- a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp
+++ b/modules/cnn_3dobj/samples/demo_classify.cpp
@@ -32,6 +32,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
  *
  */
+/**
+ * @file demo_classify.cpp
+ * @brief Feature extraction and classification.
+ * @author Yida Wang
+ */
 #define HAVE_CAFFE
 #include <opencv2/cnn_3dobj.hpp>
 #include <opencv2/features2d.hpp>
@@ -40,7 +45,10 @@ using namespace cv;
 using namespace std;
 using namespace cv::cnn_3dobj;
-/* Get the file name from a root directory.
- */
+/**
+ * @function listDir
+ * @brief Make a list of all file names under a directory
+ */
 void listDir(const char *path, std::vector<string>& files, bool r)
 {
     DIR *pDir;
@@ -70,9 +78,12 @@ void listDir(const char *path, std::vector<string>& files, bool r)
     sort(files.begin(),files.end());
 };
 
+/**
+ * @function main
+ */
 int main(int argc, char** argv)
 {
-    const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe. If there little variance in data such as human faces, you can add a mean_file, otherwise it is not so useful}"
+    const String keys = "{help | | This sample will extract features from reference images and a target image for classification. You can add a mean_file if there is little variance in the data, such as human faces; otherwise it is not so useful}"
 "{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}"
 "{caffemodel | ../../testdata/cv/3d_triplet_iter_30000.caffemodel | caffe model for feature exrtaction.}"
 "{network_forIMG | ../../testdata/cv/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
 "{target_img | ../data/images_all/1_8.png | Path of image waiting to be classified.}"
 "{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
 "{num_candidate | 15 | Number of candidates in gallery as the prediction result.}"
-"{device | CPU | device}"
-"{dev_id | 0 | dev_id}";
+"{device | CPU | Device type: CPU or GPU}"
+"{dev_id | 0 | Device id}";
+
+    /* Get parameters from command line. */
     cv::CommandLineParser parser(argc, argv, keys);
-    parser.about("Demo for object data classification and pose estimation");
+    parser.about("Feature extraction and classification");
    if (parser.has("help"))
     {
         parser.printMessage();
@@ -99,13 +112,18 @@ int main(int argc, char** argv)
     string device = parser.get<string>("device");
     int dev_id = parser.get<int>("dev_id");
+    /* Initialize a network with the device. */
     cv::cnn_3dobj::descriptorExtractor descriptor(device);
     std::cout << "Using" << descriptor.getDeviceType() << std::endl;
+
+    /* Load the network with the Caffe-trained parameters and structure. */
     if (strcmp(mean_file.c_str(), "no") == 0)
         descriptor.loadNet(network_forIMG, caffemodel);
     else
         descriptor.loadNet(network_forIMG, caffemodel, mean_file);
     std::vector<string> name_gallery;
+
+    /* List the file names under a given path. */
     listDir(src_dir.c_str(), name_gallery, false);
     for (unsigned int i = 0; i < name_gallery.size(); i++)
     {
         name_gallery[i] = src_dir + name_gallery[i];
     }
     std::vector<cv::Mat> img_gallery;
     cv::Mat feature_reference;
     for (unsigned int i = 0; i < name_gallery.size(); i++)
     {
         img_gallery.push_back(cv::imread(name_gallery[i], -1));
     }
+
+    /* Extract features from a set of images. */
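+    /* Note: each row of the output Mat feature_reference will hold the
+     * descriptor of one gallery image, in the same order as name_gallery,
+     * so a match index from the matcher below maps back to a file name.
+     */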
     descriptor.extract(img_gallery, feature_reference, feature_blob);
     std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
     cv::Mat img = cv::imread(target_img, -1);
-    // CHECK(!img.empty()) << "Unable to decode image " << target_img;
     std::cout << std::endl << "---------- Features of gallery images ----------" << std::endl;
     std::vector<std::pair<string, float> > prediction;
+
+    /* Print features of the reference images. */
     for (unsigned int i = 0; i < feature_reference.rows; i++)
         std::cout << feature_reference.row(i) << endl;
     cv::Mat feature_test;
     descriptor.extract(img, feature_test, feature_blob);
+    /* Initialize a matcher using the L2 distance. */
     cv::BFMatcher matcher(NORM_L2);
     std::vector<std::vector<cv::DMatch> > matches;
+    /* Run a KNN match between the target and reference features. */
     matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
+
+    /* Print the feature of the target image waiting to be classified. */
     std::cout << std::endl << "---------- Features of target image: " << target_img << "----------" << endl << feature_test << std::endl;
-    // Print the top N prediction.
+
+    /* Print the top N prediction. */
     std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
     for (size_t i = 0; i < matches[0].size(); ++i)
     {

diff --git a/modules/cnn_3dobj/samples/model_analysis_demo.cpp b/modules/cnn_3dobj/samples/demo_model_analysis.cpp
similarity index 88%
rename from modules/cnn_3dobj/samples/model_analysis_demo.cpp
rename to modules/cnn_3dobj/samples/demo_model_analysis.cpp
index 37b3d1729..93390e02d 100644
--- a/modules/cnn_3dobj/samples/model_analysis_demo.cpp
+++ b/modules/cnn_3dobj/samples/demo_model_analysis.cpp
@@ -32,6 +32,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
  *
  */
+/**
+ * @file demo_model_analysis.cpp
+ * @brief Analysis of the trained model by comparing extracted features.
+ * @author Yida Wang
+ */
 #define HAVE_CAFFE
 #include <opencv2/cnn_3dobj.hpp>
 #include "opencv2/imgproc.hpp"
@@ -52,6 +57,7 @@ int main(int argc, char** argv)
 "{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
 "{device | CPU | device}"
 "{dev_id | 0 | dev_id}";
+    /* Get parameters from command line. */
     cv::CommandLineParser parser(argc, argv, keys);
     parser.about("Demo for object data classification and pose estimation");
     if (parser.has("help"))
     {
         parser.printMessage();
@@ -70,13 +76,23 @@ int main(int argc, char** argv)
     string device = parser.get<string>("device");
     int dev_id = parser.get<int>("dev_id");
     std::vector<Mat> ref_img;
+    /* Sample closest in pose to the reference image
+     * and also from the same class.
+     */
     ref_img.push_back(ref_img1);
+    /* Sample less close in pose to the reference image
+     * but still from the same class.
+     */
     ref_img.push_back(ref_img2);
+    /* Sample very close in pose to the reference image
+     * but not from the same class.
+     */
     ref_img.push_back(ref_img3);
+
+    /* Initialize a network with the device. */
     cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
+    /* Load the network with the Caffe-trained parameters and structure. */
     if (strcmp(mean_file.c_str(), "no") == 0)
         descriptor.loadNet(network_forIMG, caffemodel);
     else
         descriptor.loadNet(network_forIMG, caffemodel, mean_file);
@@ -116,6 +132,10 @@ int main(int argc, char** argv)
     }
     bool pose_pass = false;
     bool class_pass = false;
+    /* Compare the distances between the reference image and the 3 other images:
+     * the distance to the closest sample should be the smallest and
+     * the distance to the sample from another class should be the largest.
+     */
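+    /* Each entry of matches holds the feature-space distance between the
+     * reference image and one of the three samples, in push_back order:
+     * matches[0] for ref_img1 (closest pose), matches[1] for ref_img2 and
+     * matches[2] for ref_img3 (different class).
+     */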
     if (matches[0] < matches[1] && matches[0] < matches[2])
         pose_pass = true;
     if (matches[1] < matches[2])
         class_pass = true;

diff --git a/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp b/modules/cnn_3dobj/samples/demo_sphereview_data.cpp
similarity index 79%
rename from modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp
rename to modules/cnn_3dobj/samples/demo_sphereview_data.cpp
index 43933f577..df8c28b96 100644
--- a/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp
+++ b/modules/cnn_3dobj/samples/demo_sphereview_data.cpp
@@ -32,6 +32,11 @@
  * POSSIBILITY OF SUCH DAMAGE.
  *
  */
+/**
+ * @file demo_sphereview_data.cpp
+ * @brief Generating training data for CNN with triplet loss.
+ * @author Yida Wang
+ */
 #define HAVE_CAFFE
 #include <opencv2/cnn_3dobj.hpp>
 #include <opencv2/viz/vizcore.hpp>
@@ -44,14 +49,15 @@
 int main(int argc, char *argv[])
 {
 const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../data/3Dmodel/ape.ply -imagedir=../data/images_all/ -labeldir=../data/label_all.txt -num_class=4 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}"
 "{ite_depth | 2 | Iteration of sphere generation.}"
-"{plymodel | ../data/3Dmodel/ape.ply | path of the '.ply' file for image rendering. }"
-"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
-"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
-"{num_class | 4 | total number of classes of models}"
-"{label_class | 0 | class label of current .ply model}"
-"{rgb_use | 0 | use RGB image or grayscale}";
+"{plymodel | ../data/3Dmodel/ape.ply | Path of the '.ply' file for image rendering. }"
+"{imagedir | ../data/images_all/ | Path of the generated images for one particular .ply model. }"
+"{labeldir | ../data/label_all.txt | Path of the label file for one particular .ply model. }"
+"{num_class | 4 | Total number of classes of models}"
+"{label_class | 0 | Class label of current .ply model}"
+"{rgb_use | 0 | Use RGB image or grayscale}";
+    /* Get parameters from command line. */
     cv::CommandLineParser parser(argc, argv, keys);
-    parser.about("Demo for Sphere View data generation");
+    parser.about("Generating training data for CNN with triplet loss");
     if (parser.has("help"))
     {
         parser.printMessage();
@@ -70,23 +76,25 @@ int main(int argc, char *argv[])
     char* p=(char*)labeldir.data();
     imglabel.open(p, fstream::app|fstream::out);
     bool camera_pov = (true);
-    /// Create a window
+    /* Create a window using viz. */
     viz::Viz3d myWindow("Coordinate Frame");
+    /* Set the window size to 64*64, which is used as the default scale. */
     myWindow.setWindowSize(Size(64,64));
-    /// Add coordinate axes
+    /* Add coordinate axes. */
     myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
+    /* Set background color. */
     myWindow.setBackgroundColor(viz::Color::gray());
     myWindow.spin();
-    /// Set background color
-    /// Let's assume camera has the following properties
-    /// Create a cloud widget.
+    /* Create a mesh widget and load the .ply model. */
     viz::Mesh objmesh = viz::Mesh::load(plymodel);
+    /* Get the center of the mesh from its point cloud, because some .ply models are not centered at the origin. */
     Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
     float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
     Point3d cam_y_dir(0.0f,0.0f,1.0f);
     const char* headerPath = "../data/header_for_";
     const char* binaryPath = "../data/binary_";
     ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath);
+    /* Images will be saved as .png files. */
     for(int pose = 0; pose < (int)campos.size(); pose++){
         char* temp = new char;
         sprintf (temp,"%d",label_class);
@@ -97,17 +105,16 @@ int main(int argc, char *argv[])
         filename += ".png";
         imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl;
         filename = imagedir + filename;
-        /// We can get the pose of the cam using makeCameraPoses
+        /* Get the pose of the camera using makeCameraPose. */
         Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
-        /// We can get the transformation matrix from camera coordinate system to global using
-        /// - makeTransformToGlobal. We need the axes of the camera
+        /* Get the transformation matrix from the camera coordinate system to the global coordinate system. */
         Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
         viz::WMesh mesh_widget(objmesh);
-        /// Pose of the widget in camera frame
+        /* Pose of the widget in camera frame. */
         Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
-        /// Pose of the widget in global frame
+        /* Pose of the widget in global frame. */
         Affine3f cloud_pose_global = transform * cloud_pose;
-        /// Visualize camera frame
+        /* Visualize camera frame. */
         if (!camera_pov)
         {
             viz::WCameraPosition cpw(1); // Coordinate axes
             viz::WCameraPosition cpw_frustum(Vec2f(0.889484, 0.523599)); // Camera frustum
             myWindow.showWidget("CPW", cpw, cam_pose);
             myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
         }
-        /// Visualize widget
+        /* Visualize widget. */
         mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0);
         myWindow.showWidget("ape", mesh_widget, cloud_pose_global);
-        /// Set the viewer pose to that of camera
+        /* Set the viewer pose to that of the camera. */
         if (camera_pov)
             myWindow.setViewerPose(cam_pose);
+        /* Save the screenshot as an image. */
         myWindow.saveScreenshot(filename);
+        /* Write images into binary files for further use in CNN training. */
         ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
     }
     imglabel.close();

diff --git a/modules/cnn_3dobj/tutorials/data_generation/data_generation.markdown b/modules/cnn_3dobj/tutorials/data_generation/data_generation.markdown
new file mode 100644
index 000000000..34de7f3ae
--- /dev/null
+++ b/modules/cnn_3dobj/tutorials/data_generation/data_generation.markdown
@@ -0,0 +1,75 @@
+Training data generation using Icosphere {#tutorial_data_generation}
+=============
+
+Goal
+----
+
+In this tutorial you will learn how to
+
+- Generate a point cloud of camera viewpoints on a sphere.
+- Generate training images from a 3D model.
+
+Code
+----
+
+You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_sphereview_data.cpp).
+@include cnn_3dobj/samples/demo_sphereview_data.cpp
+
+Explanation
+-----------
+
+Here is the general structure of the program:
+
+- Create a window.
+  @code{.cpp}
+  viz::Viz3d myWindow("Coordinate Frame");
+  @endcode
+
+- Set the window size to 64*64, which is used as the default scale.
+  @code{.cpp}
+  myWindow.setWindowSize(Size(64,64));
+  @endcode
+
+- Add coordinate axes.
+  @code{.cpp}
+  myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
+  myWindow.setBackgroundColor(viz::Color::gray());
+  myWindow.spin();
+  @endcode
+
+- Create a mesh widget and load the .ply model.
+  @code{.cpp}
+  viz::Mesh objmesh = viz::Mesh::load(plymodel);
+  @endcode
+
+- Get the center of the mesh from its point cloud, because some .ply models are not centered at the origin; the camera focal point is set to this center.
+  @code{.cpp}
+  Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
+  @endcode
+
+- Get the pose of the camera using makeCameraPose.
+  @code{.cpp}
+  Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
+  @endcode
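+
+- The camera viewpoints `campos` come from subdividing an icosphere, controlled by the `ite_depth` parameter. As a rough, self-contained sketch of the idea only (this is not the module's own `icoSphere` implementation; `onSphere` and `subdivide` are names invented for this illustration): each subdivision step splits every triangle edge at its midpoint and projects the new vertices back onto the unit sphere, so the viewpoints stay evenly spread.
+  @code{.cpp}
+  #include <opencv2/core.hpp>
+  #include <cmath>
+  #include <vector>
+
+  /* Project a point back onto the unit sphere. */
+  static cv::Point3f onSphere(cv::Point3f p)
+  {
+      float n = std::sqrt(p.x*p.x + p.y*p.y + p.z*p.z);
+      return cv::Point3f(p.x/n, p.y/n, p.z/n);
+  }
+
+  /* Recursively split a spherical triangle; at depth 0 record its corners.
+   * Corners are shared between triangles, so duplicates should be removed
+   * before the points are used as camera positions. */
+  static void subdivide(cv::Point3f a, cv::Point3f b, cv::Point3f c,
+                        int depth, std::vector<cv::Point3f>& campos)
+  {
+      if (depth == 0)
+      {
+          campos.push_back(a);
+          campos.push_back(b);
+          campos.push_back(c);
+          return;
+      }
+      cv::Point3f ab = onSphere((a + b) * 0.5f);
+      cv::Point3f bc = onSphere((b + c) * 0.5f);
+      cv::Point3f ca = onSphere((c + a) * 0.5f);
+      subdivide(a, ab, ca, depth - 1, campos);
+      subdivide(b, bc, ab, depth - 1, campos);
+      subdivide(c, ca, bc, depth - 1, campos);
+      subdivide(ab, bc, ca, depth - 1, campos);
+  }
+  @endcode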
+
+- Get the transformation matrix from the camera coordinate system to the global coordinate system.
+  @code{.cpp}
+  Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
+  viz::WMesh mesh_widget(objmesh);
+  @endcode
+
+- Save screenshots as images.
+  @code{.cpp}
+  myWindow.saveScreenshot(filename);
+  @endcode
+
+- Write images into binary files for further use in CNN training.
+  @code{.cpp}
+  ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
+  @endcode
+
+Results
+-------
+
+Here is a collection of images created by this demo using 4 models.
+
+![](images_all/1_8.png)

diff --git a/modules/cnn_3dobj/tutorials/feature_classification/classify.markdown b/modules/cnn_3dobj/tutorials/feature_classification/classify.markdown
new file mode 100644
index 000000000..e15576551
--- /dev/null
+++ b/modules/cnn_3dobj/tutorials/feature_classification/classify.markdown
@@ -0,0 +1,66 @@
+Classify {#tutorial_classify}
+===============
+
+Goal
+----
+
+In this tutorial you will learn how to
+
+- Extract a feature from a single image
+- Extract features from images under a given root path
+- Make a prediction using reference images and a target image
+
+Code
+----
+
+You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_classify.cpp).
+@include cnn_3dobj/samples/demo_classify.cpp
+
+Explanation
+-----------
+
+Here is the general structure of the program:
+
+- Initialize a network with the device.
+  @code{.cpp}
+  cv::cnn_3dobj::descriptorExtractor descriptor(device);
+  @endcode
+
+- Load the network with the Caffe-trained parameters and structure.
+  @code{.cpp}
+  if (strcmp(mean_file.c_str(), "no") == 0)
+      descriptor.loadNet(network_forIMG, caffemodel);
+  else
+      descriptor.loadNet(network_forIMG, caffemodel, mean_file);
+  @endcode
+
+- List the file names under a given path.
+  @code{.cpp}
+  listDir(src_dir.c_str(), name_gallery, false);
+  for (unsigned int i = 0; i < name_gallery.size(); i++)
+  {
+      name_gallery[i] = src_dir + name_gallery[i];
+  }
+  @endcode
+
+- Extract features from a set of images.
+  @code{.cpp}
+  descriptor.extract(img_gallery, feature_reference, feature_blob);
+  @endcode
+
+- Initialize a matcher using the L2 distance.
+  @code{.cpp}
+  cv::BFMatcher matcher(NORM_L2);
+  std::vector<std::vector<cv::DMatch> > matches;
+  @endcode
+
+- Run a KNN match between the target and reference features.
+  @code{.cpp}
+  matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
+  @endcode
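+
+- The top candidates can then be read out of `matches` (a sketch only; the sample's own printing loop may format this differently). `matches[0]` holds the k nearest gallery entries for the single query row, sorted by ascending distance, and `trainIdx` indexes back into `name_gallery`.
+  @code{.cpp}
+  for (size_t i = 0; i < matches[0].size(); ++i)
+  {
+      std::cout << matches[0][i].distance << " - "
+                << name_gallery[matches[0][i].trainIdx] << std::endl;
+  }
+  @endcode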
+
+- Print the feature of the target image waiting to be classified.
+  @code{.cpp}
+  std::cout << std::endl << "---------- Features of target image: " << target_img << "----------" << endl << feature_test << std::endl;
+  @endcode
+
+Results
+-------

diff --git a/modules/cnn_3dobj/tutorials/model_analysis/model_analysis.markdown b/modules/cnn_3dobj/tutorials/model_analysis/model_analysis.markdown
new file mode 100644
index 000000000..3292e1a3f
--- /dev/null
+++ b/modules/cnn_3dobj/tutorials/model_analysis/model_analysis.markdown
@@ -0,0 +1,60 @@
+Model analysis {#tutorial_model_analysis}
+=============
+
+Goal
+----
+
+In this tutorial you will learn how to
+
+- Extract features from particular images.
+- Make a meaningful comparison between the extracted features.
+
+Code
+----
+
+You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_model_analysis.cpp).
+@include cnn_3dobj/samples/demo_model_analysis.cpp
+
+Explanation
+-----------
+
+Here is the general structure of the program:
+
+- Sample closest in pose to the reference image, and also from the same class.
+  @code{.cpp}
+  ref_img.push_back(ref_img1);
+  @endcode
+
+- Sample less close in pose to the reference image, but still from the same class.
+  @code{.cpp}
+  ref_img.push_back(ref_img2);
+  @endcode
+
+- Sample very close in pose to the reference image, but not from the same class.
+  @code{.cpp}
+  ref_img.push_back(ref_img3);
+  @endcode
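+
+- Together with the reference image, these three samples mimic the triplet arrangement used at training time: same-class pairs and a sample from another class. In triplet terms the learned embedding should keep the same-class pair closer than the cross-class pair, which can be sketched as follows (`f_anchor`, `f_positive` and `f_negative` are placeholder names for extracted feature Mats, not variables from the sample):
+  @code{.cpp}
+  /* dist(anchor, positive) should stay below dist(anchor, negative). */
+  bool triplet_ok = cv::norm(f_anchor, f_positive, cv::NORM_L2)
+                  < cv::norm(f_anchor, f_negative, cv::NORM_L2);
+  @endcode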
+
+- Initialize a network with the device.
+  @code{.cpp}
+  cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
+  @endcode
+
+- Load the network with the Caffe-trained parameters and structure.
+  @code{.cpp}
+  if (strcmp(mean_file.c_str(), "no") == 0)
+      descriptor.loadNet(network_forIMG, caffemodel);
+  else
+      descriptor.loadNet(network_forIMG, caffemodel, mean_file);
+  @endcode
+
+- Compare the distances between the reference image and the 3 other images: the distance to the closest sample should be the smallest, and the distance to the sample from another class should be the largest.
+  @code{.cpp}
+  if (matches[0] < matches[1] && matches[0] < matches[2])
+      pose_pass = true;
+  if (matches[1] < matches[2])
+      class_pass = true;
+  @endcode
+
+Results
+-------

diff --git a/modules/cnn_3dobj/tutorials/table_of_content_cnn_3dobj.markdown b/modules/cnn_3dobj/tutorials/table_of_content_cnn_3dobj.markdown
new file mode 100644
index 000000000..64e01bcb3
--- /dev/null
+++ b/modules/cnn_3dobj/tutorials/table_of_content_cnn_3dobj.markdown
@@ -0,0 +1,26 @@
+CNN for 3D Object Classification and Pose Estimation {#tutorial_table_of_content_cnn_3dobj}
+==========
+
+- @subpage tutorial_data_generation
+
+  *Compatibility:* \> OpenCV 3.0.0
+
+  *Author:* Yida Wang
+
+  You will learn how to generate training images from 3D models with proper poses for CNN training.
+
+- @subpage tutorial_classify
+
+  *Compatibility:* \> OpenCV 3.0.0
+
+  *Author:* Yida Wang
+
+  You will learn how to extract features from images and make a prediction using the descriptor.
+
+- @subpage tutorial_model_analysis
+
+  *Compatibility:* \> OpenCV 3.0.0
+
+  *Author:* Yida Wang
+
+  You will learn how to analyze the performance of the trained model.