Add C++ tutorial samples for data generation and classification

pull/276/head
Wangyida 9 years ago
parent 197fba68f4
commit 97d49a8834
  1. modules/cnn_3dobj/samples/CMakeLists.txt (6 changes)
  2. modules/cnn_3dobj/samples/demo_classify.cpp (40 changes)
  3. modules/cnn_3dobj/samples/demo_model_analysis.cpp (22 changes)
  4. modules/cnn_3dobj/samples/demo_sphereview_data.cpp (49 changes)
  5. modules/cnn_3dobj/tutorials/data_generation/data_generation.markdown (75 changes)
  6. modules/cnn_3dobj/tutorials/feature_classification/classify.markdown (66 changes)
  7. modules/cnn_3dobj/tutorials/model_analysis/model_analysis.markdown (60 changes)
  8. modules/cnn_3dobj/tutorials/table_of_content_cnn_3dobj.markdown (26 changes)

@@ -3,15 +3,15 @@ SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb ")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
project(sphereview_test)
find_package(OpenCV REQUIRED)
-set(SOURCES_generator sphereview_3dobj_demo.cpp)
+set(SOURCES_generator demo_sphereview_data.cpp)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(sphereview_test ${SOURCES_generator})
target_link_libraries(sphereview_test ${OpenCV_LIBS})
-set(SOURCES_classifier classifyIMG_demo.cpp)
+set(SOURCES_classifier demo_classify.cpp)
add_executable(classify_test ${SOURCES_classifier})
target_link_libraries(classify_test ${OpenCV_LIBS})
-set(SOURCES_modelanalysis model_analysis_demo.cpp)
+set(SOURCES_modelanalysis demo_model_analysis.cpp)
add_executable(model_test ${SOURCES_modelanalysis})
target_link_libraries(model_test ${OpenCV_LIBS})
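With OpenCV built together with the cnn_3dobj module, these targets produce the three demo binaries used by the tutorials below: sphereview_test, classify_test and model_test.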

@@ -32,6 +32,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*/
/**
* @file demo_classify.cpp
* @brief Feature extraction and classification.
* @author Yida Wang
*/
#define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/features2d/features2d.hpp>
@@ -40,7 +45,10 @@ using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
-/* Get the file name from a root dictionary. */
+/**
+* @function listDir
+* @brief Make a list of all file names under a directory
+*/
void listDir(const char *path, std::vector<string>& files, bool r)
{
DIR *pDir;
@@ -70,9 +78,12 @@ void listDir(const char *path, std::vector<string>& files, bool r)
sort(files.begin(),files.end());
};
/**
* @function main
*/
int main(int argc, char** argv)
{
const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe. If there little variance in data such as human faces, you can add a mean_file, otherwise it is not so useful}"
const String keys = "{help | | This sample will extract featrues from reference images and target image for classification. You can add a mean_file if there little variance in data such as human faces, otherwise it is not so useful}"
"{src_dir | ../data/images_all/ | Source direction of the images ready for being used for extract feature as gallery.}"
"{caffemodel | ../../testdata/cv/3d_triplet_iter_30000.caffemodel | caffe model for feature exrtaction.}"
"{network_forIMG | ../../testdata/cv/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
@@ -80,10 +91,12 @@ int main(int argc, char** argv)
"{target_img | ../data/images_all/1_8.png | Path of image waiting to be classified.}"
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{num_candidate | 15 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
"{device | CPU | Device type: CPU or GPU}"
"{dev_id | 0 | Device id}";
/* Get parameters from the command line. */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for object data classification and pose estimation");
parser.about("Feature extraction and classification");
if (parser.has("help"))
{
parser.printMessage();
@@ -99,13 +112,18 @@ int main(int argc, char** argv)
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
/* Initialize a network with the device. */
cv::cnn_3dobj::descriptorExtractor descriptor(device);
std::cout << "Using " << descriptor.getDeviceType() << std::endl;
/* Load the net with the trained Caffe network parameters and structure. */
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
std::vector<string> name_gallery;
/* List the file names under a given path */
listDir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++)
{
@@ -117,23 +135,31 @@ int main(int argc, char** argv)
{
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
/* Extract features from a set of images. */
descriptor.extract(img_gallery, feature_reference, feature_blob);
std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
cv::Mat img = cv::imread(target_img, -1);
// CHECK(!img.empty()) << "Unable to decode image " << target_img;
std::cout << std::endl << "---------- Features of gallery images ----------" << std::endl;
std::vector<std::pair<string, float> > prediction;
/* Print features of the reference images. */
for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
/* Initialize a matcher using L2 distance. */
cv::BFMatcher matcher(NORM_L2);
std::vector<std::vector<cv::DMatch> > matches;
/* Run a KNN match between the target and reference images. */
matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
/* Print the feature of the target image to be classified. */
std::cout << std::endl << "---------- Features of target image: " << target_img << " ----------" << endl << feature_test << std::endl;
-// Print the top N prediction.
+/* Print the top N predictions. */
std::cout << std::endl << "---------- Prediction result (Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < matches[0].size(); ++i)
{

@@ -32,6 +32,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*/
/**
* @file demo_model_analysis.cpp
* @brief Analysis of the performance of the trained model.
* @author Yida Wang
*/
#define HAVE_CAFFE
#include <iostream>
#include "opencv2/imgproc.hpp"
@@ -52,6 +57,7 @@ int main(int argc, char** argv)
"{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
/* Get parameters from the command line. */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for object data classification and pose estimation");
if (parser.has("help"))
@@ -70,13 +76,23 @@ int main(int argc, char** argv)
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
std::vector<string> ref_img;
/* Sample closest in pose to the reference image
* and of the same class.
*/
ref_img.push_back(ref_img1);
/* Sample less close in pose to the reference image
* but still of the same class.
*/
ref_img.push_back(ref_img2);
/* Sample very close in pose to the reference image
* but not of the same class.
*/
ref_img.push_back(ref_img3);
/* Initialize a network with the device. */
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
/* Load the net with the trained Caffe network parameters and structure. */
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
@@ -116,6 +132,10 @@ int main(int argc, char** argv)
}
bool pose_pass = false;
bool class_pass = false;
/* Compare the distances between the reference image and the 3 other images:
* the distance to the closest sample should be the smallest, and
* the distance to the sample from another class should be the largest.
*/
if (matches[0] < matches[1] && matches[0] < matches[2])
pose_pass = true;
if (matches[1] < matches[2])

@@ -32,6 +32,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*
*/
/**
* @file demo_sphereview_data.cpp
* @brief Generating training data for CNN with triplet loss.
* @author Yida Wang
*/
#define HAVE_CAFFE
#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/viz/vizcore.hpp>
@@ -44,14 +49,15 @@ int main(int argc, char *argv[])
{
const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../data/3Dmodel/ape.ply -imagedir=../data/images_all/ -labeldir=../data/label_all.txt -num_class=4 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}"
"{ite_depth | 2 | Iteration of sphere generation.}"
"{plymodel | ../data/3Dmodel/ape.ply | path of the '.ply' file for image rendering. }"
"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
"{num_class | 4 | total number of classes of models}"
"{label_class | 0 | class label of current .ply model}"
"{rgb_use | 0 | use RGB image or grayscale}";
"{plymodel | ../data/3Dmodel/ape.ply | Path of the '.ply' file for image rendering. }"
"{imagedir | ../data/images_all/ | Path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | Path of the generated images for one particular .ply model. }"
"{num_class | 4 | Total number of classes of models}"
"{label_class | 0 | Class label of current .ply model}"
"{rgb_use | 0 | Use RGB image or grayscale}";
/* Get parameters from the command line. */
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
parser.about("Generating training data for CNN with triplet loss");
if (parser.has("help"))
{
parser.printMessage();
@@ -70,23 +76,25 @@ int main(int argc, char *argv[])
char* p=(char*)labeldir.data();
imglabel.open(p, fstream::app|fstream::out);
bool camera_pov = (true);
-/// Create a window
+/* Create a window using viz. */
viz::Viz3d myWindow("Coordinate Frame");
/* Set the window size to 64x64; this scale is used as the default. */
myWindow.setWindowSize(Size(64,64));
-/// Add coordinate axes
+/* Add coordinate axes. */
myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
/* Set background color. */
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
-/// Set background color
-/// Let's assume camera has the following properties
-/// Create a cloud widget.
+/* Create a Mesh widget, loading .ply models. */
viz::Mesh objmesh = viz::Mesh::load(plymodel);
/* Get the center of the loaded mesh, since some .ply models are not centered at the origin. */
Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
Point3d cam_y_dir(0.0f,0.0f,1.0f);
const char* headerPath = "../data/header_for_";
const char* binaryPath = "../data/binary_";
ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath);
/* Images will be saved as .png files. */
for(int pose = 0; pose < (int)campos.size(); pose++){
char temp[16];
sprintf(temp, "%d", label_class);
@@ -97,17 +105,16 @@ int main(int argc, char *argv[])
filename += ".png";
imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl;
filename = imagedir + filename;
-/// We can get the pose of the cam using makeCameraPoses
+/* Get the pose of the camera using makeCameraPose. */
Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
-/// We can get the transformation matrix from camera coordinate system to global using
-/// - makeTransformToGlobal. We need the axes of the camera
+/* Get the transformation matrix from the camera coordinate system to the global frame. */
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
viz::WMesh mesh_widget(objmesh);
-/// Pose of the widget in camera frame
+/* Pose of the widget in camera frame. */
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
-/// Pose of the widget in global frame
+/* Pose of the widget in global frame. */
Affine3f cloud_pose_global = transform * cloud_pose;
-/// Visualize camera frame
+/* Visualize camera frame. */
if (!camera_pov)
{
viz::WCameraPosition cpw(1); // Coordinate axes
@@ -116,14 +123,16 @@ int main(int argc, char *argv[])
myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
}
-/// Visualize widget
+/* Visualize widget. */
mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0);
myWindow.showWidget("ape", mesh_widget, cloud_pose_global);
-/// Set the viewer pose to that of camera
+/* Set the viewer pose to that of camera. */
if (camera_pov)
myWindow.setViewerPose(cam_pose);
/* Save the screenshot as an image. */
myWindow.saveScreenshot(filename);
/* Write images into binary files for later use in CNN training. */
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
}
imglabel.close();

@@ -0,0 +1,75 @@
Training data generation using Icosphere {#tutorial_data_generation}
=============
Goal
----
In this tutorial you will learn how to
- Construct a point cloud of camera views on a sphere.
- Generate training images from a 3D model.
Code
----
You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_sphereview_data.cpp).
@include cnn_3dobj/samples/demo_sphereview_data.cpp
Explanation
-----------
Here is the general structure of the program:
- Create a window.
@code{.cpp}
viz::Viz3d myWindow("Coordinate Frame");
@endcode
- Set the window size to 64x64; this scale is used as the default.
@code{.cpp}
myWindow.setWindowSize(Size(64,64));
@endcode
- Add coordinate axes.
@code{.cpp}
myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
@endcode
- Create a Mesh widget, loading .ply models.
@code{.cpp}
viz::Mesh objmesh = viz::Mesh::load(plymodel);
@endcode
- Get the center of the loaded mesh, since some .ply models are not centered at the origin.
@code{.cpp}
Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
@endcode
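- Get the radius for the camera orbit from the same point cloud; this call comes from the sample code above.
@code{.cpp}
float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
@endcode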
- Get the pose of the camera using makeCameraPose.
@code{.cpp}
Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
@endcode
- Get the transformation matrix from the camera coordinate system to the global frame.
@code{.cpp}
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
viz::WMesh mesh_widget(objmesh);
@endcode
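- Place the widget: its pose in the camera frame is lifted into the global frame with this transform (taken from the sample code above).
@code{.cpp}
/* Pose of the widget in camera frame. */
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
/* Pose of the widget in global frame. */
Affine3f cloud_pose_global = transform * cloud_pose;
@endcode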
- Save the screenshot as an image.
@code{.cpp}
myWindow.saveScreenshot(filename);
@endcode
- Write images into binary files for later use in CNN training.
@code{.cpp}
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use);
@endcode
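The demo can then be run as suggested by its help string, for example: `./sphereview_test -ite_depth=2 -plymodel=../data/3Dmodel/ape.ply -imagedir=../data/images_all/ -labeldir=../data/label_all.txt -num_class=4 -label_class=0`, pressing 'q' once the gray background and coordinate axes appear.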
Results
-------
Here is a collection of images created by this demo using 4 models.
![](images_all/1_8.png)

@@ -0,0 +1,66 @@
Classify {#tutorial_classify}
===============
Goal
----
In this tutorial you will learn how to
- Extract features from an image
- Extract features from images under a given root path
- Make a prediction using reference images and a target image
Code
----
You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_classify.cpp).
@include cnn_3dobj/samples/demo_classify.cpp
Explanation
-----------
Here is the general structure of the program:
- Initialize a network with the device.
@code{.cpp}
cv::cnn_3dobj::descriptorExtractor descriptor(device);
@endcode
- Load the net with the trained Caffe network parameters and structure.
@code{.cpp}
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
@endcode
- List the file names under a given path.
@code{.cpp}
listDir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++)
{
name_gallery[i] = src_dir + name_gallery[i];
}
@endcode
- Extract features from a set of images.
@code{.cpp}
descriptor.extract(img_gallery, feature_reference, feature_blob);
@endcode
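- The same call also works on a single image; the target image's feature is extracted this way (taken from the sample code above).
@code{.cpp}
cv::Mat feature_test;
descriptor.extract(img, feature_test, feature_blob);
@endcode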
- Initialize a matcher using L2 distance.
@code{.cpp}
cv::BFMatcher matcher(NORM_L2);
std::vector<std::vector<cv::DMatch> > matches;
@endcode
- Run a KNN match between the target and reference images.
@code{.cpp}
matcher.knnMatch(feature_test, feature_reference, matches, num_candidate);
@endcode
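- Read the top candidates off `matches[0]`. A minimal sketch, assuming `matches` and `name_gallery` are filled as above; `trainIdx` indexes the gallery and `distance` is the L2 distance.
@code{.cpp}
/* Collect (file name, distance) pairs for the top N candidates. */
std::vector<std::pair<string, float> > prediction;
for (size_t i = 0; i < matches[0].size(); ++i)
{
    prediction.push_back(std::make_pair(name_gallery[matches[0][i].trainIdx], matches[0][i].distance));
    std::cout << prediction[i].second << " - " << prediction[i].first << std::endl;
}
@endcode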
- Print the feature of the target image.
@code{.cpp}
std::cout << std::endl << "---------- Features of target image: " << target_img << " ----------" << endl << feature_test << std::endl;
@endcode
Results
-------

@@ -0,0 +1,60 @@
Model analysis {#tutorial_model_analysis}
=============
Goal
----
In this tutorial you will learn how to
- Extract features from particular images.
- Make a meaningful comparison of the extracted features.
Code
----
You can download the code from [here](https://github.com/Wangyida/opencv_contrib/blob/cnn_3dobj/samples/demo_model_analysis.cpp).
@include cnn_3dobj/samples/demo_model_analysis.cpp
Explanation
-----------
Here is the general structure of the program:
- Sample closest in pose to the reference image and of the same class.
@code{.cpp}
ref_img.push_back(ref_img1);
@endcode
- Sample less close in pose to the reference image but still of the same class.
@code{.cpp}
ref_img.push_back(ref_img2);
@endcode
- Sample very close in pose to the reference image but not of the same class.
@code{.cpp}
ref_img.push_back(ref_img3);
@endcode
- Initialize a network with the device.
@code{.cpp}
cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
@endcode
- Load the net with the trained Caffe network parameters and structure.
@code{.cpp}
if (strcmp(mean_file.c_str(), "no") == 0)
descriptor.loadNet(network_forIMG, caffemodel);
else
descriptor.loadNet(network_forIMG, caffemodel, mean_file);
@endcode
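- The reference and sample images then go through the same feature extraction as in the classification demo. A sketch under that assumption; the variable names here (`img_ref`, `feature_ref`, `feature_samples`) are illustrative, not from the sample.
@code{.cpp}
/* Extract the feature of the reference image and of each of the 3 samples. */
cv::Mat feature_ref;
descriptor.extract(img_ref, feature_ref, feature_blob);
std::vector<cv::Mat> feature_samples(ref_img.size());
for (size_t i = 0; i < ref_img.size(); ++i)
    descriptor.extract(cv::imread(ref_img[i], -1), feature_samples[i], feature_blob);
@endcode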
- Compare the distances between the reference image and the 3 other images:
the distance to the closest sample should be the smallest, and
the distance to the sample from another class should be the largest.
@code{.cpp}
if (matches[0] < matches[1] && matches[0] < matches[2])
pose_pass = true;
if (matches[1] < matches[2])
class_pass = true;
@endcode
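- The `matches` values compared here are plain distances between those features. A hypothetical sketch of how they could be computed with cv::norm, using the illustrative names from the sketch above.
@code{.cpp}
/* L2 distance between the reference feature and each sample feature. */
std::vector<float> matches;
for (size_t i = 0; i < feature_samples.size(); ++i)
    matches.push_back((float)cv::norm(feature_ref, feature_samples[i], cv::NORM_L2));
@endcode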
Results
-------

@@ -0,0 +1,26 @@
CNN for 3D Object Classification and Pose Estimation {#tutorial_table_of_content_cnn_3dobj}
==========
- @subpage tutorial_data_generation
*Compatibility:* \> OpenCV 3.0.0
*Author:* Yida Wang
You will learn how to generate training images from 3D models with proper poses for CNN training.
- @subpage tutorial_feature_classification
*Compatibility:* \> OpenCV 3.0.0
*Author:* Yida Wang
You will learn how to extract features from images and make a prediction using the descriptor.
- @subpage tutorial_model_analysis
*Compatibility:* \> OpenCV 3.0.0
*Author:* Yida Wang
You will learn how to analyze the performance of the trained model.