add pose information to binary files of label for caffe training

pull/276/head
Wangyida 9 years ago
parent 6f38d89160
commit 4fe5498a45
  1. modules/cnn_3dobj/README.md (10 changed lines)
  2. modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp (2 changed lines)
  3. modules/cnn_3dobj/samples/CMakeLists.txt (2 changed lines)
  4. modules/cnn_3dobj/samples/classifyDB_demo.cpp (0 changed lines)
  5. modules/cnn_3dobj/samples/classifyIMG_demo.cpp (98 changed lines)
  6. modules/cnn_3dobj/samples/data/label_all.txt (0 changed lines)
  7. modules/cnn_3dobj/samples/datatrans_demo.cpp (2 changed lines)
  8. modules/cnn_3dobj/samples/feature_extract_demo.cpp (18 changed lines)
  9. modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp (2 changed lines)
  10. modules/cnn_3dobj/src/cnn_classification.cpp (2 changed lines)
  11. modules/cnn_3dobj/src/cnn_sphereview.cpp (12 changed lines)

@@ -7,7 +7,7 @@
$ cd <caffe_source_directory>
$ mkdir build
$ cd build
$ cmake -D CMAKE_INSTALL_PREFIX=/usr/local ..
$ make all
$ make install
```
@@ -68,9 +68,9 @@ $ cd
$ cd <opencv_contrib>/modules/cnn_3dobj/samples/build
```
#Demo2:
###Convert data from the folder ../data/images_all into leveldb format for feature extraction afterwards. The leveldb files containing all the data will be stored in ../data/dbfile. If you use the OpenCV-defined feature extraction process, you can also skip Demo2 and just run Demo3 after Demo1, because Demo3 includes the db file conversion step before feature extraction.
###Convert data from the folder ../data/images_all into leveldb format for feature extraction afterwards. The leveldb files containing all the data will be stored in ../data/dbfile. If you use the OpenCV-defined feature extraction process, you can also skip Demo2 and just run Demo3 after Demo1, because Demo3 includes the db file conversion step before feature extraction. However, if you want to use Demo4 for classification, run this demo first: it generates the file name list used by the prediction step.
```
$ ./images2db_test
$ ./datatrans_test
```
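For reference, here is a minimal C++ sketch of the conversion step behind this command. It only illustrates the DataTrans::convert call used in datatrans_demo.cpp later in this commit; the paths and the attach_dir argument are placeholders, not the demo's actual defaults.
```
// Hypothetical sketch of the Demo2 conversion step; all values are placeholders.
#include <opencv2/cnn_3dobj.hpp>
#include <string>

int main()
{
    cv::cnn_3dobj::DataTrans trans;
    std::string src_dir = "../data/images_all/"; // folder with the gallery images
    std::string src_dst = "../data/dbfile";      // output leveldb database
    std::string attach_dir = "../data/dbfile";   // placeholder for the auxiliary output path
    // Convert every image (1 channel, 64x64) into the leveldb database.
    trans.convert(src_dir, src_dst, attach_dir, 1, 64, 64);
    return 0;
}
```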
==============
@@ -80,11 +80,11 @@ $ ./images2db_test
$ ./feature_extract_test
```
###This will extract features from a set of images in a folder as vector<cv::Mat> for further classification, and also produce a binary file containing the feature vectors of all samples.
###After running this, you will get a binary file storing the features in the ../data/feature folder; a Matlab script for reading this file can be provided if someone needs it. If you don't need the binary file, the features can also be kept in vector<cv::Mat> for direct classification using the softmax layer as shown in Demo4.
###After running this, you will get a binary file storing the features in the ../data/feature folder; a Matlab script for reading this file can be provided if someone needs it. If you don't need the binary file, the features can also be kept in vector<cv::Mat>.
==============
#Demo4:
###Classifier: this will extract the feature of a single image and compare it with the features of the gallery samples for prediction. Just run:
###Classifier: this will extract the feature of a single image and compare it with the features of the gallery samples for prediction. Demo2 must be run beforehand to generate the file name list used for prediction. This demo extracts features from a set of images in a given path and uses them as the reference for predicting the target image. Just run:
```
$ ./classify_test
```

@@ -129,7 +129,7 @@ class CV_EXPORTS_W IcoSphere
CV_WRAP static void createHeader(int num_item, int rows, int cols, const char* headerPath);
/** @brief Create header in binary files collecting the image data and label.
*/
CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class);
CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z);
/** @brief Write binary files used for training in other open source project.
*/
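A minimal, hypothetical usage sketch of the updated signature follows; the paths, image size and pose values are placeholders, and the exact meaning of binaryPath and headerPath follows the implementation in cnn_sphereview.cpp rather than anything documented here.
```
// Hypothetical sketch: both helpers are static, as declared above.
// All paths and values are placeholders, not taken from the actual demos.
#include <opencv2/cnn_3dobj.hpp>
using cv::cnn_3dobj::IcoSphere;

int main()
{
    int num_item = 1, rows = 64, cols = 64;
    // Write the idx-style headers for the image and label binaries.
    IcoSphere::createHeader(num_item, rows, cols, "./binary/");
    // Append one sample: the image pixels, then the class label plus the
    // camera pose (x, y, z) that this commit adds to the label file.
    IcoSphere::writeBinaryfile("./images/0_1.png", "./binary/", "./binary/",
                               num_item, /*label_class=*/0,
                               /*x=*/100, /*y=*/0, /*z=*/0);
    return 0;
}
```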

@@ -16,6 +16,6 @@ set(SOURCES_3 feature_extract_demo.cpp)
add_executable(feature_extract_test ${SOURCES_3})
target_link_libraries(feature_extract_test ${OpenCV_LIBS})
set(SOURCES_4 classify_demo.cpp)
set(SOURCES_4 classifyIMG_demo.cpp)
add_executable(classify_test ${SOURCES_4})
target_link_libraries(classify_test ${OpenCV_LIBS})

@@ -0,0 +1,98 @@
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2009, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <opencv2/cnn_3dobj.hpp>
#include <iomanip>
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
int main(int argc, char** argv)
{
const String keys = "{help | | demo that extracts the feature of a target image and classifies it against the features of a set of gallery images using Caffe.}"
"{src_dir | ../data/images_all/ | Source directory of the images used as the gallery for feature extraction.}"
"{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature extraction.}"
"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting the feature from a single image and making a classification.}"
"{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images; it can be used for mean value subtraction from all images.}"
"{label_file | ../data/label_all.txt | A name list including all gallery images.}"
"{target_img | ../data/images_all/2_13.png | Path of the image waiting to be classified.}"
"{num_candidate | 6 | Number of candidates from the gallery returned as the prediction result.}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for classifying a target image against a gallery of images using CNN features");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string src_dir = parser.get<string>("src_dir");
string caffemodel = parser.get<string>("caffemodel");
string network_forIMG = parser.get<string>("network_forIMG");
string mean_file = parser.get<string>("mean_file");
string label_file = parser.get<string>("label_file");
string target_img = parser.get<string>("target_img");
int num_candidate = parser.get<int>("num_candidate");
cv::cnn_3dobj::DataTrans transTemp;
std::vector<string> name_gallery;
transTemp.list_dir(src_dir.c_str(), name_gallery, false);
for (unsigned int i = 0; i < name_gallery.size(); i++) {
name_gallery[i] = src_dir + name_gallery[i];
}
// Initialize the classifier with the network definition, the trained model, the mean file and the label list.
cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file);
std::vector<cv::Mat> feature_reference;
for (unsigned int i = 0; i < name_gallery.size(); i++) {
cv::Mat img_gallery = cv::imread(name_gallery[i], -1);
feature_reference.push_back(classifier.feature_extract(img_gallery, false));
}
std::cout << std::endl << "---------- Prediction for "
<< target_img << " ----------" << std::endl;
cv::Mat img = cv::imread(target_img, -1);
if (img.empty())
{
std::cout << "Unable to decode image " << target_img << std::endl;
return -1;
}
std::cout << std::endl << "---------- Feature of gallery images ----------" << std::endl;
std::vector<std::pair<string, float> > prediction;
for (unsigned int i = 0; i < feature_reference.size(); i++)
std::cout << feature_reference[i].t() << endl;
cv::Mat feature_test = classifier.feature_extract(img, false);
std::cout << std::endl << "---------- Feature of target image: " << target_img << "----------" << endl << feature_test.t() << std::endl;
prediction = classifier.Classify(feature_reference, img, num_candidate, false);
// Print the top N prediction.
std::cout << std::endl << "---------- Prediction result(distance - file name in gallery) ----------" << std::endl;
for (size_t i = 0; i < prediction.size(); ++i) {
std::pair<string, float> p = prediction[i];
std::cout << std::fixed << std::setprecision(2) << p.second << " - \""
<< p.first << "\"" << std::endl;
}
return 0;
}

@@ -60,5 +60,5 @@ int main(int argc, char* argv[])
int height = parser.get<int>("height");
cv::cnn_3dobj::DataTrans Trans;
Trans.convert(src_dir,src_dst,attach_dir,channel,width,height);
std::cout << std::endl << "All images in: " << std::endl << src_dir << std::endl << "have been converted to levelDB data in: " << std::endl << src_dst << std::endl << "so that features of the gallery images can be extracted efficiently in the classification step; this conversion is not needed for feature extraction of the test image" << std::endl;
std::cout << std::endl << "All features of the images in: " << std::endl << src_dir << std::endl << "have been converted to levelDB data in: " << std::endl << src_dst << std::endl << "so that features of the gallery images can be extracted efficiently during classification; this conversion is not needed for feature extraction of the test image" << std::endl;
}

@@ -66,11 +66,11 @@ int main(int argc, char* argv[])
"{channel | 1 | Channel of the images. }"
"{width | 64 | Width of images}"
"{height | 64 | Height of images}"
"{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}"
"{network_forDB | ../data/3d_triplet_galleryIMG.prototxt | network definition in .prototxt the path of the training samples must be wrotten in in .prototxt files in Phase TEST}"
"{featurename_bin | ../data/feature/feature_iter_10000.bin | the output of the extracted feature in form of binary files together with the vector<cv::Mat> features as the feature.}"
"{pretrained_binary_proto | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}"
"{feature_extraction_proto | ../data/3d_triplet_train_test.prototxt | network definition in .prototxt the path of the training samples must be wrotten in in .prototxt files in Phase TEST}"
"{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | the output of the extracted feature in form of binary files together with the vector<cv::Mat> features as the feature.}"
"{extract_feature_blob_names | feat | the layer used for feature extraction in CNN.}"
"{num_mini_batches | 4 | batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}"
"{num_mini_batches | 6 | batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys);
@@ -86,16 +86,14 @@ int main(int argc, char* argv[])
int channel = parser.get<int>("channel");
int width = parser.get<int>("width");
int height = parser.get<int>("height");
string caffemodel = parser.get<string>("caffemodel");
string network_forDB = parser.get<string>("network_forDB");
string featurename_bin = parser.get<string>("featurename_bin");
string pretrained_binary_proto = parser.get<string>("pretrained_binary_proto");
string feature_extraction_proto = parser.get<string>("feature_extraction_proto");
string save_feature_dataset_names = parser.get<string>("save_feature_dataset_names");
string extract_feature_blob_names = parser.get<string>("extract_feature_blob_names");
int num_mini_batches = parser.get<int>("num_mini_batches");
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
cv::cnn_3dobj::DataTrans transTemp;
transTemp.convert(src_dir,src_dst,attach_dir,channel,width,height);
std::cout << std::endl << "All images in: " << std::endl << src_dir << std::endl << "have been converted to levelDB data in: " << std::endl << src_dst << std::endl << "for extracting feature of gallery images in classification step efficiently, this convertion is not needed in feature extraction of test image" << std::endl;
std::vector<cv::Mat> extractedFeature = transTemp.feature_extraction_pipeline(caffemodel, network_forDB, featurename_bin, extract_feature_blob_names, num_mini_batches, device, dev_id);
std::cout << std::endl << "All featrues of images in: " << std::endl << src_dir << std::endl << "have been extracted as binary file(using levelDB data) in:" << std::endl << featurename_bin << std::endl << "for analysis in Matlab and other software, this function also outputting a vector<cv::Mat> format gallery feature used for classificatioin.";
std::vector<cv::Mat> extractedFeature = transTemp.feature_extraction_pipeline(pretrained_binary_proto, feature_extraction_proto, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id);
}
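As an aside, a minimal sketch of calling the renamed pipeline directly is given below, using the default values visible in the keys above. It is only an illustration of the API as used in this demo, not a replacement for feature_extract_demo.cpp, and it assumes the leveldb conversion from Demo2 has already produced the database referenced by the .prototxt.
```
// Illustration only: argument values mirror the defaults in feature_extract_demo.cpp.
#include <opencv2/cnn_3dobj.hpp>
#include <iostream>
#include <vector>

int main()
{
    cv::cnn_3dobj::DataTrans transTemp;
    // Extract one feature vector per gallery image from the "feat" blob.
    std::vector<cv::Mat> extractedFeature = transTemp.feature_extraction_pipeline(
        "../data/3d_triplet_iter_10000.caffemodel",   // pretrained_binary_proto
        "../data/3d_triplet_train_test.prototxt",     // feature_extraction_proto
        "../data/feature/feature_iter_10000.bin",     // save_feature_dataset_names
        "feat",                                       // extract_feature_blob_names
        6,                                            // num_mini_batches
        "CPU", 0);                                    // device, dev_id
    std::cout << "Extracted " << extractedFeature.size() << " feature vectors." << std::endl;
    return 0;
}
```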

@@ -120,7 +120,7 @@ int main(int argc, char *argv[]){
if (camera_pov)
myWindow.setViewerPose(cam_pose);
myWindow.saveScreenshot(filename);
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class);
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100));
}
imglabel.close();
return 1;

@@ -66,7 +66,7 @@ namespace cnn_3dobj
for (unsigned int i = 0; i < reference.size(); i++) {
cv::Mat f1 = reference.at(i);
cv::Mat f2 = feature;
cv::Mat output_temp = f1.t()-f2;
cv::Mat output_temp = f1-f2;
output.push_back(cv::norm(output_temp));
}
std::vector<int> maxN = Argmax(output, N);
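For readers skimming the hunk above: each gallery feature is compared to the query feature with an L2 norm, and candidates are then picked from those distances via Argmax. The self-contained sketch below shows the same distance-ranking idea on synthetic data; in the module the features come from Classification::feature_extract.
```
// Minimal sketch of ranking gallery features by L2 distance to a query feature.
// The feature values here are synthetic placeholders.
#include <opencv2/core.hpp>
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
    std::vector<cv::Mat> gallery;  // reference features, one 1 x 16 row vector each
    for (int i = 0; i < 5; ++i)
        gallery.push_back(cv::Mat::ones(1, 16, CV_32F) * i);
    cv::Mat query = cv::Mat::ones(1, 16, CV_32F) * 2.2;  // feature of the target image

    // Compute the L2 distance to every gallery feature, as in Classify().
    std::vector<std::pair<double, int> > dist;
    for (size_t i = 0; i < gallery.size(); ++i)
    {
        cv::Mat diff = gallery[i] - query;
        dist.push_back(std::make_pair(cv::norm(diff), (int)i));
    }

    // The smallest distances are the closest gallery candidates.
    std::sort(dist.begin(), dist.end());
    for (size_t i = 0; i < dist.size(); ++i)
        std::cout << "gallery " << dist[i].second << " distance " << dist[i].first << std::endl;
    return 0;
}
```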

@@ -171,7 +171,7 @@ namespace cnn_3dobj
int headerimg[4] = {2051,num_item,rows,cols};
for (int i=0; i<4; i++)
headerimg[i] = swap_endian(headerimg[i]);
int headerlabel[2] = {2049,num_item};
int headerlabel[2] = {2050,num_item};
for (int i=0; i<2; i++)
headerlabel[i] = swap_endian(headerlabel[i]);
headerImg.write(reinterpret_cast<const char*>(headerimg), sizeof(int)*4);
@@ -180,7 +180,7 @@ namespace cnn_3dobj
headerLabel.close();
};
void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class)
void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z)
{
int isrgb = 0;
cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);
@@ -217,8 +217,8 @@ namespace cnn_3dobj
{
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
}
unsigned char templab = (unsigned char)label_class;
lab_file << templab;
signed char templab = (signed char)label_class;
lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
}
else
{
@@ -231,8 +231,8 @@ namespace cnn_3dobj
{
img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
}
unsigned char templab = (unsigned char)label_class;
lab_file << templab;
signed char templab = (signed char)label_class;
lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
}
img_file.close();
lab_file.close();
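To make the new label layout concrete, a small reading sketch follows. It assumes, based on the writer code above rather than any documentation, that the label binary starts with the two big-endian int32 header values written by createHeader ({2050, num_item}) followed by one 4-byte record per sample: the class label and the pose x, y, z stored as signed chars (the sphere-view demo scales the camera position by 100 before passing it in).
```
// Hypothetical reader for the label binary; the path is a placeholder and the
// layout is inferred from createHeader/writeBinaryfile above.
#include <cstdint>
#include <fstream>
#include <iostream>

static int32_t swap_endian(int32_t v)
{
    // Convert the big-endian header values back to host byte order
    // (mirroring the writer's swap_endian helper on a little-endian host).
    uint32_t u = static_cast<uint32_t>(v);
    u = (u >> 24) | ((u >> 8) & 0x0000ff00u) | ((u << 8) & 0x00ff0000u) | (u << 24);
    return static_cast<int32_t>(u);
}

int main()
{
    std::ifstream lab("./binary_label", std::ios::binary);  // placeholder path
    int32_t magic = 0, num_item = 0;
    lab.read(reinterpret_cast<char*>(&magic), sizeof(int32_t));
    lab.read(reinterpret_cast<char*>(&num_item), sizeof(int32_t));
    magic = swap_endian(magic);       // expected to be 2050
    num_item = swap_endian(num_item);
    std::cout << "magic " << magic << ", items " << num_item << std::endl;
    for (int i = 0; i < num_item && lab.good(); ++i)
    {
        signed char label, x, y, z;
        lab.read(reinterpret_cast<char*>(&label), 1);
        lab.read(reinterpret_cast<char*>(&x), 1);
        lab.read(reinterpret_cast<char*>(&y), 1);
        lab.read(reinterpret_cast<char*>(&z), 1);
        std::cout << "label " << (int)label << " pose " << (int)x << " "
                  << (int)y << " " << (int)z << std::endl;
    }
    return 0;
}
```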
