remove leveldb dependency, using Input/OutputArray for feature extraction, add the newest model, format fix for OpenCV

pull/276/head
Wangyida 9 years ago
parent 4fe5498a45
commit b831fc3bad
  1. 4
      modules/cnn_3dobj/CMakeLists.txt
  2. 18
      modules/cnn_3dobj/README.md
  3. 55
      modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp
  4. 16
      modules/cnn_3dobj/samples/CMakeLists.txt
  5. 112
      modules/cnn_3dobj/samples/classifyDB_demo.cpp
  6. 49
      modules/cnn_3dobj/samples/classifyIMG_demo.cpp
  7. 94
      modules/cnn_3dobj/samples/data/3d_triplet_galleryIMG.prototxt
  8. BIN
      modules/cnn_3dobj/samples/data/3d_triplet_iter_10000.caffemodel
  9. BIN
      modules/cnn_3dobj/samples/data/3d_triplet_iter_20000.caffemodel
  10. 64
      modules/cnn_3dobj/samples/datatrans_demo.cpp
  11. 99
      modules/cnn_3dobj/samples/feature_extract_demo.cpp
  12. 422
      modules/cnn_3dobj/src/cnn_classification.cpp
  13. 237
      modules/cnn_3dobj/src/cnn_datatrans.cpp
  14. 457
      modules/cnn_3dobj/src/cnn_sphereview.cpp

@ -1,3 +1,3 @@
set(the_description "CNN for 3D object recognition and pose estimation including a completed Sphere View on 3D objects")
ocv_define_module(cnn_3dobj opencv_core opencv_imgproc opencv_viz opencv_highgui caffe protobuf leveldb glog OPTIONAL WRAP python)
target_link_libraries(opencv_cnn_3dobj caffe protobuf leveldb glog)
ocv_define_module(cnn_3dobj opencv_core opencv_imgproc opencv_viz opencv_highgui caffe protobuf glog OPTIONAL WRAP python)
target_link_libraries(opencv_cnn_3dobj caffe protobuf glog)

@ -63,27 +63,11 @@ $ ./examples/triplet/train_3d_triplet.sh
###After doing this, you will get .caffemodel files containing the trained network. I have already provided the net definition .prototxt files and the trained .caffemodel in the <opencv_contrib>/modules/cnn_3dobj/samples/build folder, so you can use them directly without training in Caffe. If you are not interested in feature analysis with the help of the binary files provided in Demo2, just skip to Demo3 for feature extraction or Demo4 for the classifier.
==============
#Demo4:
```
$ cd
$ cd <opencv_contrib>/modules/cnn_3dobj/samples/build
```
#Demo2:
###Convert data into leveldb format from folder ../data/images_all for feature extraction afterwards. The leveldb files including all data will be stored in ../data/dbfile. If you will use the OpenCV defined feature extraction process, you could also skip Demo2 for data converting, just run Demo3 after Demo1 for feature extraction because Demo3 also includes the db file converting process before feature extraction, but if you want to use demo4 for classification, this demo will be used in advance to generate a file name list for the prediction list.
```
$ ./datatrans_test
```
==============
#Demo3:
###feature extraction, this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe and outputting a binary file including all extracted feature.
```
$ ./feature_extract_test
```
###This will extract features from a set of images in a folder as vector<cv::Mat> for further classification, and write a binary file containing the feature vectors of every sample.
###After running this, you will get a binary file storing the features in the ../data/feature folder. I can provide a Matlab script for reading this file if someone needs it. If you don't need the binary file, the features can also be stored in a vector<cv::Mat>.
==============
#Demo4:
###Classifier: this will extract the feature of a single image and compare it with the features of the gallery samples for prediction. Demo2 should be run in advance to generate the file name list used as the prediction list. This demo uses a set of images in a given path for feature extraction; these features serve as the reference for the prediction on the target image. Just run:
```
$ ./classify_test

@ -57,16 +57,13 @@ the use of this software, even if advised of the possibility of such damage.
#include <stdlib.h>
#include <tr1/memory>
#include <dirent.h>
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#include <leveldb/db.h>
#define CPU_ONLY
#include <caffe/blob.hpp>
#include <caffe/common.hpp>
#include <caffe/net.hpp>
#include <caffe/proto/caffe.pb.h>
#include <caffe/util/io.hpp>
#include <caffe/vision_layers.hpp>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"
#include "opencv2/viz/vizcore.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/highgui/highgui_c.h"
@ -135,33 +132,6 @@ class CV_EXPORTS_W IcoSphere
};
/** @brief Helper that converts a folder of images into a leveldb database and extracts CNN features from it.
*/
class CV_EXPORTS_W DataTrans
{
private:
std::set<string> all_class_name; // NOTE(review): presumably the class names collected during conversion - confirm in cnn_datatrans.cpp
std::map<string,int> class2id; // NOTE(review): presumably maps a class name to its integer id - confirm in cnn_datatrans.cpp
public:
DataTrans();
/** @brief Collect the names of the image files found in a directory; each image name is expected to start with an int label.
@param path Directory to scan.
@param files Output list receiving the file names.
@param r NOTE(review): presumably enables recursive traversal - confirm against the implementation.
*/
CV_WRAP void list_dir(const char *path,std::vector<string>& files,bool r);
/** @brief Get the class name for a path.
NOTE(review): the original brief for this method was empty; the exact rule used to derive the name is not visible here.
*/
CV_WRAP string get_classname(string path);
/** @brief Get the label of each image.
@param fileName Name of the image file whose label id is requested.
*/
CV_WRAP int get_labelid(string fileName);
/** @brief Load images.
@param path Path of the image to load.
@param buffer Destination buffer supplied by the caller. NOTE(review): required size is not visible here - confirm.
@param is_color NOTE(review): presumably selects colour versus grayscale loading - confirm.
*/
CV_WRAP void loadimg(string path,char* buffer,bool is_color);
/** @brief Convert a set of images as a leveldb database for CNN training.
@param imgdir Directory holding the source images.
@param outputdb Location of the leveldb database to write.
@param attachdir Directory for additional files describing the conversion results.
@param channel Number of channels of the images.
@param width Width of the images.
@param height Height of the images.
*/
CV_WRAP void convert(string imgdir,string outputdb,string attachdir,int channel,int width,int height);
/** @brief Extract feature into a binary file and vector<cv::Mat> for classification, the model proto and network proto are needed, All images in the file root will be used for feature extraction.
@param pretrained_binary_proto Trained .caffemodel file.
@param feature_extraction_proto Network definition (.prototxt) used for extraction.
@param save_feature_dataset_names Path of the binary file receiving the extracted features.
@param extract_feature_blob_names Name of the blob (layer) whose output is taken as the feature.
@param num_mini_batches Number of mini batches to run through the network.
@param device "CPU" or "GPU".
@param dev_id GPU device id.
@return The extracted features as a vector of cv::Mat.
*/
CV_WRAP std::vector<cv::Mat> feature_extraction_pipeline(std::string pretrained_binary_proto, std::string feature_extraction_proto, std::string save_feature_dataset_names, std::string extract_feature_blob_names, int num_mini_batches, std::string device, int dev_id);
};
class CV_EXPORTS_W Classification
{
private:
@ -180,13 +150,20 @@ class CV_EXPORTS_W Classification
/** @brief Convert the input image to the input image format of the network.
*/
public:
Classification(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file);
Classification();
void list_dir(const char *path,std::vector<string>& files,bool r);
/** @brief Get the file names from a root directory.
*/
void NetSetter(const string& model_file, const string& trained_file, const string& mean_file, const string& cpu_only, int device_id);
/** @brief Initiate a classification structure.
*/
std::vector<std::pair<string, float> > Classify(const std::vector<cv::Mat>& reference, const cv::Mat& img, int N = 4, bool mean_substract = false);
void GetLabellist(const std::vector<string>& name_gallery);
/** @brief Get the label of the gallery images for result displaying in prediction.
*/
std::vector<std::pair<string, float> > Classify(const cv::Mat& reference, const cv::Mat& img, int N, bool mean_substract = false);
/** @brief Make a classification.
*/
cv::Mat feature_extract(const cv::Mat& img, bool mean_subtract);
void FeatureExtract(InputArray inputimg, OutputArray feature, bool mean_subtract);
/** @brief Extract a single feature of one image.
*/
std::vector<int> Argmax(const std::vector<float>& v, int N);

@ -3,19 +3,11 @@ SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb ")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
project(sphereview_test)
find_package(OpenCV REQUIRED)
set(SOURCES_1 sphereview_3dobj_demo.cpp)
set(SOURCES_generator sphereview_3dobj_demo.cpp)
include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(sphereview_test ${SOURCES_1})
add_executable(sphereview_test ${SOURCES_generator})
target_link_libraries(sphereview_test ${OpenCV_LIBS})
set(SOURCES_2 datatrans_demo.cpp)
add_executable(datatrans_test ${SOURCES_2})
target_link_libraries(datatrans_test ${OpenCV_LIBS})
set(SOURCES_3 feature_extract_demo.cpp)
add_executable(feature_extract_test ${SOURCES_3})
target_link_libraries(feature_extract_test ${OpenCV_LIBS})
set(SOURCES_4 classifyIMG_demo.cpp)
add_executable(classify_test ${SOURCES_4})
set(SOURCES_classifier classifyIMG_demo.cpp)
add_executable(classify_test ${SOURCES_classifier})
target_link_libraries(classify_test ${OpenCV_LIBS})

@ -1,112 +0,0 @@
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2009, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <opencv2/cnn_3dobj.hpp>
#include <iomanip>
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
/* Demo entry point.
 *
 * 1. Converts the images in src_dir into a leveldb dataset (DataTrans::convert).
 * 2. Extracts the gallery features from that dataset
 *    (DataTrans::feature_extraction_pipeline).
 * 3. Extracts the feature of target_img and prints the num_candidate gallery
 *    entries reported by Classification::Classify together with their distance.
 *
 * Returns 0 on success (or after printing the help text) and 1 when the
 * target image cannot be read.
 */
int main(int argc, char** argv)
{
    const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}"
        "{src_dir | ../data/images_all/ | Source direction of the images ready for being converted to leveldb dataset.}"
        "{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }"
        "{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }"
        "{channel | 1 | Channel of the images. }"
        "{width | 64 | Width of images}"
        "{height | 64 | Height of images}"
        "{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}"
        "{network_forDB | ../data/3d_triplet_galleryIMG.prototxt | Network definition file used for extracting feature from levelDB data, causion: the path of levelDB training samples must be wrotten in in .prototxt files in Phase TEST}"
        "{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | Output of the extracted feature in form of binary files together with the vector<cv::Mat> features as the feature.}"
        "{extract_feature_blob_names | feat | Layer used for feature extraction in CNN.}"
        "{num_mini_batches | 4 | Batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}"
        "{device | CPU | Device: CPU or GPU.}"
        "{dev_id | 0 | ID of GPU.}"
        "{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
        "{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images.}"
        "{label_file | ../data/dbfileimage_filename | A namelist including all gallery images.}"
        "{target_img | ../data/images_all/2_13.png | Path of image waiting to be classified.}"
        "{num_candidate | 6 | Number of candidates in gallery as the prediction result.}";
    cv::CommandLineParser parser(argc, argv, keys);
    parser.about("Demo for Sphere View data generation");
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    string src_dir = parser.get<string>("src_dir");
    string src_dst = parser.get<string>("src_dst");
    string attach_dir = parser.get<string>("attach_dir");
    int channel = parser.get<int>("channel");
    int width = parser.get<int>("width");
    int height = parser.get<int>("height");
    string caffemodel = parser.get<string>("caffemodel");
    string network_forDB = parser.get<string>("network_forDB");
    string save_feature_dataset_names = parser.get<string>("save_feature_dataset_names");
    string extract_feature_blob_names = parser.get<string>("extract_feature_blob_names");
    int num_mini_batches = parser.get<int>("num_mini_batches");
    string device = parser.get<string>("device");
    int dev_id = parser.get<int>("dev_id");
    string network_forIMG = parser.get<string>("network_forIMG");
    string mean_file = parser.get<string>("mean_file");
    string label_file = parser.get<string>("label_file");
    string target_img = parser.get<string>("target_img");
    int num_candidate = parser.get<int>("num_candidate");

    // Build the leveldb dataset and extract one feature per gallery image.
    cv::cnn_3dobj::DataTrans transTemp;
    transTemp.convert(src_dir,src_dst,attach_dir,channel,width,height);
    std::vector<cv::Mat> feature_reference = transTemp.feature_extraction_pipeline(caffemodel, network_forDB, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id);

    ////start another demo
    cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file);
    std::cout << std::endl << "---------- Prediction for "
              << target_img << " ----------" << std::endl;
    cv::Mat img = cv::imread(target_img, -1);
    // imread returns an empty Mat when the file is missing or cannot be
    // decoded; stop here instead of feeding an empty image to the network.
    if (img.empty())
    {
        std::cerr << "Unable to decode image " << target_img << std::endl;
        return 1;
    }
    std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl;
    std::vector<std::pair<string, float> > prediction;
    for (unsigned int i = 0; i < feature_reference.size(); i++)
        std::cout << feature_reference[i] << endl;
    cv::Mat feature_test = classifier.feature_extract(img, false);
    std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test.t() << std::endl;
    prediction = classifier.Classify(feature_reference, img, num_candidate, false);
    // Print the top N prediction.
    std::cout << std::endl << "---------- Prediction result(distance - file name in gallery) ----------" << std::endl;
    for (size_t i = 0; i < prediction.size(); ++i) {
        std::pair<string, float> p = prediction[i];
        std::cout << std::fixed << std::setprecision(2) << p.second << " - \""
                  << p.first << "\"" << std::endl;
    }
    return 0;
}

@ -44,36 +44,40 @@ int main(int argc, char** argv)
"{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}"
"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}"
"{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images.}"
"{label_file | ../data/label_all.txt | A namelist including all gallery images.}"
"{target_img | ../data/images_all/2_13.png | Path of image waiting to be classified.}"
"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}";
"{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}"
"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
parser.printMessage();
return 0;
}
string src_dir = parser.get<string>("src_dir");
string caffemodel = parser.get<string>("caffemodel");
string network_forIMG = parser.get<string>("network_forIMG");
string mean_file = parser.get<string>("mean_file");
string label_file = parser.get<string>("label_file");
string target_img = parser.get<string>("target_img");
int num_candidate = parser.get<int>("num_candidate");
cv::cnn_3dobj::DataTrans transTemp;
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
cv::cnn_3dobj::Classification classifier;
classifier.NetSetter(network_forIMG, caffemodel, mean_file, device, dev_id);
std::vector<string> name_gallery;
transTemp.list_dir(src_dir.c_str(), name_gallery, false);
classifier.list_dir(src_dir.c_str(), name_gallery, false);
classifier.GetLabellist(name_gallery);
for (unsigned int i = 0; i < name_gallery.size(); i++) {
name_gallery[i] = src_dir + name_gallery[i];
name_gallery[i] = src_dir + name_gallery[i];
}
////start another demo
cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file);
std::vector<cv::Mat> feature_reference;
std::vector<cv::Mat> img_gallery;
cv::Mat feature_reference;
for (unsigned int i = 0; i < name_gallery.size(); i++) {
cv::Mat img_gallery = cv::imread(name_gallery[i], -1);
feature_reference.push_back(classifier.feature_extract(img_gallery, false));
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
classifier.FeatureExtract(img_gallery, feature_reference, false);
std::cout << std::endl << "---------- Prediction for "
<< target_img << " ----------" << std::endl;
@ -82,17 +86,18 @@ int main(int argc, char** argv)
// CHECK(!img.empty()) << "Unable to decode image " << target_img;
std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl;
std::vector<std::pair<string, float> > prediction;
for (unsigned int i = 0; i < feature_reference.size(); i++)
std::cout << feature_reference[i].t() << endl;
cv::Mat feature_test = classifier.feature_extract(img, false);
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test.t() << std::endl;
for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
classifier.FeatureExtract(img, feature_test, false);
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl;
prediction = classifier.Classify(feature_reference, img, num_candidate, false);
// Print the top N prediction.
std::cout << std::endl << "---------- Prediction result(distance - file name in gallery) ----------" << std::endl;
std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < prediction.size(); ++i) {
std::pair<string, float> p = prediction[i];
std::cout << std::fixed << std::setprecision(2) << p.second << " - \""
<< p.first << "\"" << std::endl;
std::pair<string, float> p = prediction[i];
std::cout << std::fixed << std::setprecision(2) << p.second << " - \""
<< p.first << "\"" << std::endl;
}
return 0;
}

@ -1,94 +0,0 @@
name: "3d_triplet"
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
data_param {
source: "/home/wangyida/Desktop/opencv_contrib/modules/nouse_test/samples/data/dbfile"
batch_size: 69
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
convolution_param {
num_output: 16
kernel_size: 8
stride: 1
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "pool1"
top: "pool1"
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
convolution_param {
num_output: 7
kernel_size: 5
stride: 1
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "pool2"
top: "pool2"
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
inner_product_param {
num_output: 256
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "feat"
type: "InnerProduct"
bottom: "ip1"
top: "feat"
inner_product_param {
num_output: 4
}
}

@ -1,64 +0,0 @@
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2009, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <opencv2/cnn_3dobj.hpp>
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
int main(int argc, char* argv[])
{
const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}"
"{src_dir | ../data/images_all | Source direction of the images ready for being converted to leveldb dataset.}"
"{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }"
"{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }"
"{channel | 1 | Channel of the images. }"
"{width | 64 | Width of images}"
"{height | 64 | Height of images}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string src_dir = parser.get<string>("src_dir");
string src_dst = parser.get<string>("src_dst");
string attach_dir = parser.get<string>("attach_dir");
int channel = parser.get<int>("channel");
int width = parser.get<int>("width");
int height = parser.get<int>("height");
cv::cnn_3dobj::DataTrans Trans;
Trans.convert(src_dir,src_dst,attach_dir,channel,width,height);
std::cout << std::endl << "All featrues of images in: " << std::endl << src_dir << std::endl << "have been converted to levelDB data in: " << std::endl << src_dst << std::endl << "for extracting feature of gallery images in classification efficiently, this convertion is not needed in feature extraction of test image" << std::endl;
}

@ -1,99 +0,0 @@
/*
* Software License Agreement (BSD License)
*
* Copyright (c) 2009, Willow Garage, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of Willow Garage, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <opencv2/cnn_3dobj.hpp>
#include <stdio.h> // for snprintf
#include <tr1/memory>
#include <string>
#include <vector>
#include "google/protobuf/text_format.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#define CPU_ONLY
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
//using boost::shared_ptr;
using std::string;
//namespace db = caffe::db;
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
int main(int argc, char* argv[])
{
const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}"
"{src_dir | ../data/images_all/ | Source direction of the images ready for being converted to leveldb dataset.}"
"{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }"
"{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }"
"{channel | 1 | Channel of the images. }"
"{width | 64 | Width of images}"
"{height | 64 | Height of images}"
"{pretrained_binary_proto | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}"
"{feature_extraction_proto | ../data/3d_triplet_train_test.prototxt | network definition in .prototxt the path of the training samples must be wrotten in in .prototxt files in Phase TEST}"
"{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | the output of the extracted feature in form of binary files together with the vector<cv::Mat> features as the feature.}"
"{extract_feature_blob_names | feat | the layer used for feature extraction in CNN.}"
"{num_mini_batches | 6 | batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string src_dir = parser.get<string>("src_dir");
string src_dst = parser.get<string>("src_dst");
string attach_dir = parser.get<string>("attach_dir");
int channel = parser.get<int>("channel");
int width = parser.get<int>("width");
int height = parser.get<int>("height");
string pretrained_binary_proto = parser.get<string>("pretrained_binary_proto");
string feature_extraction_proto = parser.get<string>("feature_extraction_proto");
string save_feature_dataset_names = parser.get<string>("save_feature_dataset_names");
string extract_feature_blob_names = parser.get<string>("extract_feature_blob_names");
int num_mini_batches = parser.get<int>("num_mini_batches");
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
cv::cnn_3dobj::DataTrans transTemp;
transTemp.convert(src_dir,src_dst,attach_dir,channel,width,height);
std::vector<cv::Mat> extractedFeature = transTemp.feature_extraction_pipeline(pretrained_binary_proto, feature_extraction_proto, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id);
}

@ -6,192 +6,236 @@ namespace cv
{
namespace cnn_3dobj
{
/* Build a classifier from a network definition, trained weights, a mean
 * file and a label list.
 *
 * model_file   - network definition loaded in TEST phase.
 * trained_file - trained weights copied into the network.
 * mean_file    - binaryproto mean, forwarded to SetMean().
 * label_file   - text file with one label per line, appended to labels_.
 *
 * The network must expose exactly one input (with 1 or 3 channels) and one
 * output; violations and an unreadable label file abort through CHECK. */
Classification::Classification(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file) {
    // The compute mode is selected at build time through CPU_ONLY.
#ifdef CPU_ONLY
    caffe::Caffe::set_mode(caffe::Caffe::CPU);
#else
    caffe::Caffe::set_mode(caffe::Caffe::GPU);
#endif

    // Network structure first, then the trained parameters.
    net_.reset(new Net<float>(model_file, TEST));
    net_->CopyTrainedLayersFrom(trained_file);
    CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
    CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

    // Cache the channel count and spatial size of the input blob.
    Blob<float>* const in_blob = net_->input_blobs()[0];
    num_channels_ = in_blob->channels();
    CHECK(num_channels_ == 3 || num_channels_ == 1)
        << "Input layer should have 1 or 3 channels.";
    input_geometry_ = cv::Size(in_blob->width(), in_blob->height());

    // Mean image, read from the binaryproto file.
    SetMean(mean_file);

    // One label per line. The label count is not compared against the
    // output layer dimension (that check is disabled).
    std::ifstream label_stream(label_file.c_str());
    CHECK(label_stream) << "Unable to open labels file " << label_file;
    for (string entry; std::getline(label_stream, entry); )
        labels_.push_back(entry);
}
/*bool Classifier::PairCompare(const std::pair<float, int>& lhs,
const std::pair<float, int>& rhs) {
return lhs.first > rhs.first;
}*/
/* Return the indices of the N smallest values of v, ordered by ascending
 * value (ties are ordered by ascending index).
 *
 * Despite the name, the default ascending comparison on (value, index)
 * pairs is used, so the smallest values come first.
 *
 * N is clamped to the range [0, v.size()], so the result holds at most
 * v.size() indices and is empty for an empty v or a non-positive N. */
std::vector<int> Classification::Argmax(const std::vector<float>& v, int N) {
    std::vector<std::pair<float, int> > pairs;
    pairs.reserve(v.size());
    for (size_t i = 0; i < v.size(); ++i)
        pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

    // Never sort or read past the end of the data: an unclamped N larger
    // than v.size() (or a negative N) is undefined behaviour.
    size_t count = 0;
    if (N > 0)
        count = std::min(static_cast<size_t>(N), pairs.size());

    std::partial_sort(pairs.begin(), pairs.begin() + count, pairs.end());

    std::vector<int> result;
    result.reserve(count);
    for (size_t i = 0; i < count; ++i)
        result.push_back(pairs[i].second);
    return result;
}
/* Return the N gallery entries closest to the feature of img.
 *
 * reference      - one feature per gallery image, in the same order as labels_.
 * img            - image to classify; its feature is obtained with feature_extract().
 * N              - number of candidates requested; clamped to [0, reference.size()].
 * mean_substract - forwarded to feature_extract().
 *
 * Each result pairs a gallery label with the L2 distance (cv::norm of the
 * difference) between that gallery feature and the image feature, nearest
 * first. Aborts through CHECK if a selected gallery index has no label. */
std::vector<std::pair<string, float> > Classification::Classify(const std::vector<cv::Mat>& reference, const cv::Mat& img, int N, bool mean_substract) {
    const cv::Mat feature = feature_extract(img, mean_substract);

    // Distance from the query feature to every gallery feature.
    std::vector<float> output;
    output.reserve(reference.size());
    for (size_t i = 0; i < reference.size(); i++)
        output.push_back(static_cast<float>(cv::norm(reference[i] - feature)));

    // Asking for more candidates than gallery entries (or a negative count)
    // would read out of range, so clamp before ranking.
    int top = N < 0 ? 0 : N;
    if (static_cast<size_t>(top) > output.size())
        top = static_cast<int>(output.size());

    const std::vector<int> nearest = Argmax(output, top);
    std::vector<std::pair<string, float> > predictions;
    predictions.reserve(nearest.size());
    for (size_t i = 0; i < nearest.size(); ++i) {
        const int idx = nearest[i];
        CHECK_LT(static_cast<size_t>(idx), labels_.size())
            << "No label available for gallery entry " << idx;
        predictions.push_back(std::make_pair(labels_[idx], output[idx]));
    }
    return predictions;
}
/* Load the mean file in binaryproto format. */
void Classification::SetMean(const string& mean_file) {
BlobProto blob_proto;
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
/* Convert from BlobProto to Blob<float> */
Blob<float> mean_blob;
mean_blob.FromProto(blob_proto);
CHECK_EQ(mean_blob.channels(), num_channels_)
<< "Number of channels of mean file doesn't match input layer.";
/* The format of the mean file is planar 32-bit float BGR or grayscale. */
std::vector<cv::Mat> channels;
float* data = mean_blob.mutable_cpu_data();
for (int i = 0; i < num_channels_; ++i) {
/* Extract an individual channel. */
cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
channels.push_back(channel);
data += mean_blob.height() * mean_blob.width();
}
/* Merge the separate channels into a single image. */
cv::Mat mean;
cv::merge(channels, mean);
/* Compute the global mean pixel value and create a mean image
* filled with this value. */
cv::Scalar channel_mean = cv::mean(mean);
mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
}
/* Run one image through the network and return the values of the output
 * blob as a single-column float cv::Mat that owns its data.
 *
 * img           - input image, converted by Preprocess().
 * mean_subtract - forwarded to Preprocess(). */
cv::Mat Classification::feature_extract(const cv::Mat& img, bool mean_subtract) {
    // Shape the input blob for a single image and propagate the new
    // dimensions to all layers.
    Blob<float>* const in_blob = net_->input_blobs()[0];
    in_blob->Reshape(1, num_channels_,
                     input_geometry_.height, input_geometry_.width);
    net_->Reshape();

    // Preprocess() writes straight into the Mats that wrap the input blob.
    std::vector<cv::Mat> planes;
    WrapInputLayer(&planes);
    Preprocess(img, &planes, mean_subtract);

    net_->ForwardPrefilled();

    // Copy channels() floats out of the output blob.
    Blob<float>* const out_blob = net_->output_blobs()[0];
    const float* const first = out_blob->cpu_data();
    const std::vector<float> values(first, first + out_blob->channels());
    return cv::Mat(values, true);
}
/* Append to input_channels one cv::Mat header per channel of the network's
 * first input blob. The headers point straight into the blob's CPU buffer,
 * so whatever is later written into them lands in the input layer without
 * an extra memcpy (and without relying on cudaMemcpy2D). */
void Classification::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
    Blob<float>* in_blob = net_->input_blobs()[0];
    const int cols = in_blob->width();
    const int rows = in_blob->height();
    float* plane = in_blob->mutable_cpu_data();
    /* Consecutive planes are width * height floats apart. */
    for (int c = 0; c < in_blob->channels(); ++c, plane += cols * rows)
        input_channels->push_back(cv::Mat(rows, cols, CV_32FC1, plane));
}
/* Convert img to the channel count, size and float type of the network
 * input, optionally subtract mean_, and split the result into
 * input_channels (which are expected to wrap the input blob). */
void Classification::Preprocess(const cv::Mat& img,
        std::vector<cv::Mat>* input_channels, bool mean_subtract) {
    /* Pick the colour conversion, if any, that maps the image's channel
     * count onto the network's. */
    const int src_channels = img.channels();
    int code = -1;
    if (num_channels_ == 1) {
        if (src_channels == 3)
            code = CV_BGR2GRAY;
        else if (src_channels == 4)
            code = CV_BGRA2GRAY;
    } else if (num_channels_ == 3) {
        if (src_channels == 4)
            code = CV_BGRA2BGR;
        else if (src_channels == 1)
            code = CV_GRAY2BGR;
    }
    cv::Mat sample;
    if (code >= 0)
        cv::cvtColor(img, sample, code);
    else
        sample = img;

    /* Bring the image to the input geometry. */
    cv::Mat resized;
    if (sample.size() == input_geometry_)
        resized = sample;
    else
        cv::resize(sample, resized, input_geometry_);

    /* Convert to 32-bit float. */
    cv::Mat as_float;
    resized.convertTo(as_float, num_channels_ == 3 ? CV_32FC3 : CV_32FC1);

    /* Optional mean subtraction. */
    cv::Mat normalized;
    if (!mean_subtract)
        normalized = as_float;
    else
        cv::subtract(as_float, mean_, normalized);

    /* Splitting into the wrapped channels writes the planes directly into
     * the input layer; the CHECK verifies the wrappers still point there. */
    cv::split(normalized, *input_channels);
    CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
          == net_->input_blobs()[0]->cpu_data())
        << "Input channels are not wrapping the input layer of the network.";
}
}}
/* Default constructor: performs no work in its body. */
Classification::Classification()
{
}
void Classification::list_dir(const char *path,vector<string>& files,bool r)
{
DIR *pDir;
struct dirent *ent;
char childpath[512];
pDir = opendir(path);
memset(childpath, 0, sizeof(childpath));
while ((ent = readdir(pDir)) != NULL)
{
if (ent->d_type & DT_DIR)
{
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
{
continue;
}
if(r)
{
sprintf(childpath, "%s/%s", path, ent->d_name);
Classification::list_dir(childpath,files,false);
}
}
else
{
files.push_back(ent->d_name);
}
}
sort(files.begin(),files.end());
};
void Classification::NetSetter(const string& model_file, const string& trained_file, const string& mean_file, const string& cpu_only, int device_id)
{
if (strcmp(cpu_only.c_str(), "CPU") == 0)
{
caffe::Caffe::set_mode(caffe::Caffe::CPU);
}
else
{
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(device_id);
}
/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(trained_file);
CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
CHECK(num_channels_ == 3 || num_channels_ == 1)
<< "Input layer should have 1 or 3 channels.";
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
/* Load the binaryproto mean file. */
SetMean(mean_file);
};
void Classification::GetLabellist(const std::vector<string>& name_gallery)
{
for (unsigned int i = 0; i < name_gallery.size(); ++i)
labels_.push_back(name_gallery[i]);
};
/* Return the indices of the N smallest values of vector v, ordered from the
 * smallest value upwards (ties are ordered by index).
 *
 * Despite the name, the sort is ascending: Classify() feeds distances in, so
 * the result is the N nearest entries. N is clamped to [0, v.size()], so
 * the returned vector may hold fewer than N indices. */
std::vector<int> Classification::Argmax(const std::vector<float>& v, int N)
{
    std::vector<std::pair<float, int> > pairs;
    pairs.reserve(v.size());
    for (size_t i = 0; i < v.size(); ++i)
        pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

    /* partial_sort requires its middle iterator to lie inside the range. */
    size_t count = N < 0 ? 0 : static_cast<size_t>(N);
    if (count > pairs.size())
        count = pairs.size();
    std::partial_sort(pairs.begin(), pairs.begin() + count, pairs.end());

    std::vector<int> result;
    result.reserve(count);
    for (size_t i = 0; i < count; ++i)
        result.push_back(pairs[i].second);
    return result;
}
/* Return up to N (label, distance) predictions for img, nearest first.
 *
 * reference      : one reference feature per row.
 * img            : image whose feature is extracted with FeatureExtract().
 * N              : number of predictions wanted; clamped to the number of
 *                  reference rows.
 * mean_substract : forwarded to FeatureExtract().
 *
 * The distance is cv::norm() of the difference between a reference row and
 * the extracted feature. Row i of reference is labelled labels_[i]. */
std::vector<std::pair<string, float> > Classification::Classify(const cv::Mat& reference, const cv::Mat& img, int N, bool mean_substract)
{
    cv::Mat feature;
    Classification::FeatureExtract(img, feature, mean_substract);

    std::vector<float> output;
    for (int i = 0; i < reference.rows; i++)
    {
        cv::Mat f1 = reference.row(i);
        cv::Mat f2 = feature;
        cv::Mat output_temp = f1-f2;
        output.push_back(cv::norm(output_temp));
    }

    /* Never ask for more neighbours than there are reference rows. */
    if (N > static_cast<int>(output.size()))
        N = static_cast<int>(output.size());
    if (N < 0)
        N = 0;

    std::vector<int> maxN = Argmax(output, N);
    std::vector<std::pair<string, float> > predictions;
    for (int i = 0; i < N; ++i)
    {
        int idx = maxN[i];
        /* Every reference row needs a label; fail loudly instead of reading
         * past the end of labels_. */
        CHECK_LT(static_cast<size_t>(idx), labels_.size())
            << "No label registered for reference row " << idx;
        predictions.push_back(std::make_pair(labels_[idx], output[idx]));
    }
    return predictions;
}
/* Load the mean file in binaryproto format. */
void Classification::SetMean(const string& mean_file)
{
BlobProto blob_proto;
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
/* Convert from BlobProto to Blob<float> */
Blob<float> mean_blob;
mean_blob.FromProto(blob_proto);
CHECK_EQ(mean_blob.channels(), num_channels_)
<< "Number of channels of mean file doesn't match input layer.";
/* The format of the mean file is planar 32-bit float BGR or grayscale. */
std::vector<cv::Mat> channels;
float* data = mean_blob.mutable_cpu_data();
for (int i = 0; i < num_channels_; ++i)
{
/* Extract an individual channel. */
cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
channels.push_back(channel);
data += mean_blob.height() * mean_blob.width();
}
/* Merge the separate channels into a single image. */
cv::Mat mean;
cv::merge(channels, mean);
/* Compute the global mean pixel value and create a mean image
* filled with this value. */
cv::Scalar channel_mean = cv::mean(mean);
mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
};
/* Extract network features for one image or for a list of images.
 *
 * inputimg      : either a single cv::Mat or a vector<Mat>.
 * feature       : receives the features, one row per input image; each row
 *                 holds the channels() values of the first output blob.
 * mean_subtract : forwarded to Preprocess().
 *
 * Each image is pushed through the network separately (batch size 1). */
void Classification::FeatureExtract(InputArray inputimg, OutputArray feature, bool mean_subtract)
{
    Blob<float>* input_layer = net_->input_blobs()[0];
    input_layer->Reshape(1, num_channels_,
        input_geometry_.height, input_geometry_.width);
    /* Forward dimension change to all layers. */
    net_->Reshape();
    std::vector<cv::Mat> input_channels;
    WrapInputLayer(&input_channels);
    /* Use the library's named constant instead of its numeric value. */
    if (inputimg.kind() == _InputArray::MAT)
    {/* this is a Mat */
        Mat img = inputimg.getMat();
        Preprocess(img, &input_channels, mean_subtract);
        net_->ForwardPrefilled();
        /* Copy the output layer to a std::vector */
        Blob<float>* output_layer = net_->output_blobs()[0];
        const float* begin = output_layer->cpu_data();
        const float* end = begin + output_layer->channels();
        std::vector<float> featureVec = std::vector<float>(begin, end);
        /* Transpose so that the feature is a single row. */
        cv::Mat feature_mat = cv::Mat(featureVec, true).t();
        feature_mat.copyTo(feature);
    }
    else
    {/* This is a vector<Mat> */
        vector<Mat> img;
        inputimg.getMatVector(img);
        Mat feature_vector;
        for (unsigned int i = 0; i < img.size(); ++i)
        {
            Preprocess(img[i], &input_channels, mean_subtract);
            net_->ForwardPrefilled();
            /* Copy the output layer to a std::vector */
            Blob<float>* output_layer = net_->output_blobs()[0];
            const float* begin = output_layer->cpu_data();
            const float* end = begin + output_layer->channels();
            std::vector<float> featureVec = std::vector<float>(begin, end);
            if (i == 0)
            {
                /* The first feature fixes the row width; grow the matrix to
                 * one row per image. */
                feature_vector = cv::Mat(featureVec, true).t();
                int dim_feature = feature_vector.cols;
                feature_vector.resize(img.size(), dim_feature);
            }
            /* Assigning a matrix expression writes into the existing row. */
            feature_vector.row(i) = cv::Mat(featureVec, true).t();
        }
        feature_vector.copyTo(feature);
    }
}
/* Append to input_channels one cv::Mat header per channel of the network's
 * first input blob. The headers point straight into the blob's CPU buffer,
 * so whatever is later written into them lands in the input layer without
 * an extra memcpy (and without relying on cudaMemcpy2D). */
void Classification::WrapInputLayer(std::vector<cv::Mat>* input_channels)
{
    Blob<float>* in_blob = net_->input_blobs()[0];
    const int cols = in_blob->width();
    const int rows = in_blob->height();
    float* plane = in_blob->mutable_cpu_data();
    /* Consecutive planes are width * height floats apart. */
    for (int c = 0; c < in_blob->channels(); ++c, plane += cols * rows)
    {
        input_channels->push_back(cv::Mat(rows, cols, CV_32FC1, plane));
    }
}
/* Convert img to the channel count, size and float type of the network
 * input, optionally subtract mean_, and split the result into
 * input_channels (which are expected to wrap the input blob). */
void Classification::Preprocess(const cv::Mat& img,
        std::vector<cv::Mat>* input_channels, bool mean_subtract)
{
    /* Pick the colour conversion, if any, that maps the image's channel
     * count onto the network's. */
    const int src_channels = img.channels();
    int code = -1;
    if (num_channels_ == 1)
    {
        if (src_channels == 3)
            code = CV_BGR2GRAY;
        else if (src_channels == 4)
            code = CV_BGRA2GRAY;
    }
    else if (num_channels_ == 3)
    {
        if (src_channels == 4)
            code = CV_BGRA2BGR;
        else if (src_channels == 1)
            code = CV_GRAY2BGR;
    }
    cv::Mat sample;
    if (code >= 0)
        cv::cvtColor(img, sample, code);
    else
        sample = img;

    /* Bring the image to the input geometry. */
    cv::Mat resized;
    if (sample.size() == input_geometry_)
        resized = sample;
    else
        cv::resize(sample, resized, input_geometry_);

    /* Convert to 32-bit float. */
    cv::Mat as_float;
    resized.convertTo(as_float, num_channels_ == 3 ? CV_32FC3 : CV_32FC1);

    /* Optional mean subtraction. */
    cv::Mat normalized;
    if (!mean_subtract)
        normalized = as_float;
    else
        cv::subtract(as_float, mean_, normalized);

    /* Splitting into the wrapped channels writes the planes directly into
     * the input layer; the CHECK verifies the wrappers still point there. */
    cv::split(normalized, *input_channels);
    CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
          == net_->input_blobs()[0]->cpu_data())
        << "Input channels are not wrapping the input layer of the network.";
}
}
}

@ -1,237 +0,0 @@
#include "precomp.hpp"
using std::string;
using namespace std;
namespace cv
{
namespace cnn_3dobj
{
/* Default constructor: performs no work in its body. */
DataTrans::DataTrans() {}
void DataTrans::list_dir(const char *path,vector<string>& files,bool r)
{
DIR *pDir;
struct dirent *ent;
char childpath[512];
pDir = opendir(path);
memset(childpath, 0, sizeof(childpath));
while ((ent = readdir(pDir)) != NULL)
{
if (ent->d_type & DT_DIR)
{
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
{
continue;
}
if(r)
{
sprintf(childpath, "%s/%s", path, ent->d_name);
DataTrans::list_dir(childpath,files,false);
}
}
else
{
files.push_back(ent->d_name);
}
}
sort(files.begin(),files.end());
};
/* Return the part of `path` that precedes its last underscore. */
string DataTrans::get_classname(string path)
{
    const int cut = path.find_last_of('_');
    /* Copy the first `cut` characters of path. */
    return string(path, 0, cut);
}
/* Map the class name derived from fileName to a numeric id. The class name
 * is recorded in all_class_name; a name not yet present in class2id is
 * given the next free id (the current size of class2id). */
int DataTrans::get_labelid(string fileName)
{
    const string cls = get_classname(fileName);
    all_class_name.insert(cls);

    map<string,int>::iterator hit = class2id.find(cls);
    if (hit != class2id.end())
        return hit->second;

    /* Unknown class: register it; `hit` (== end()) serves as insertion hint. */
    const int fresh = class2id.size();
    class2id.insert(hit, std::make_pair(cls, fresh));
    return fresh;
}
/* Load an image and copy its pixels into `buffer` in planar order
 * (all of channel 0 row by row, then channel 1, ...).
 *
 * path     : image file to read.
 * buffer   : destination; the caller must provide at least
 *            rows * cols * (is_color ? 3 : 1) bytes for the loaded image.
 * is_color : passed to cv::imread as the load flag; false selects one
 *            channel, true selects three.
 *
 * If the image cannot be read it has zero rows and nothing is written. */
void DataTrans::loadimg(string path,char* buffer,const bool is_color)
{
    cv::Mat img = cv::imread(path, is_color);
    const int rows = img.rows;
    const int cols = img.cols;
    const int channel = is_color ? 3 : 1;
    /* Index by the image's real channel count: treating a single-channel
     * image as Vec3b reads the wrong pixels and runs past each row. */
    const int stride = img.channels();
    int pos = 0;
    for (int c = 0; c < channel; c++)
    {
        for (int row = 0; row < rows; row++)
        {
            const uchar* line = img.ptr<uchar>(row);
            for (int col = 0; col < cols; col++)
            {
                buffer[pos++] = line[col * stride + c];
            }
        }
    }
}
/* Store every image found in `imgdir` as a caffe::Datum in the leveldb
 * database `outputdb`, and write two text logs next to it.
 *
 * imgdir    : directory prefix; file names are appended to it directly.
 * outputdb  : leveldb path (created if missing).
 * attachdir : prefix for the logs "image_filename" (index and file name per
 *             line) and "image_classname" (one class name per line).
 * channel, width, height : shape recorded in each Datum; their product is
 *             the number of bytes stored per image.
 *
 * NOTE(review): images are loaded single-channel via loadimg(), which
 * writes rows * cols bytes regardless of width/height - the caller must
 * make sure the images match the requested shape. */
void DataTrans::convert(string imgdir,string outputdb,string attachdir,int channel,int width,int height)
{
    leveldb::DB* db;
    leveldb::Options options;
    options.create_if_missing = true;
    caffe::Datum datum;
    datum.set_channels(channel);
    datum.set_height(height);
    datum.set_width(width);
    const int image_size = channel*width*height;
    CHECK_GT(image_size, 0) << "channel, width and height must be positive.";
    /* Heap storage instead of a variable-length array on the stack. */
    std::vector<char> pixels(image_size);
    char* buffer = &pixels[0];
    /* Separate, fixed-size buffer for the record key. */
    char key[16];
    string value;
    CHECK(leveldb::DB::Open(options, outputdb, &db).ok());
    vector<string> filenames;
    list_dir(imgdir.c_str(),filenames, false);
    string img_log = attachdir+"image_filename";
    ofstream writefile(img_log.c_str());
    assert(writefile.is_open());
    for(int i=0;i<(int)filenames.size();i++)
    {
        string path= imgdir;
        path.append(filenames[i]);
        loadimg(path,buffer,false);
        int labelid = get_labelid(filenames[i]);
        datum.set_label(labelid);
        datum.set_data(buffer,image_size);
        datum.SerializeToString(&value);
        snprintf(key, sizeof(key), "%05d", i);
        printf("\nclassid:%d classname:%s abspath:%s",labelid,get_classname(filenames[i]).c_str(),path.c_str());
        /* Report a failed write instead of dropping the record silently. */
        if (!db->Put(leveldb::WriteOptions(),string(key),value).ok())
        {
            LOG(ERROR) << "Failed to store " << path << " in " << outputdb;
        }
        writefile<<i<<" "<<filenames[i]<<"\n";
    }
    delete db;
    writefile.close();

    /* Second log: every class name seen so far. */
    img_log = attachdir+"image_classname";
    writefile.open(img_log.c_str());
    assert(writefile.is_open());
    for (set<string>::iterator iter = all_class_name.begin();
         iter != all_class_name.end(); ++iter)
    {
        writefile<<(*iter)<<"\n";
    }
    writefile.close();
}
/* Run `num_mini_batches` forward passes of a network and collect the
 * contents of one blob.
 *
 * pretrained_binary_proto    : trained weights.
 * feature_extraction_proto   : network definition.
 * save_feature_dataset_names : binary output file. It receives the feature
 *                              dimension and the feature count as two ints,
 *                              followed by the raw float features.
 * extract_feature_blob_names : name of the blob to read.
 * device, dev_id             : "GPU" selects GPU mode on device dev_id
 *                              (must be >= 0); anything else selects CPU.
 *
 * Returns one 1 x dim CV_32FC1 matrix per extracted feature. */
std::vector<cv::Mat> DataTrans::feature_extraction_pipeline(std::string pretrained_binary_proto, std::string feature_extraction_proto, std::string save_feature_dataset_names, std::string extract_feature_blob_names, int num_mini_batches, std::string device, int dev_id) {
    if (strcmp(device.c_str(), "GPU") == 0) {
        LOG(ERROR)<< "Using GPU";
        const int device_id = dev_id;
        CHECK_GE(device_id, 0);
        LOG(ERROR) << "Using Device_id=" << device_id;
        Caffe::SetDevice(device_id);
        Caffe::set_mode(Caffe::GPU);
    } else {
        LOG(ERROR) << "Using CPU";
        Caffe::set_mode(Caffe::CPU);
    }
    boost::shared_ptr<Net<float> > feature_extraction_net(
        new Net<float>(feature_extraction_proto, caffe::TEST));
    feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto);

    std::vector<std::string> blob_names;
    blob_names.push_back(extract_feature_blob_names);
    std::vector<std::string> dataset_names;
    dataset_names.push_back(save_feature_dataset_names);
    CHECK_EQ(blob_names.size(), dataset_names.size()) <<
        " the number of blob names and dataset names must be equal";
    size_t num_features = blob_names.size();
    for (size_t i = 0; i < num_features; i++) {
        CHECK(feature_extraction_net->has_blob(blob_names[i]))
            << "Unknown feature blob name " << blob_names[i]
            << " in the network " << feature_extraction_proto;
    }

    std::vector<FILE*> files;
    for (size_t i = 0; i < num_features; ++i)
    {
        LOG(INFO) << "Opening file " << dataset_names[i];
        FILE * temp = fopen(dataset_names[i].c_str(), "wb");
        /* fwrite on a NULL stream is undefined; stop with a clear message. */
        CHECK(temp != NULL) << "Cannot open " << dataset_names[i] << " for writing";
        files.push_back(temp);
    }

    LOG(ERROR)<< "Extacting Features";
    std::vector<cv::Mat> featureVec;
    std::vector<Blob<float>*> input_vec;
    std::vector<int> image_indices(num_features, 0);
    for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) {
        feature_extraction_net->Forward(input_vec);
        for (size_t i = 0; i < num_features; ++i) {
            const boost::shared_ptr<Blob<float> > feature_blob = feature_extraction_net
                ->blob_by_name(blob_names[i]);
            int batch_size = feature_blob->num();
            int dim_features = feature_blob->count() / batch_size;
            if (batch_index == 0)
            {
                /* File header: feature dimension, then total feature count. */
                int fea_num = batch_size*num_mini_batches;
                fwrite(&dim_features, sizeof(int), 1, files[i]);
                fwrite(&fea_num, sizeof(int), 1, files[i]);
            }
            const float* feature_blob_data;
            for (int n = 0; n < batch_size; ++n) {
                feature_blob_data = feature_blob->cpu_data() +
                    feature_blob->offset(n);
                fwrite(feature_blob_data, sizeof(float), dim_features, files[i]);
                /* Keep an in-memory copy of the same feature. */
                cv::Mat tempfeat = cv::Mat(1, dim_features, CV_32FC1);
                for (int dim = 0; dim < dim_features; dim++) {
                    tempfeat.at<float>(0,dim) = *(feature_blob_data++);
                }
                featureVec.push_back(tempfeat);
                ++image_indices[i];
                if (image_indices[i] % 1000 == 0) {
                    LOG(ERROR)<< "Extracted features of " << image_indices[i] <<
                        " query images for feature blob " << blob_names[i];
                }
            } // for (int n = 0; n < batch_size; ++n)
        } // for (int i = 0; i < num_features; ++i)
    } // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index)
    // close the output files
    for (size_t i = 0; i < num_features; ++i) {
        fclose(files[i]);
    }
    LOG(ERROR)<< "Successfully extracted the features!";
    return featureVec;
}
}}

@ -6,235 +6,230 @@ namespace cv
{
namespace cnn_3dobj
{
/* Build the list of view points (CameraPos) by subdividing the 20 faces of
 * the start polyhedron `depth_in` times, then remove repeated points.
 *
 * radius_in : scales the start coordinates after truncation to int.
 * depth_in  : number of subdivision levels passed to subdivide().
 *
 * The resulting points are printed to standard output. */
IcoSphere::IcoSphere(float radius_in, int depth_in)
{
    X = 0.5f;
    Z = 0.5f;
    X *= (int)radius_in;
    Z *= (int)radius_in;
    /* Two points whose coordinates all differ by less than this are
     * considered the same point. */
    diff = 0.00000005964;
    float vdata[12][3] = { { -X, 0.0f, Z }, { X, 0.0f, Z },
        { -X, 0.0f, -Z }, { X, 0.0f, -Z }, { 0.0f, Z, X }, { 0.0f, Z, -X },
        { 0.0f, -Z, X }, { 0.0f, -Z, -X }, { Z, X, 0.0f }, { -Z, X, 0.0f },
        { Z, -X, 0.0f }, { -Z, -X, 0.0f } };
    int tindices[20][3] = { { 0, 4, 1 }, { 0, 9, 4 }, { 9, 5, 4 },
        { 4, 5, 8 }, { 4, 8, 1 }, { 8, 10, 1 }, { 8, 3, 10 }, { 5, 3, 8 },
        { 5, 2, 3 }, { 2, 7, 3 }, { 7, 10, 3 }, { 7, 6, 10 }, { 7, 11, 6 },
        { 11, 0, 6 }, { 0, 1, 6 }, { 6, 1, 10 }, { 9, 0, 11 },
        { 9, 11, 2 }, { 9, 2, 5 }, { 7, 2, 11 } };
    // Iterate over the faces
    for (int i = 0; i < 20; ++i)
    {
        subdivide(vdata[tindices[i][0]], vdata[tindices[i][1]],
            vdata[tindices[i][2]], depth_in);
    }
    /* Keep the first occurrence of every point. The comparison must use
     * absolute differences: a signed difference is below `diff` whenever
     * the earlier coordinate is merely smaller, which discards distinct
     * points. */
    CameraPos_temp.push_back(CameraPos[0]);
    for (int j = 1; j < int(CameraPos.size()); j++)
    {
        bool duplicate = false;
        for (int k = 0; k < j && !duplicate; k++)
        {
            duplicate = fabs(CameraPos.at(k).x - CameraPos.at(j).x) < diff
                     && fabs(CameraPos.at(k).y - CameraPos.at(j).y) < diff
                     && fabs(CameraPos.at(k).z - CameraPos.at(j).z) < diff;
        }
        if (!duplicate)
            CameraPos_temp.push_back(CameraPos[j]);
    }
    CameraPos = CameraPos_temp;
    cout << "View points in total: " << CameraPos.size() << endl;
    cout << "The coordinate of view point: " << endl;
    for (int i = 0; i < (int)CameraPos.size(); i++)
    {
        cout << CameraPos.at(i).x <<' '<< CameraPos.at(i).y << ' ' << CameraPos.at(i).z << endl;
    }
}
/* Scale the 3-component vector v in place to unit Euclidean length. */
void IcoSphere::norm(float v[])
{
    float sum_sq = 0;
    for (const float* p = v; p != v + 3; ++p)
        sum_sq += (*p) * (*p);
    const float length = sqrt(sum_sq);
    for (float* p = v; p != v + 3; ++p)
        *p /= length;
}
/* Record the 3-component vertex v: its coordinates are appended to
 * vertexList and vertexNormalsList, and the point is appended to CameraPos. */
void IcoSphere::add(float v[])
{
    for (int k = 0; k < 3; ++k)
    {
        vertexList.push_back(v[k]);
        vertexNormalsList.push_back(v[k]);
    }
    /* Build the point directly from v; no temporary heap storage needed. */
    Point3f temp_Campos;
    temp_Campos.x = v[0];
    temp_Campos.y = v[1];
    temp_Campos.z = v[2];
    CameraPos.push_back(temp_Campos);
}
/* Recursively split the triangle (v1, v2, v3) into four smaller triangles
 * `depth` times; at depth 0 the three corners are recorded with add().
 * The corner vectors are normalised in place on every call. */
void IcoSphere::subdivide(float v1[], float v2[], float v3[], int depth)
{
    norm(v1);
    norm(v2);
    norm(v3);
    if (depth == 0)
    {
        add(v1);
        add(v2);
        add(v3);
        return;
    }
    /* Edge midpoints. add() copies the coordinates it is given, so
     * automatic storage is sufficient and nothing is leaked. */
    float v12[3];
    float v23[3];
    float v31[3];
    for (int i = 0; i < 3; ++i)
    {
        v12[i] = (v1[i] + v2[i]) / 2;
        v23[i] = (v2[i] + v3[i]) / 2;
        v31[i] = (v3[i] + v1[i]) / 2;
    }
    norm(v12);
    norm(v23);
    norm(v31);
    subdivide(v1, v12, v31, depth - 1);
    subdivide(v2, v23, v12, depth - 1);
    subdivide(v3, v31, v23, depth - 1);
    subdivide(v12, v23, v31, depth - 1);
}
/* Return val with the order of its four bytes reversed. */
uint32_t IcoSphere::swap_endian(uint32_t val)
{
    const uint32_t byte0_to_3 = (val & 0xFF) << 24;
    const uint32_t byte1_to_2 = (val & 0xFF00) << 8;
    const uint32_t byte2_to_1 = (val >> 8) & 0xFF00;
    const uint32_t byte3_to_0 = val >> 24;
    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
/* Return the average of the first cloud.cols points stored in the first
 * row of `cloud`, which is read as an array of cv::Point3f. */
cv::Point3d IcoSphere::getCenter(cv::Mat cloud)
{
    const Point3f* pts = cloud.ptr<cv::Point3f>();
    Point3d sum;
    for (int i = 0; i < cloud.cols; ++i)
    {
        const Point3f& p = pts[i];
        sum.x += p.x;
        sum.y += p.y;
        sum.z += p.z;
    }
    /* Turn the sums into averages. */
    sum.x = sum.x/cloud.cols;
    sum.y = sum.y/cloud.cols;
    sum.z = sum.z/cloud.cols;
    return sum;
}
/* Return four times the largest distance between `center` and any of the
 * first cloud.cols points in the first row of `cloud` (read as cv::Point3f). */
float IcoSphere::getRadius(cv::Mat cloud, cv::Point3d center)
{
    const Point3f* pts = cloud.ptr<cv::Point3f>();
    float farthest = 0;
    for (int i = 0; i < cloud.cols; ++i)
    {
        /* Offset of the point from the centre. */
        Point3d offset;
        offset.x = pts[i].x - (float)center.x;
        offset.y = pts[i].y - (float)center.y;
        offset.z = pts[i].z - (float)center.z;
        const float dist = sqrt(pow(offset.x,2)+pow(offset.y,2)+pow(offset.z,2));
        if (dist > farthest)
            farthest = dist;
    }
    farthest *= 4;
    return farthest;
}
void IcoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath)
{
char* a0 = (char*)malloc(1024);
strcpy(a0, headerPath);
char a1[] = "image";
char a2[] = "label";
char* headerPathimg = (char*)malloc(1024);
strcpy(headerPathimg, a0);
strcat(headerPathimg, a1);
char* headerPathlab = (char*)malloc(1024);
strcpy(headerPathlab, a0);
strcat(headerPathlab, a2);
std::ofstream headerImg(headerPathimg, ios::out|ios::binary);
std::ofstream headerLabel(headerPathlab, ios::out|ios::binary);
int headerimg[4] = {2051,num_item,rows,cols};
for (int i=0; i<4; i++)
headerimg[i] = swap_endian(headerimg[i]);
int headerlabel[2] = {2050,num_item};
for (int i=0; i<2; i++)
headerlabel[i] = swap_endian(headerlabel[i]);
headerImg.write(reinterpret_cast<const char*>(headerimg), sizeof(int)*4);
headerImg.close();
headerLabel.write(reinterpret_cast<const char*>(headerlabel), sizeof(int)*2);
headerLabel.close();
};
/* Append one image and its label record to the binary data files
 * "<binaryPath>image" and "<binaryPath>label".
 *
 * filenameImg : image to read (loaded as a single-channel image).
 * binaryPath  : path prefix of the two data files.
 * headerPath  : not used for any output; the headers are written to the
 *               binaryPath files (kept for interface compatibility).
 * num_item    : item count written into the headers on first use.
 * label_class, x, y, z : written to the label file as four signed chars.
 *
 * If the image data file does not exist yet, createHeader() is called
 * first with a fixed image size of 64 x 64. */
void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z)
{
    (void)headerPath;
    int isrgb = 0;
    cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);
    /* std::string replaces malloc'ed path buffers that were never freed. */
    const string binPathimg = string(binaryPath) + "image";
    const string binPathlab = string(binaryPath) + "label";

    /* Probe for an existing data file with a stream of its own, so that
     * the writing streams below never carry a failed-open state. */
    bool already_there;
    {
        std::ifstream probe(binPathimg.c_str(), ios::in);
        already_there = probe.is_open();
    }
    if (!already_there)
    {
        cout << "Creating the training data at: " << binaryPath << ". " << endl;
        createHeader(num_item, 64, 64, binaryPath);
    }
    else
    {
        cout <<"Concatenating the training data at: " << binaryPath << ". " << endl;
    }

    /* Append the pixel rows and the label record. */
    std::ofstream img_file(binPathimg.c_str(), ios::out|ios::binary|ios::app);
    std::ofstream lab_file(binPathlab.c_str(), ios::out|ios::binary|ios::app);
    for (int r = 0; r < ImgforBin.rows; r++)
    {
        img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
    }
    signed char templab = (signed char)label_class;
    lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
    img_file.close();
    lab_file.close();
}
/* Build the list of view points (CameraPos) by subdividing the 20 faces of
 * the start polyhedron `depth_in` times, then remove repeated points.
 *
 * radius_in : scales the start coordinates after truncation to int.
 * depth_in  : number of subdivision levels passed to subdivide().
 *
 * The resulting points are printed to standard output. */
IcoSphere::IcoSphere(float radius_in, int depth_in)
{
    X = 0.5f;
    Z = 0.5f;
    X *= (int)radius_in;
    Z *= (int)radius_in;
    /* Two points whose coordinates all differ by less than this are
     * considered the same point. */
    diff = 0.00000005964;
    float vdata[12][3] = { { -X, 0.0f, Z }, { X, 0.0f, Z },
        { -X, 0.0f, -Z }, { X, 0.0f, -Z }, { 0.0f, Z, X }, { 0.0f, Z, -X },
        { 0.0f, -Z, X }, { 0.0f, -Z, -X }, { Z, X, 0.0f }, { -Z, X, 0.0f },
        { Z, -X, 0.0f }, { -Z, -X, 0.0f } };
    int tindices[20][3] = { { 0, 4, 1 }, { 0, 9, 4 }, { 9, 5, 4 },
        { 4, 5, 8 }, { 4, 8, 1 }, { 8, 10, 1 }, { 8, 3, 10 }, { 5, 3, 8 },
        { 5, 2, 3 }, { 2, 7, 3 }, { 7, 10, 3 }, { 7, 6, 10 }, { 7, 11, 6 },
        { 11, 0, 6 }, { 0, 1, 6 }, { 6, 1, 10 }, { 9, 0, 11 },
        { 9, 11, 2 }, { 9, 2, 5 }, { 7, 2, 11 } };
    // Iterate over the faces
    for (int i = 0; i < 20; ++i)
    {
        subdivide(vdata[tindices[i][0]], vdata[tindices[i][1]],
            vdata[tindices[i][2]], depth_in);
    }
    /* Keep the first occurrence of every point. The comparison must use
     * absolute differences: a signed difference is below `diff` whenever
     * the earlier coordinate is merely smaller, which discards distinct
     * points. */
    CameraPos_temp.push_back(CameraPos[0]);
    for (int j = 1; j < int(CameraPos.size()); j++)
    {
        bool duplicate = false;
        for (int k = 0; k < j && !duplicate; k++)
        {
            duplicate = fabs(CameraPos.at(k).x - CameraPos.at(j).x) < diff
                     && fabs(CameraPos.at(k).y - CameraPos.at(j).y) < diff
                     && fabs(CameraPos.at(k).z - CameraPos.at(j).z) < diff;
        }
        if (!duplicate)
            CameraPos_temp.push_back(CameraPos[j]);
    }
    CameraPos = CameraPos_temp;
    cout << "View points in total: " << CameraPos.size() << endl;
    cout << "The coordinate of view point: " << endl;
    for (int i = 0; i < (int)CameraPos.size(); i++)
    {
        cout << CameraPos.at(i).x <<' '<< CameraPos.at(i).y << ' ' << CameraPos.at(i).z << endl;
    }
}
/* Scale the 3-component vector v in place to unit Euclidean length. */
void IcoSphere::norm(float v[])
{
    float sum_sq = 0;
    for (const float* p = v; p != v + 3; ++p)
        sum_sq += (*p) * (*p);
    const float length = sqrt(sum_sq);
    for (float* p = v; p != v + 3; ++p)
        *p /= length;
}
/* Record the 3-component vertex v: its coordinates are appended to
 * vertexList and vertexNormalsList, and the point is appended to CameraPos. */
void IcoSphere::add(float v[])
{
    for (int k = 0; k < 3; ++k)
    {
        vertexList.push_back(v[k]);
        vertexNormalsList.push_back(v[k]);
    }
    /* Build the point directly from v; no temporary heap storage needed. */
    Point3f temp_Campos;
    temp_Campos.x = v[0];
    temp_Campos.y = v[1];
    temp_Campos.z = v[2];
    CameraPos.push_back(temp_Campos);
}
/* Recursively split the triangle (v1, v2, v3) into four smaller triangles
 * `depth` times; at depth 0 the three corners are recorded with add().
 * The corner vectors are normalised in place on every call. */
void IcoSphere::subdivide(float v1[], float v2[], float v3[], int depth)
{
    norm(v1);
    norm(v2);
    norm(v3);
    if (depth == 0)
    {
        add(v1);
        add(v2);
        add(v3);
        return;
    }
    /* Edge midpoints. add() copies the coordinates it is given, so
     * automatic storage is sufficient and nothing is leaked. */
    float v12[3];
    float v23[3];
    float v31[3];
    for (int i = 0; i < 3; ++i)
    {
        v12[i] = (v1[i] + v2[i]) / 2;
        v23[i] = (v2[i] + v3[i]) / 2;
        v31[i] = (v3[i] + v1[i]) / 2;
    }
    norm(v12);
    norm(v23);
    norm(v31);
    subdivide(v1, v12, v31, depth - 1);
    subdivide(v2, v23, v12, depth - 1);
    subdivide(v3, v31, v23, depth - 1);
    subdivide(v12, v23, v31, depth - 1);
}
/* Return val with the order of its four bytes reversed. */
uint32_t IcoSphere::swap_endian(uint32_t val)
{
    const uint32_t byte0_to_3 = (val & 0xFF) << 24;
    const uint32_t byte1_to_2 = (val & 0xFF00) << 8;
    const uint32_t byte2_to_1 = (val >> 8) & 0xFF00;
    const uint32_t byte3_to_0 = val >> 24;
    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}
/* Return the average of the first cloud.cols points stored in the first
 * row of `cloud`, which is read as an array of cv::Point3f. */
cv::Point3d IcoSphere::getCenter(cv::Mat cloud)
{
    const Point3f* pts = cloud.ptr<cv::Point3f>();
    Point3d sum;
    for (int i = 0; i < cloud.cols; ++i)
    {
        const Point3f& p = pts[i];
        sum.x += p.x;
        sum.y += p.y;
        sum.z += p.z;
    }
    /* Turn the sums into averages. */
    sum.x = sum.x/cloud.cols;
    sum.y = sum.y/cloud.cols;
    sum.z = sum.z/cloud.cols;
    return sum;
}
/* Return four times the largest distance between `center` and any of the
 * first cloud.cols points in the first row of `cloud` (read as cv::Point3f). */
float IcoSphere::getRadius(cv::Mat cloud, cv::Point3d center)
{
    const Point3f* pts = cloud.ptr<cv::Point3f>();
    float farthest = 0;
    for (int i = 0; i < cloud.cols; ++i)
    {
        /* Offset of the point from the centre. */
        Point3d offset;
        offset.x = pts[i].x - (float)center.x;
        offset.y = pts[i].y - (float)center.y;
        offset.z = pts[i].z - (float)center.z;
        const float dist = sqrt(pow(offset.x,2)+pow(offset.y,2)+pow(offset.z,2));
        if (dist > farthest)
            farthest = dist;
    }
    farthest *= 4;
    return farthest;
}
void IcoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath)
{
char* a0 = (char*)malloc(1024);
strcpy(a0, headerPath);
char a1[] = "image";
char a2[] = "label";
char* headerPathimg = (char*)malloc(1024);
strcpy(headerPathimg, a0);
strcat(headerPathimg, a1);
char* headerPathlab = (char*)malloc(1024);
strcpy(headerPathlab, a0);
strcat(headerPathlab, a2);
std::ofstream headerImg(headerPathimg, ios::out|ios::binary);
std::ofstream headerLabel(headerPathlab, ios::out|ios::binary);
int headerimg[4] = {2051,num_item,rows,cols};
for (int i=0; i<4; i++)
headerimg[i] = swap_endian(headerimg[i]);
int headerlabel[2] = {2050,num_item};
for (int i=0; i<2; i++)
headerlabel[i] = swap_endian(headerlabel[i]);
headerImg.write(reinterpret_cast<const char*>(headerimg), sizeof(int)*4);
headerImg.close();
headerLabel.write(reinterpret_cast<const char*>(headerlabel), sizeof(int)*2);
headerLabel.close();
};
/* Append one image and its label record to the binary data files
 * "<binaryPath>image" and "<binaryPath>label".
 *
 * filenameImg : image to read (loaded as a single-channel image).
 * binaryPath  : path prefix of the two data files.
 * headerPath  : not used for any output; the headers are written to the
 *               binaryPath files (kept for interface compatibility).
 * num_item    : item count written into the headers on first use.
 * label_class, x, y, z : written to the label file as four signed chars.
 *
 * If the image data file does not exist yet, createHeader() is called
 * first with a fixed image size of 64 x 64. */
void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z)
{
    (void)headerPath;
    int isrgb = 0;
    cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);
    /* std::string replaces malloc'ed path buffers that were never freed. */
    const string binPathimg = string(binaryPath) + "image";
    const string binPathlab = string(binaryPath) + "label";

    /* Probe for an existing data file with a stream of its own, so that
     * the writing streams below never carry a failed-open state. */
    bool already_there;
    {
        std::ifstream probe(binPathimg.c_str(), ios::in);
        already_there = probe.is_open();
    }
    if (!already_there)
    {
        cout << "Creating the training data at: " << binaryPath << ". " << endl;
        createHeader(num_item, 64, 64, binaryPath);
    }
    else
    {
        cout <<"Concatenating the training data at: " << binaryPath << ". " << endl;
    }

    /* Append the pixel rows and the label record. */
    std::ofstream img_file(binPathimg.c_str(), ios::out|ios::binary|ios::app);
    std::ofstream lab_file(binPathlab.c_str(), ios::out|ios::binary|ios::app);
    for (int r = 0; r < ImgforBin.rows; r++)
    {
        img_file.write(reinterpret_cast<const char*>(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize());
    }
    signed char templab = (signed char)label_class;
    lab_file << templab << (signed char)x << (signed char)y << (signed char)z;
    img_file.close();
    lab_file.close();
}
}}

Loading…
Cancel
Save