diff --git a/modules/cnn_3dobj/CMakeLists.txt b/modules/cnn_3dobj/CMakeLists.txt index 358f7fcba..2285ecf9c 100644 --- a/modules/cnn_3dobj/CMakeLists.txt +++ b/modules/cnn_3dobj/CMakeLists.txt @@ -1,3 +1,3 @@ set(the_description "CNN for 3D object recognition and pose estimation including a completed Sphere View on 3D objects") -ocv_define_module(cnn_3dobj opencv_core opencv_imgproc opencv_viz opencv_highgui caffe protobuf leveldb glog OPTIONAL WRAP python) -target_link_libraries(opencv_cnn_3dobj caffe protobuf leveldb glog) +ocv_define_module(cnn_3dobj opencv_core opencv_imgproc opencv_viz opencv_highgui caffe protobuf glog OPTIONAL WRAP python) +target_link_libraries(opencv_cnn_3dobj caffe protobuf glog) diff --git a/modules/cnn_3dobj/README.md b/modules/cnn_3dobj/README.md index 02efaf692..bc41071b1 100644 --- a/modules/cnn_3dobj/README.md +++ b/modules/cnn_3dobj/README.md @@ -63,27 +63,11 @@ $ ./examples/triplet/train_3d_triplet.sh ###After doing this, you will get .caffemodel files as the trained net work. I have already provide the net definition .prototxt files and the trained .caffemodel in /modules/cnn_3dobj/samples/build folder, you could just use them without training in caffe. If you are not interested on feature analysis with the help of binary files provided in Demo2, just skip to Demo3 for feature extraction or Demo4 for classifier. ============== +#Demo4: ``` $ cd $ cd /modules/cnn_3dobj/samples/build ``` -#Demo2: -###Convert data into leveldb format from folder ../data/images_all for feature extraction afterwards. The leveldb files including all data will be stored in ../data/dbfile. If you will use the OpenCV defined feature extraction process, you could also skip Demo2 for data converting, just run Demo3 after Demo1 for feature extraction because Demo3 also includes the db file converting process before feature extraction, but if you want to use demo4 for classification, this demo will be used in advance to generate a file name list for the prediction list. -``` -$ ./datatrans_test -``` - -============== -#Demo3: -###feature extraction, this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe and outputting a binary file including all extracted feature. -``` -$ ./feature_extract_test -``` -###This will extract feature from a set of images in a folder as vector for further classification and a binary file with containing all feature vectors of each sample. -###After running this, you will get a binary file storing features in ../data/feature folder, I can provide a Matlab script reading this file if someone need it. If you don't need the binary file, the feature could also be stored in vector. - -============== -#Demo4: ###Classifier, this will extracting the feature of a single image and compare it with features of gallery samples for prediction. Demo2 should be used in advance to generate a file name list for the prediction list. This demo uses a set of images for feature extraction in a given path, these features will be a reference for prediction on target image. Just run: ``` $ ./classify_test diff --git a/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp b/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp index 2fb76451c..2e65e30b9 100644 --- a/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp +++ b/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp @@ -57,16 +57,13 @@ the use of this software, even if advised of the possibility of such damage. #include #include #include -#include -#include -#include #define CPU_ONLY -#include -#include -#include -#include -#include -#include +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/net.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/io.hpp" +#include "caffe/vision_layers.hpp" #include "opencv2/viz/vizcore.hpp" #include "opencv2/highgui.hpp" #include "opencv2/highgui/highgui_c.h" @@ -135,33 +132,6 @@ class CV_EXPORTS_W IcoSphere }; -class CV_EXPORTS_W DataTrans -{ - private: - std::set all_class_name; - std::map class2id; - public: - DataTrans(); - CV_WRAP void list_dir(const char *path,std::vector& files,bool r); - /** @brief Use directory of the file including images starting with an int label as the name of each image. - */ - CV_WRAP string get_classname(string path); - /** @brief - */ - CV_WRAP int get_labelid(string fileName); - /** @brief Get the label of each image. - */ - CV_WRAP void loadimg(string path,char* buffer,bool is_color); - /** @brief Load images. - */ - CV_WRAP void convert(string imgdir,string outputdb,string attachdir,int channel,int width,int height); - /** @brief Convert a set of images as a leveldb database for CNN training. - */ - CV_WRAP std::vector feature_extraction_pipeline(std::string pretrained_binary_proto, std::string feature_extraction_proto, std::string save_feature_dataset_names, std::string extract_feature_blob_names, int num_mini_batches, std::string device, int dev_id); - /** @brief Extract feature into a binary file and vector for classification, the model proto and network proto are needed, All images in the file root will be used for feature extraction. - */ -}; - class CV_EXPORTS_W Classification { private: @@ -180,13 +150,20 @@ class CV_EXPORTS_W Classification /** @brief Convert the input image to the input image format of the network. */ public: - Classification(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file); + Classification(); + void list_dir(const char *path,std::vector& files,bool r); + /** @brief Get the file name from a root dictionary. + */ + void NetSetter(const string& model_file, const string& trained_file, const string& mean_file, const string& cpu_only, int device_id); /** @brief Initiate a classification structure. */ - std::vector > Classify(const std::vector& reference, const cv::Mat& img, int N = 4, bool mean_substract = false); + void GetLabellist(const std::vector& name_gallery); + /** @brief Get the label of the gallery images for result displaying in prediction. + */ + std::vector > Classify(const cv::Mat& reference, const cv::Mat& img, int N, bool mean_substract = false); /** @brief Make a classification. */ - cv::Mat feature_extract(const cv::Mat& img, bool mean_subtract); + void FeatureExtract(InputArray inputimg, OutputArray feature, bool mean_subtract); /** @brief Extract a single featrue of one image. */ std::vector Argmax(const std::vector& v, int N); diff --git a/modules/cnn_3dobj/samples/CMakeLists.txt b/modules/cnn_3dobj/samples/CMakeLists.txt index 4dc58ab48..a38d71427 100644 --- a/modules/cnn_3dobj/samples/CMakeLists.txt +++ b/modules/cnn_3dobj/samples/CMakeLists.txt @@ -3,19 +3,11 @@ SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb ") SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") project(sphereview_test) find_package(OpenCV REQUIRED) -set(SOURCES_1 sphereview_3dobj_demo.cpp) +set(SOURCES_generator sphereview_3dobj_demo.cpp) include_directories(${OpenCV_INCLUDE_DIRS}) -add_executable(sphereview_test ${SOURCES_1}) +add_executable(sphereview_test ${SOURCES_generator}) target_link_libraries(sphereview_test ${OpenCV_LIBS}) -set(SOURCES_2 datatrans_demo.cpp) -add_executable(datatrans_test ${SOURCES_2}) -target_link_libraries(datatrans_test ${OpenCV_LIBS}) - -set(SOURCES_3 feature_extract_demo.cpp) -add_executable(feature_extract_test ${SOURCES_3}) -target_link_libraries(feature_extract_test ${OpenCV_LIBS}) - -set(SOURCES_4 classifyIMG_demo.cpp) -add_executable(classify_test ${SOURCES_4}) +set(SOURCES_classifier classifyIMG_demo.cpp) +add_executable(classify_test ${SOURCES_classifier}) target_link_libraries(classify_test ${OpenCV_LIBS}) diff --git a/modules/cnn_3dobj/samples/classifyDB_demo.cpp b/modules/cnn_3dobj/samples/classifyDB_demo.cpp deleted file mode 100644 index f13a36085..000000000 --- a/modules/cnn_3dobj/samples/classifyDB_demo.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Software License Agreement (BSD License) - * - * Copyright (c) 2009, Willow Garage, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Willow Garage, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ -#include -#include -using namespace cv; -using namespace std; -using namespace cv::cnn_3dobj; -int main(int argc, char** argv) -{ - const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}" - "{src_dir | ../data/images_all/ | Source direction of the images ready for being converted to leveldb dataset.}" - "{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }" - "{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }" - "{channel | 1 | Channel of the images. }" - "{width | 64 | Width of images}" - "{height | 64 | Height of images}" - "{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}" - "{network_forDB | ../data/3d_triplet_galleryIMG.prototxt | Network definition file used for extracting feature from levelDB data, causion: the path of levelDB training samples must be wrotten in in .prototxt files in Phase TEST}" - "{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | Output of the extracted feature in form of binary files together with the vector features as the feature.}" - "{extract_feature_blob_names | feat | Layer used for feature extraction in CNN.}" - "{num_mini_batches | 4 | Batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}" - "{device | CPU | Device: CPU or GPU.}" - "{dev_id | 0 | ID of GPU.}" - "{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}" - "{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images.}" - "{label_file | ../data/dbfileimage_filename | A namelist including all gallery images.}" - "{target_img | ../data/images_all/2_13.png | Path of image waiting to be classified.}" - "{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"; - cv::CommandLineParser parser(argc, argv, keys); - parser.about("Demo for Sphere View data generation"); - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - string src_dir = parser.get("src_dir"); - string src_dst = parser.get("src_dst"); - string attach_dir = parser.get("attach_dir"); - int channel = parser.get("channel"); - int width = parser.get("width"); - int height = parser.get("height"); - string caffemodel = parser.get("caffemodel"); - string network_forDB = parser.get("network_forDB"); - string save_feature_dataset_names = parser.get("save_feature_dataset_names"); - string extract_feature_blob_names = parser.get("extract_feature_blob_names"); - int num_mini_batches = parser.get("num_mini_batches"); - string device = parser.get("device"); - int dev_id = parser.get("dev_id"); - string network_forIMG = parser.get("network_forIMG"); - string mean_file = parser.get("mean_file"); - string label_file = parser.get("label_file"); - string target_img = parser.get("target_img"); - int num_candidate = parser.get("num_candidate"); - cv::cnn_3dobj::DataTrans transTemp; - transTemp.convert(src_dir,src_dst,attach_dir,channel,width,height); - std::vector feature_reference = transTemp.feature_extraction_pipeline(caffemodel, network_forDB, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id); -////start another demo - cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file); - - std::cout << std::endl << "---------- Prediction for " - << target_img << " ----------" << std::endl; - - cv::Mat img = cv::imread(target_img, -1); - // CHECK(!img.empty()) << "Unable to decode image " << target_img; - std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl; - std::vector > prediction; - for (unsigned int i = 0; i < feature_reference.size(); i++) - std::cout << feature_reference[i] << endl; - cv::Mat feature_test = classifier.feature_extract(img, false); - std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test.t() << std::endl; - prediction = classifier.Classify(feature_reference, img, num_candidate, false); - // Print the top N prediction. - std::cout << std::endl << "---------- Prediction result(distance - file name in gallery) ----------" << std::endl; - for (size_t i = 0; i < prediction.size(); ++i) { - std::pair p = prediction[i]; - std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" - << p.first << "\"" << std::endl; - } - return 0; -} diff --git a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp b/modules/cnn_3dobj/samples/classifyIMG_demo.cpp index 6d29eb281..c286a4c95 100644 --- a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp +++ b/modules/cnn_3dobj/samples/classifyIMG_demo.cpp @@ -44,36 +44,40 @@ int main(int argc, char** argv) "{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}" "{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}" "{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images.}" - "{label_file | ../data/label_all.txt | A namelist including all gallery images.}" - "{target_img | ../data/images_all/2_13.png | Path of image waiting to be classified.}" - "{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"; + "{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}" + "{num_candidate | 6 | Number of candidates in gallery as the prediction result.}" + "{device | CPU | device}" + "{dev_id | 0 | dev_id}"; cv::CommandLineParser parser(argc, argv, keys); parser.about("Demo for Sphere View data generation"); if (parser.has("help")) { - parser.printMessage(); - return 0; + parser.printMessage(); + return 0; } string src_dir = parser.get("src_dir"); string caffemodel = parser.get("caffemodel"); string network_forIMG = parser.get("network_forIMG"); string mean_file = parser.get("mean_file"); - string label_file = parser.get("label_file"); string target_img = parser.get("target_img"); int num_candidate = parser.get("num_candidate"); - cv::cnn_3dobj::DataTrans transTemp; + string device = parser.get("device"); + int dev_id = parser.get("dev_id"); + + cv::cnn_3dobj::Classification classifier; + classifier.NetSetter(network_forIMG, caffemodel, mean_file, device, dev_id); std::vector name_gallery; - transTemp.list_dir(src_dir.c_str(), name_gallery, false); + classifier.list_dir(src_dir.c_str(), name_gallery, false); + classifier.GetLabellist(name_gallery); for (unsigned int i = 0; i < name_gallery.size(); i++) { - name_gallery[i] = src_dir + name_gallery[i]; + name_gallery[i] = src_dir + name_gallery[i]; } -////start another demo - cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file); - std::vector feature_reference; + std::vector img_gallery; + cv::Mat feature_reference; for (unsigned int i = 0; i < name_gallery.size(); i++) { - cv::Mat img_gallery = cv::imread(name_gallery[i], -1); - feature_reference.push_back(classifier.feature_extract(img_gallery, false)); + img_gallery.push_back(cv::imread(name_gallery[i], -1)); } + classifier.FeatureExtract(img_gallery, feature_reference, false); std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl; @@ -82,17 +86,18 @@ int main(int argc, char** argv) // CHECK(!img.empty()) << "Unable to decode image " << target_img; std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl; std::vector > prediction; - for (unsigned int i = 0; i < feature_reference.size(); i++) - std::cout << feature_reference[i].t() << endl; - cv::Mat feature_test = classifier.feature_extract(img, false); - std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test.t() << std::endl; + for (unsigned int i = 0; i < feature_reference.rows; i++) + std::cout << feature_reference.row(i) << endl; + cv::Mat feature_test; + classifier.FeatureExtract(img, feature_test, false); + std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl; prediction = classifier.Classify(feature_reference, img, num_candidate, false); // Print the top N prediction. - std::cout << std::endl << "---------- Prediction result(distance - file name in gallery) ----------" << std::endl; + std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl; for (size_t i = 0; i < prediction.size(); ++i) { - std::pair p = prediction[i]; - std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" - << p.first << "\"" << std::endl; + std::pair p = prediction[i]; + std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" + << p.first << "\"" << std::endl; } return 0; } diff --git a/modules/cnn_3dobj/samples/data/3d_triplet_galleryIMG.prototxt b/modules/cnn_3dobj/samples/data/3d_triplet_galleryIMG.prototxt deleted file mode 100644 index a5f7ce15a..000000000 --- a/modules/cnn_3dobj/samples/data/3d_triplet_galleryIMG.prototxt +++ /dev/null @@ -1,94 +0,0 @@ -name: "3d_triplet" -layer { - name: "data" - type: "Data" - top: "data" - top: "label" - include { - phase: TEST - } - data_param { - source: "/home/wangyida/Desktop/opencv_contrib/modules/nouse_test/samples/data/dbfile" - batch_size: 69 - } -} -layer { - name: "conv1" - type: "Convolution" - bottom: "data" - top: "conv1" - convolution_param { - num_output: 16 - kernel_size: 8 - stride: 1 - } -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layer { - name: "relu1" - type: "ReLU" - bottom: "pool1" - top: "pool1" -} -layer { - name: "conv2" - type: "Convolution" - bottom: "pool1" - top: "conv2" - convolution_param { - num_output: 7 - kernel_size: 5 - stride: 1 - } -} -layer { - name: "pool2" - type: "Pooling" - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 2 - stride: 2 - } -} -layer { - name: "relu2" - type: "ReLU" - bottom: "pool2" - top: "pool2" -} -layer { - name: "ip1" - type: "InnerProduct" - bottom: "pool2" - top: "ip1" - inner_product_param { - num_output: 256 - } -} -layer { - name: "relu3" - type: "ReLU" - bottom: "ip1" - top: "ip1" -} -layer { - name: "feat" - type: "InnerProduct" - bottom: "ip1" - top: "feat" - inner_product_param { - num_output: 4 - } -} diff --git a/modules/cnn_3dobj/samples/data/3d_triplet_iter_10000.caffemodel b/modules/cnn_3dobj/samples/data/3d_triplet_iter_10000.caffemodel deleted file mode 100644 index 6226c0bb5..000000000 Binary files a/modules/cnn_3dobj/samples/data/3d_triplet_iter_10000.caffemodel and /dev/null differ diff --git a/modules/cnn_3dobj/samples/data/3d_triplet_iter_20000.caffemodel b/modules/cnn_3dobj/samples/data/3d_triplet_iter_20000.caffemodel new file mode 100644 index 000000000..ceeca1a69 Binary files /dev/null and b/modules/cnn_3dobj/samples/data/3d_triplet_iter_20000.caffemodel differ diff --git a/modules/cnn_3dobj/samples/datatrans_demo.cpp b/modules/cnn_3dobj/samples/datatrans_demo.cpp deleted file mode 100644 index dbbf57b5a..000000000 --- a/modules/cnn_3dobj/samples/datatrans_demo.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Software License Agreement (BSD License) - * - * Copyright (c) 2009, Willow Garage, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Willow Garage, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ -#include -using namespace cv; -using namespace std; -using namespace cv::cnn_3dobj; -int main(int argc, char* argv[]) -{ - const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}" - "{src_dir | ../data/images_all | Source direction of the images ready for being converted to leveldb dataset.}" - "{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }" - "{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }" - "{channel | 1 | Channel of the images. }" - "{width | 64 | Width of images}" - "{height | 64 | Height of images}"; - cv::CommandLineParser parser(argc, argv, keys); - parser.about("Demo for Sphere View data generation"); - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - string src_dir = parser.get("src_dir"); - string src_dst = parser.get("src_dst"); - string attach_dir = parser.get("attach_dir"); - int channel = parser.get("channel"); - int width = parser.get("width"); - int height = parser.get("height"); - cv::cnn_3dobj::DataTrans Trans; - Trans.convert(src_dir,src_dst,attach_dir,channel,width,height); - std::cout << std::endl << "All featrues of images in: " << std::endl << src_dir << std::endl << "have been converted to levelDB data in: " << std::endl << src_dst << std::endl << "for extracting feature of gallery images in classification efficiently, this convertion is not needed in feature extraction of test image" << std::endl; -} diff --git a/modules/cnn_3dobj/samples/feature_extract_demo.cpp b/modules/cnn_3dobj/samples/feature_extract_demo.cpp deleted file mode 100644 index 26e9c0fa0..000000000 --- a/modules/cnn_3dobj/samples/feature_extract_demo.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Software License Agreement (BSD License) - * - * Copyright (c) 2009, Willow Garage, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * * Neither the name of Willow Garage, Inc. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - */ -#include -#include // for snprintf -#include -#include -#include -#include "google/protobuf/text_format.h" -#include -#include -#define CPU_ONLY -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/net.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/io.hpp" -#include "caffe/vision_layers.hpp" -using caffe::Blob; -using caffe::Caffe; -using caffe::Datum; -using caffe::Net; -//using boost::shared_ptr; -using std::string; -//namespace db = caffe::db; -using namespace cv; -using namespace std; -using namespace cv::cnn_3dobj; -int main(int argc, char* argv[]) -{ - const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}" - "{src_dir | ../data/images_all/ | Source direction of the images ready for being converted to leveldb dataset.}" - "{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }" - "{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }" - "{channel | 1 | Channel of the images. }" - "{width | 64 | Width of images}" - "{height | 64 | Height of images}" - "{pretrained_binary_proto | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}" - "{feature_extraction_proto | ../data/3d_triplet_train_test.prototxt | network definition in .prototxt the path of the training samples must be wrotten in in .prototxt files in Phase TEST}" - "{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | the output of the extracted feature in form of binary files together with the vector features as the feature.}" - "{extract_feature_blob_names | feat | the layer used for feature extraction in CNN.}" - "{num_mini_batches | 6 | batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}" - "{device | CPU | device}" - "{dev_id | 0 | dev_id}"; - cv::CommandLineParser parser(argc, argv, keys); - parser.about("Demo for Sphere View data generation"); - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - string src_dir = parser.get("src_dir"); - string src_dst = parser.get("src_dst"); - string attach_dir = parser.get("attach_dir"); - int channel = parser.get("channel"); - int width = parser.get("width"); - int height = parser.get("height"); - string pretrained_binary_proto = parser.get("pretrained_binary_proto"); - string feature_extraction_proto = parser.get("feature_extraction_proto"); - string save_feature_dataset_names = parser.get("save_feature_dataset_names"); - string extract_feature_blob_names = parser.get("extract_feature_blob_names"); - int num_mini_batches = parser.get("num_mini_batches"); - string device = parser.get("device"); - int dev_id = parser.get("dev_id"); - cv::cnn_3dobj::DataTrans transTemp; - transTemp.convert(src_dir,src_dst,attach_dir,channel,width,height); - std::vector extractedFeature = transTemp.feature_extraction_pipeline(pretrained_binary_proto, feature_extraction_proto, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id); -} diff --git a/modules/cnn_3dobj/src/cnn_classification.cpp b/modules/cnn_3dobj/src/cnn_classification.cpp index 84c6bf2fd..2e0c483cc 100644 --- a/modules/cnn_3dobj/src/cnn_classification.cpp +++ b/modules/cnn_3dobj/src/cnn_classification.cpp @@ -6,192 +6,236 @@ namespace cv { namespace cnn_3dobj { - Classification::Classification(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file) { - #ifdef CPU_ONLY - caffe::Caffe::set_mode(caffe::Caffe::CPU); - #else - caffe::Caffe::set_mode(caffe::Caffe::GPU); - #endif - - /* Load the network. */ - net_.reset(new Net(model_file, TEST)); - net_->CopyTrainedLayersFrom(trained_file); - - CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input."; - CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output."; - - Blob* input_layer = net_->input_blobs()[0]; - num_channels_ = input_layer->channels(); - CHECK(num_channels_ == 3 || num_channels_ == 1) - << "Input layer should have 1 or 3 channels."; - input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); - - /* Load the binaryproto mean file. */ - SetMean(mean_file); - - /* Load labels. */ - std::ifstream labels(label_file.c_str()); - CHECK(labels) << "Unable to open labels file " << label_file; - string line; - while (std::getline(labels, line)) - labels_.push_back(string(line)); - - /* Blob* output_layer = net_->output_blobs()[0]; - CHECK_EQ(labels_.size(), output_layer->channels()) - << "Number of labels is different from the output layer dimension.";*/ - } - - /*bool Classifier::PairCompare(const std::pair& lhs, - const std::pair& rhs) { - return lhs.first > rhs.first; - }*/ - - /* Return the indices of the top N values of vector v. */ - std::vector Classification::Argmax(const std::vector& v, int N) { - std::vector > pairs; - for (size_t i = 0; i < v.size(); ++i) - pairs.push_back(std::make_pair(v[i], i)); - std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end()); - - std::vector result; - for (int i = 0; i < N; ++i) - result.push_back(pairs[i].second); - return result; - } - - //Return the top N predictions. - std::vector > Classification::Classify(const std::vector& reference, const cv::Mat& img, int N, bool mean_substract) { - cv::Mat feature = feature_extract(img, mean_substract); - std::vector output; - for (unsigned int i = 0; i < reference.size(); i++) { - cv::Mat f1 = reference.at(i); - cv::Mat f2 = feature; - cv::Mat output_temp = f1-f2; - output.push_back(cv::norm(output_temp)); - } - std::vector maxN = Argmax(output, N); - std::vector > predictions; - for (int i = 0; i < N; ++i) { - int idx = maxN[i]; - predictions.push_back(std::make_pair(labels_[idx], output[idx])); - } - - return predictions; - } - - /* Load the mean file in binaryproto format. */ - void Classification::SetMean(const string& mean_file) { - BlobProto blob_proto; - ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); - - /* Convert from BlobProto to Blob */ - Blob mean_blob; - mean_blob.FromProto(blob_proto); - CHECK_EQ(mean_blob.channels(), num_channels_) - << "Number of channels of mean file doesn't match input layer."; - - /* The format of the mean file is planar 32-bit float BGR or grayscale. */ - std::vector channels; - float* data = mean_blob.mutable_cpu_data(); - for (int i = 0; i < num_channels_; ++i) { - /* Extract an individual channel. */ - cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); - channels.push_back(channel); - data += mean_blob.height() * mean_blob.width(); - } - - /* Merge the separate channels into a single image. */ - cv::Mat mean; - cv::merge(channels, mean); - - /* Compute the global mean pixel value and create a mean image - * filled with this value. */ - cv::Scalar channel_mean = cv::mean(mean); - mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); - } - - cv::Mat Classification::feature_extract(const cv::Mat& img, bool mean_subtract) { - Blob* input_layer = net_->input_blobs()[0]; - input_layer->Reshape(1, num_channels_, - input_geometry_.height, input_geometry_.width); - /* Forward dimension change to all layers. */ - net_->Reshape(); - - std::vector input_channels; - WrapInputLayer(&input_channels); - - Preprocess(img, &input_channels, mean_subtract); - - net_->ForwardPrefilled(); - - /* Copy the output layer to a std::vector */ - Blob* output_layer = net_->output_blobs()[0]; - const float* begin = output_layer->cpu_data(); - const float* end = begin + output_layer->channels(); - //return std::vector(begin, end); - std::vector featureVec = std::vector(begin, end); - cv::Mat feature = cv::Mat(featureVec, true); - return feature; - } - - /* Wrap the input layer of the network in separate cv::Mat objects - * (one per channel). This way we save one memcpy operation and we - * don't need to rely on cudaMemcpy2D. The last preprocessing - * operation will write the separate channels directly to the input - * layer. */ - void Classification::WrapInputLayer(std::vector* input_channels) { - Blob* input_layer = net_->input_blobs()[0]; - - int width = input_layer->width(); - int height = input_layer->height(); - float* input_data = input_layer->mutable_cpu_data(); - for (int i = 0; i < input_layer->channels(); ++i) { - cv::Mat channel(height, width, CV_32FC1, input_data); - input_channels->push_back(channel); - input_data += width * height; - } - } - - void Classification::Preprocess(const cv::Mat& img, - std::vector* input_channels, bool mean_subtract) { - /* Convert the input image to the input image format of the network. */ - cv::Mat sample; - if (img.channels() == 3 && num_channels_ == 1) - cv::cvtColor(img, sample, CV_BGR2GRAY); - else if (img.channels() == 4 && num_channels_ == 1) - cv::cvtColor(img, sample, CV_BGRA2GRAY); - else if (img.channels() == 4 && num_channels_ == 3) - cv::cvtColor(img, sample, CV_BGRA2BGR); - else if (img.channels() == 1 && num_channels_ == 3) - cv::cvtColor(img, sample, CV_GRAY2BGR); - else - sample = img; - - cv::Mat sample_resized; - if (sample.size() != input_geometry_) - cv::resize(sample, sample_resized, input_geometry_); - else - sample_resized = sample; - - cv::Mat sample_float; - if (num_channels_ == 3) - sample_resized.convertTo(sample_float, CV_32FC3); - else - sample_resized.convertTo(sample_float, CV_32FC1); - - cv::Mat sample_normalized; - if (mean_subtract) - cv::subtract(sample_float, mean_, sample_normalized); - else - sample_normalized = sample_float; - - /* This operation will write the separate BGR planes directly to the - * input layer of the network because it is wrapped by the cv::Mat - * objects in input_channels. */ - cv::split(sample_normalized, *input_channels); - - CHECK(reinterpret_cast(input_channels->at(0).data) - == net_->input_blobs()[0]->cpu_data()) - << "Input channels are not wrapping the input layer of the network."; - } -}} + Classification::Classification(){}; + void Classification::list_dir(const char *path,vector& files,bool r) + { + DIR *pDir; + struct dirent *ent; + char childpath[512]; + pDir = opendir(path); + memset(childpath, 0, sizeof(childpath)); + while ((ent = readdir(pDir)) != NULL) + { + if (ent->d_type & DT_DIR) + { + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) + { + continue; + } + if(r) + { + sprintf(childpath, "%s/%s", path, ent->d_name); + Classification::list_dir(childpath,files,false); + } + } + else + { + files.push_back(ent->d_name); + } + } + sort(files.begin(),files.end()); + }; + + void Classification::NetSetter(const string& model_file, const string& trained_file, const string& mean_file, const string& cpu_only, int device_id) + { + if (strcmp(cpu_only.c_str(), "CPU") == 0) + { + caffe::Caffe::set_mode(caffe::Caffe::CPU); + } + else + { + caffe::Caffe::set_mode(caffe::Caffe::GPU); + caffe::Caffe::SetDevice(device_id); + } + /* Load the network. */ + net_.reset(new Net(model_file, TEST)); + net_->CopyTrainedLayersFrom(trained_file); + CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input."; + CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output."; + Blob* input_layer = net_->input_blobs()[0]; + num_channels_ = input_layer->channels(); + CHECK(num_channels_ == 3 || num_channels_ == 1) + << "Input layer should have 1 or 3 channels."; + input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); + /* Load the binaryproto mean file. */ + SetMean(mean_file); + }; + + void Classification::GetLabellist(const std::vector& name_gallery) + { + for (unsigned int i = 0; i < name_gallery.size(); ++i) + labels_.push_back(name_gallery[i]); + }; + + /* Return the indices of the top N values of vector v. */ + std::vector Classification::Argmax(const std::vector& v, int N) + { + std::vector > pairs; + for (size_t i = 0; i < v.size(); ++i) + pairs.push_back(std::make_pair(v[i], i)); + std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end()); + std::vector result; + for (int i = 0; i < N; ++i) + result.push_back(pairs[i].second); + return result; + }; + + //Return the top N predictions. + std::vector > Classification::Classify(const cv::Mat& reference, const cv::Mat& img, int N, bool mean_substract) + { + cv::Mat feature; + Classification::FeatureExtract(img, feature, mean_substract); + std::vector output; + for (int i = 0; i < reference.rows; i++) + { + cv::Mat f1 = reference.row(i); + cv::Mat f2 = feature; + cv::Mat output_temp = f1-f2; + output.push_back(cv::norm(output_temp)); + } + std::vector maxN = Argmax(output, N); + std::vector > predictions; + for (int i = 0; i < N; ++i) + { + int idx = maxN[i]; + predictions.push_back(std::make_pair(labels_[idx], output[idx])); + } + return predictions; + }; + + /* Load the mean file in binaryproto format. */ + void Classification::SetMean(const string& mean_file) + { + BlobProto blob_proto; + ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); + /* Convert from BlobProto to Blob */ + Blob mean_blob; + mean_blob.FromProto(blob_proto); + CHECK_EQ(mean_blob.channels(), num_channels_) + << "Number of channels of mean file doesn't match input layer."; + /* The format of the mean file is planar 32-bit float BGR or grayscale. */ + std::vector channels; + float* data = mean_blob.mutable_cpu_data(); + for (int i = 0; i < num_channels_; ++i) + { + /* Extract an individual channel. */ + cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); + channels.push_back(channel); + data += mean_blob.height() * mean_blob.width(); + } + /* Merge the separate channels into a single image. */ + cv::Mat mean; + cv::merge(channels, mean); + /* Compute the global mean pixel value and create a mean image + * filled with this value. */ + cv::Scalar channel_mean = cv::mean(mean); + mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); + }; + + void Classification::FeatureExtract(InputArray inputimg, OutputArray feature, bool mean_subtract) + { + Blob* input_layer = net_->input_blobs()[0]; + input_layer->Reshape(1, num_channels_, + input_geometry_.height, input_geometry_.width); + /* Forward dimension change to all layers. */ + net_->Reshape(); + std::vector input_channels; + WrapInputLayer(&input_channels); + if (inputimg.kind() == 65536) + {/* this is a Mat */ + Mat img = inputimg.getMat(); + Preprocess(img, &input_channels, mean_subtract); + net_->ForwardPrefilled(); + /* Copy the output layer to a std::vector */ + Blob* output_layer = net_->output_blobs()[0]; + const float* begin = output_layer->cpu_data(); + const float* end = begin + output_layer->channels(); + std::vector featureVec = std::vector(begin, end); + cv::Mat feature_mat = cv::Mat(featureVec, true).t(); + feature_mat.copyTo(feature); + } + else + {/* This is a vector */ + vector img; + inputimg.getMatVector(img); + Mat feature_vector; + for (unsigned int i = 0; i < img.size(); ++i) + { + Preprocess(img[i], &input_channels, mean_subtract); + net_->ForwardPrefilled(); + /* Copy the output layer to a std::vector */ + Blob* output_layer = net_->output_blobs()[0]; + const float* begin = output_layer->cpu_data(); + const float* end = begin + output_layer->channels(); + std::vector featureVec = std::vector(begin, end); + if (i == 0) + { + feature_vector = cv::Mat(featureVec, true).t(); + int dim_feature = feature_vector.cols; + feature_vector.resize(img.size(), dim_feature); + } + feature_vector.row(i) = cv::Mat(featureVec, true).t(); + } + feature_vector.copyTo(feature); + } + }; + + /* Wrap the input layer of the network in separate cv::Mat objects + * (one per channel). This way we save one memcpy operation and we + * don't need to rely on cudaMemcpy2D. The last preprocessing + * operation will write the separate channels directly to the input + * layer. */ + void Classification::WrapInputLayer(std::vector* input_channels) + { + Blob* input_layer = net_->input_blobs()[0]; + int width = input_layer->width(); + int height = input_layer->height(); + float* input_data = input_layer->mutable_cpu_data(); + for (int i = 0; i < input_layer->channels(); ++i) + { + cv::Mat channel(height, width, CV_32FC1, input_data); + input_channels->push_back(channel); + input_data += width * height; + } + }; + + void Classification::Preprocess(const cv::Mat& img, +std::vector* input_channels, bool mean_subtract) + { + /* Convert the input image to the input image format of the network. */ + cv::Mat sample; + if (img.channels() == 3 && num_channels_ == 1) + cv::cvtColor(img, sample, CV_BGR2GRAY); + else if (img.channels() == 4 && num_channels_ == 1) + cv::cvtColor(img, sample, CV_BGRA2GRAY); + else if (img.channels() == 4 && num_channels_ == 3) + cv::cvtColor(img, sample, CV_BGRA2BGR); + else if (img.channels() == 1 && num_channels_ == 3) + cv::cvtColor(img, sample, CV_GRAY2BGR); + else + sample = img; + cv::Mat sample_resized; + if (sample.size() != input_geometry_) + cv::resize(sample, sample_resized, input_geometry_); + else + sample_resized = sample; + cv::Mat sample_float; + if (num_channels_ == 3) + sample_resized.convertTo(sample_float, CV_32FC3); + else + sample_resized.convertTo(sample_float, CV_32FC1); + cv::Mat sample_normalized; + if (mean_subtract) + cv::subtract(sample_float, mean_, sample_normalized); + else + sample_normalized = sample_float; + /* This operation will write the separate BGR planes directly to the + * input layer of the network because it is wrapped by the cv::Mat + * objects in input_channels. */ + cv::split(sample_normalized, *input_channels); + CHECK(reinterpret_cast(input_channels->at(0).data) + == net_->input_blobs()[0]->cpu_data()) + << "Input channels are not wrapping the input layer of the network."; + }; +} +} diff --git a/modules/cnn_3dobj/src/cnn_datatrans.cpp b/modules/cnn_3dobj/src/cnn_datatrans.cpp deleted file mode 100644 index 4cf9fb448..000000000 --- a/modules/cnn_3dobj/src/cnn_datatrans.cpp +++ /dev/null @@ -1,237 +0,0 @@ -#include "precomp.hpp" -using std::string; -using namespace std; - -namespace cv -{ -namespace cnn_3dobj -{ - DataTrans::DataTrans() - { - }; - void DataTrans::list_dir(const char *path,vector& files,bool r) - { - DIR *pDir; - struct dirent *ent; - char childpath[512]; - pDir = opendir(path); - memset(childpath, 0, sizeof(childpath)); - while ((ent = readdir(pDir)) != NULL) - { - if (ent->d_type & DT_DIR) - { - - if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) - { - continue; - } - if(r) - { - sprintf(childpath, "%s/%s", path, ent->d_name); - DataTrans::list_dir(childpath,files,false); - } - } - else - { - files.push_back(ent->d_name); - } - } - sort(files.begin(),files.end()); - - }; - - string DataTrans::get_classname(string path) - { - int index = path.find_last_of('_'); - return path.substr(0, index); - } - - - int DataTrans::get_labelid(string fileName) - { - string class_name_tmp = get_classname(fileName); - all_class_name.insert(class_name_tmp); - map::iterator name_iter_tmp = class2id.find(class_name_tmp); - if (name_iter_tmp == class2id.end()) - { - int id = class2id.size(); - class2id.insert(name_iter_tmp, std::make_pair(class_name_tmp, id)); - return id; - } - else - { - return name_iter_tmp->second; - } - } - - void DataTrans::loadimg(string path,char* buffer,const bool is_color) - { - cv::Mat img = cv::imread(path, is_color); - string val; - int rows = img.rows; - int cols = img.cols; - int pos=0; - int channel; - if (is_color == 0) - { - channel = 1; - }else{ - channel = 3; - } - for (int c = 0; c < channel; c++) - { - for (int row = 0; row < rows; row++) - { - for (int col = 0; col < cols; col++) - { - buffer[pos++]=img.at(row,col)[c]; - } - } - } - - }; - - void DataTrans::convert(string imgdir,string outputdb,string attachdir,int channel,int width,int height) - { - leveldb::DB* db; - leveldb::Options options; - options.create_if_missing = true; - // options.error_if_exists = true; - caffe::Datum datum; - datum.set_channels(channel); - datum.set_height(height); - datum.set_width(width); - int image_size = channel*width*height; - char buffer[image_size]; - - string value; - CHECK(leveldb::DB::Open(options, outputdb, &db).ok()); - vector filenames; - list_dir(imgdir.c_str(),filenames, false); - string img_log = attachdir+"image_filename"; - ofstream writefile(img_log.c_str()); - for(int i=0;i<(int)filenames.size();i++) - { - string path= imgdir; - path.append(filenames[i]); - - loadimg(path,buffer,false); - - int labelid = get_labelid(filenames[i]); - - datum.set_label(labelid); - datum.set_data(buffer,image_size); - datum.SerializeToString(&value); - snprintf(buffer, image_size, "%05d", i); - printf("\nclassid:%d classname:%s abspath:%s",labelid,get_classname(filenames[i]).c_str(),path.c_str()); - db->Put(leveldb::WriteOptions(),string(buffer),value); - //printf("%d %s\n",i,fileNames[i].c_str()); - - assert(writefile.is_open()); - writefile<::iterator iter = all_class_name.begin(); - while(iter != all_class_name.end()) - { - assert(writefile.is_open()); - writefile<<(*iter)<<"\n"; - //printf("%s\n",(*iter).c_str()); - iter++; - } - writefile.close(); - - }; - - std::vector DataTrans::feature_extraction_pipeline(std::string pretrained_binary_proto, std::string feature_extraction_proto, std::string save_feature_dataset_names, std::string extract_feature_blob_names, int num_mini_batches, std::string device, int dev_id) { - if (strcmp(device.c_str(), "GPU") == 0) { - LOG(ERROR)<< "Using GPU"; - int device_id = 0; - if (strcmp(device.c_str(), "GPU") == 0) { - device_id = dev_id; - CHECK_GE(device_id, 0); - } - LOG(ERROR) << "Using Device_id=" << device_id; - Caffe::SetDevice(device_id); - Caffe::set_mode(Caffe::GPU); - } else { - LOG(ERROR) << "Using CPU"; - Caffe::set_mode(Caffe::CPU); - } - boost::shared_ptr > feature_extraction_net( - new Net(feature_extraction_proto, caffe::TEST)); - feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); - std::vector blob_names; - blob_names.push_back(extract_feature_blob_names); - std::vector dataset_names; - dataset_names.push_back(save_feature_dataset_names); - CHECK_EQ(blob_names.size(), dataset_names.size()) << - " the number of blob names and dataset names must be equal"; - size_t num_features = blob_names.size(); - - for (size_t i = 0; i < num_features; i++) { - CHECK(feature_extraction_net->has_blob(blob_names[i])) - << "Unknown feature blob name " << blob_names[i] - << " in the network " << feature_extraction_proto; - } - std::vector files; - for (size_t i = 0; i < num_features; ++i) - { - LOG(INFO) << "Opening file " << dataset_names[i]; - FILE * temp = fopen(dataset_names[i].c_str(), "wb"); - files.push_back(temp); - } - - - LOG(ERROR)<< "Extacting Features"; - - Datum datum; - std::vector featureVec; - std::vector*> input_vec; - std::vector image_indices(num_features, 0); - for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { - feature_extraction_net->Forward(input_vec); - for (size_t i = 0; i < num_features; ++i) { - const boost::shared_ptr > feature_blob = feature_extraction_net - ->blob_by_name(blob_names[i]); - int batch_size = feature_blob->num(); - int dim_features = feature_blob->count() / batch_size; - if (batch_index == 0) - { - int fea_num = batch_size*num_mini_batches; - fwrite(&dim_features, sizeof(int), 1, files[i]); - fwrite(&fea_num, sizeof(int), 1, files[i]); - } - const float* feature_blob_data; - for (int n = 0; n < batch_size; ++n) { - - feature_blob_data = feature_blob->cpu_data() + - feature_blob->offset(n); - fwrite(feature_blob_data, sizeof(float), dim_features, files[i]); - cv::Mat tempfeat = cv::Mat(1, dim_features, CV_32FC1); - for (int dim = 0; dim < dim_features; dim++) { - tempfeat.at(0,dim) = *(feature_blob_data++); - } - featureVec.push_back(tempfeat); - ++image_indices[i]; - if (image_indices[i] % 1000 == 0) { - LOG(ERROR)<< "Extracted features of " << image_indices[i] << - " query images for feature blob " << blob_names[i]; - } - } // for (int n = 0; n < batch_size; ++n) - } // for (int i = 0; i < num_features; ++i) - } // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) - // write the last batch - for (size_t i = 0; i < num_features; ++i) { - fclose(files[i]); - } - - LOG(ERROR)<< "Successfully extracted the features!"; - return featureVec; - }; -}} diff --git a/modules/cnn_3dobj/src/cnn_sphereview.cpp b/modules/cnn_3dobj/src/cnn_sphereview.cpp index 483e6ffd7..ca5fc7461 100644 --- a/modules/cnn_3dobj/src/cnn_sphereview.cpp +++ b/modules/cnn_3dobj/src/cnn_sphereview.cpp @@ -6,235 +6,230 @@ namespace cv { namespace cnn_3dobj { - IcoSphere::IcoSphere(float radius_in, int depth_in) - { - - X = 0.5f; - Z = 0.5f; - X *= (int)radius_in; - Z *= (int)radius_in; - diff = 0.00000005964; - float vdata[12][3] = { { -X, 0.0f, Z }, { X, 0.0f, Z }, - { -X, 0.0f, -Z }, { X, 0.0f, -Z }, { 0.0f, Z, X }, { 0.0f, Z, -X }, - { 0.0f, -Z, X }, { 0.0f, -Z, -X }, { Z, X, 0.0f }, { -Z, X, 0.0f }, - { Z, -X, 0.0f }, { -Z, -X, 0.0f } }; - - - int tindices[20][3] = { { 0, 4, 1 }, { 0, 9, 4 }, { 9, 5, 4 }, - { 4, 5, 8 }, { 4, 8, 1 }, { 8, 10, 1 }, { 8, 3, 10 }, { 5, 3, 8 }, - { 5, 2, 3 }, { 2, 7, 3 }, { 7, 10, 3 }, { 7, 6, 10 }, { 7, 11, 6 }, - { 11, 0, 6 }, { 0, 1, 6 }, { 6, 1, 10 }, { 9, 0, 11 }, - { 9, 11, 2 }, { 9, 2, 5 }, { 7, 2, 11 } }; - - // Iterate over points - for (int i = 0; i < 20; ++i) { - - subdivide(vdata[tindices[i][0]], vdata[tindices[i][1]], - vdata[tindices[i][2]], depth_in); - } - CameraPos_temp.push_back(CameraPos[0]); - for (int j = 1; j* temp = new std::vector; - for (int k = 0; k < 3; ++k) { - vertexList.push_back(v[k]); - vertexNormalsList.push_back(v[k]); - temp->push_back(v[k]); - } - temp_Campos.x = temp->at(0);temp_Campos.y = temp->at(1);temp_Campos.z = temp->at(2); - CameraPos.push_back(temp_Campos); - }; - - void IcoSphere::subdivide(float v1[], float v2[], float v3[], int depth) - { - - norm(v1); - norm(v2); - norm(v3); - if (depth == 0) { - add(v1); - add(v2); - add(v3); - return; - } - - float* v12 = new float[3]; - float* v23 = new float[3]; - float* v31 = new float[3]; - - for (int i = 0; i < 3; ++i) { - v12[i] = (v1[i] + v2[i]) / 2; - v23[i] = (v2[i] + v3[i]) / 2; - v31[i] = (v3[i] + v1[i]) / 2; - } - - norm(v12); - norm(v23); - norm(v31); - - subdivide(v1, v12, v31, depth - 1); - subdivide(v2, v23, v12, depth - 1); - subdivide(v3, v31, v23, depth - 1); - subdivide(v12, v23, v31, depth - 1); - }; - - uint32_t IcoSphere::swap_endian(uint32_t val) - { - val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); - return (val << 16) | (val >> 16); - }; - - cv::Point3d IcoSphere::getCenter(cv::Mat cloud) - { - Point3f* data = cloud.ptr(); - Point3d dataout; - for(int i = 0; i < cloud.cols; ++i) - { - dataout.x += data[i].x; - dataout.y += data[i].y; - dataout.z += data[i].z; - } - dataout.x = dataout.x/cloud.cols; - dataout.y = dataout.y/cloud.cols; - dataout.z = dataout.z/cloud.cols; - return dataout; - }; - - float IcoSphere::getRadius(cv::Mat cloud, cv::Point3d center) - { - float radiusCam = 0; - Point3f* data = cloud.ptr(); - Point3d datatemp; - for(int i = 0; i < cloud.cols; ++i) - { - datatemp.x = data[i].x - (float)center.x; - datatemp.y = data[i].y - (float)center.y; - datatemp.z = data[i].z - (float)center.z; - float Radius = sqrt(pow(datatemp.x,2)+pow(datatemp.y,2)+pow(datatemp.z,2)); - if(Radius > radiusCam) - { - radiusCam = Radius; - } - } - radiusCam *= 4; - return radiusCam; - }; - - void IcoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath) - { - char* a0 = (char*)malloc(1024); - strcpy(a0, headerPath); - char a1[] = "image"; - char a2[] = "label"; - char* headerPathimg = (char*)malloc(1024); - strcpy(headerPathimg, a0); - strcat(headerPathimg, a1); - char* headerPathlab = (char*)malloc(1024); - strcpy(headerPathlab, a0); - strcat(headerPathlab, a2); - std::ofstream headerImg(headerPathimg, ios::out|ios::binary); - std::ofstream headerLabel(headerPathlab, ios::out|ios::binary); - int headerimg[4] = {2051,num_item,rows,cols}; - for (int i=0; i<4; i++) - headerimg[i] = swap_endian(headerimg[i]); - int headerlabel[2] = {2050,num_item}; - for (int i=0; i<2; i++) - headerlabel[i] = swap_endian(headerlabel[i]); - headerImg.write(reinterpret_cast(headerimg), sizeof(int)*4); - headerImg.close(); - headerLabel.write(reinterpret_cast(headerlabel), sizeof(int)*2); - headerLabel.close(); - }; - - void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z) - { - int isrgb = 0; - cv::Mat ImgforBin = cv::imread(filenameImg, isrgb); - char* A0 = (char*)malloc(1024); - strcpy(A0, binaryPath); - char A1[] = "image"; - char A2[] = "label"; - char* binPathimg = (char*)malloc(1024); - strcpy(binPathimg, A0); - strcat(binPathimg, A1); - char* binPathlab = (char*)malloc(1024); - strcpy(binPathlab, A0); - strcat(binPathlab, A2); - fstream img_file, lab_file; - img_file.open(binPathimg,ios::in); - lab_file.open(binPathlab,ios::in); - if(!img_file) - { - cout << "Creating the training data at: " << binaryPath << ". " << endl; - char* a0 = (char*)malloc(1024); - strcpy(a0, headerPath); - char a1[] = "image"; - char a2[] = "label"; - char* headerPathimg = (char*)malloc(1024); - strcpy(headerPathimg, a0); - strcat(headerPathimg,a1); - char* headerPathlab = (char*)malloc(1024); - strcpy(headerPathlab, a0); - strcat(headerPathlab,a2); - createHeader(num_item, 64, 64, binaryPath); - img_file.open(binPathimg,ios::out|ios::binary|ios::app); - lab_file.open(binPathlab,ios::out|ios::binary|ios::app); - for (int r = 0; r < ImgforBin.rows; r++) - { - img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); - } - signed char templab = (signed char)label_class; - lab_file << templab << (signed char)x << (signed char)y << (signed char)z; - } - else - { - img_file.close(); - lab_file.close(); - img_file.open(binPathimg,ios::out|ios::binary|ios::app); - lab_file.open(binPathlab,ios::out|ios::binary|ios::app); - cout <<"Concatenating the training data at: " << binaryPath << ". " << endl; - for (int r = 0; r < ImgforBin.rows; r++) - { - img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); - } - signed char templab = (signed char)label_class; - lab_file << templab << (signed char)x << (signed char)y << (signed char)z; - } - img_file.close(); - lab_file.close(); - }; + IcoSphere::IcoSphere(float radius_in, int depth_in) + { + + X = 0.5f; + Z = 0.5f; + X *= (int)radius_in; + Z *= (int)radius_in; + diff = 0.00000005964; + float vdata[12][3] = { { -X, 0.0f, Z }, { X, 0.0f, Z }, + { -X, 0.0f, -Z }, { X, 0.0f, -Z }, { 0.0f, Z, X }, { 0.0f, Z, -X }, + { 0.0f, -Z, X }, { 0.0f, -Z, -X }, { Z, X, 0.0f }, { -Z, X, 0.0f }, + { Z, -X, 0.0f }, { -Z, -X, 0.0f } }; + int tindices[20][3] = { { 0, 4, 1 }, { 0, 9, 4 }, { 9, 5, 4 }, + { 4, 5, 8 }, { 4, 8, 1 }, { 8, 10, 1 }, { 8, 3, 10 }, { 5, 3, 8 }, + { 5, 2, 3 }, { 2, 7, 3 }, { 7, 10, 3 }, { 7, 6, 10 }, { 7, 11, 6 }, + { 11, 0, 6 }, { 0, 1, 6 }, { 6, 1, 10 }, { 9, 0, 11 }, + { 9, 11, 2 }, { 9, 2, 5 }, { 7, 2, 11 } }; + + // Iterate over points + for (int i = 0; i < 20; ++i) + { + subdivide(vdata[tindices[i][0]], vdata[tindices[i][1]], + vdata[tindices[i][2]], depth_in); + } + CameraPos_temp.push_back(CameraPos[0]); + for (int j = 1; j* temp = new std::vector; + for (int k = 0; k < 3; ++k) + { + vertexList.push_back(v[k]); + vertexNormalsList.push_back(v[k]); + temp->push_back(v[k]); + } + temp_Campos.x = temp->at(0);temp_Campos.y = temp->at(1);temp_Campos.z = temp->at(2); + CameraPos.push_back(temp_Campos); + }; + + void IcoSphere::subdivide(float v1[], float v2[], float v3[], int depth) + { + norm(v1); + norm(v2); + norm(v3); + if (depth == 0) + { + add(v1); + add(v2); + add(v3); + return; + } + float* v12 = new float[3]; + float* v23 = new float[3]; + float* v31 = new float[3]; + for (int i = 0; i < 3; ++i) + { + v12[i] = (v1[i] + v2[i]) / 2; + v23[i] = (v2[i] + v3[i]) / 2; + v31[i] = (v3[i] + v1[i]) / 2; + } + norm(v12); + norm(v23); + norm(v31); + subdivide(v1, v12, v31, depth - 1); + subdivide(v2, v23, v12, depth - 1); + subdivide(v3, v31, v23, depth - 1); + subdivide(v12, v23, v31, depth - 1); + }; + + uint32_t IcoSphere::swap_endian(uint32_t val) + { + val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); + return (val << 16) | (val >> 16); + }; + + cv::Point3d IcoSphere::getCenter(cv::Mat cloud) + { + Point3f* data = cloud.ptr(); + Point3d dataout; + for(int i = 0; i < cloud.cols; ++i) + { + dataout.x += data[i].x; + dataout.y += data[i].y; + dataout.z += data[i].z; + } + dataout.x = dataout.x/cloud.cols; + dataout.y = dataout.y/cloud.cols; + dataout.z = dataout.z/cloud.cols; + return dataout; + }; + + float IcoSphere::getRadius(cv::Mat cloud, cv::Point3d center) + { + float radiusCam = 0; + Point3f* data = cloud.ptr(); + Point3d datatemp; + for(int i = 0; i < cloud.cols; ++i) + { + datatemp.x = data[i].x - (float)center.x; + datatemp.y = data[i].y - (float)center.y; + datatemp.z = data[i].z - (float)center.z; + float Radius = sqrt(pow(datatemp.x,2)+pow(datatemp.y,2)+pow(datatemp.z,2)); + if(Radius > radiusCam) + { + radiusCam = Radius; + } + } + radiusCam *= 4; + return radiusCam; + }; + + void IcoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath) + { + char* a0 = (char*)malloc(1024); + strcpy(a0, headerPath); + char a1[] = "image"; + char a2[] = "label"; + char* headerPathimg = (char*)malloc(1024); + strcpy(headerPathimg, a0); + strcat(headerPathimg, a1); + char* headerPathlab = (char*)malloc(1024); + strcpy(headerPathlab, a0); + strcat(headerPathlab, a2); + std::ofstream headerImg(headerPathimg, ios::out|ios::binary); + std::ofstream headerLabel(headerPathlab, ios::out|ios::binary); + int headerimg[4] = {2051,num_item,rows,cols}; + for (int i=0; i<4; i++) + headerimg[i] = swap_endian(headerimg[i]); + int headerlabel[2] = {2050,num_item}; + for (int i=0; i<2; i++) + headerlabel[i] = swap_endian(headerlabel[i]); + headerImg.write(reinterpret_cast(headerimg), sizeof(int)*4); + headerImg.close(); + headerLabel.write(reinterpret_cast(headerlabel), sizeof(int)*2); + headerLabel.close(); + }; + + void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z) + { + int isrgb = 0; + cv::Mat ImgforBin = cv::imread(filenameImg, isrgb); + char* A0 = (char*)malloc(1024); + strcpy(A0, binaryPath); + char A1[] = "image"; + char A2[] = "label"; + char* binPathimg = (char*)malloc(1024); + strcpy(binPathimg, A0); + strcat(binPathimg, A1); + char* binPathlab = (char*)malloc(1024); + strcpy(binPathlab, A0); + strcat(binPathlab, A2); + fstream img_file, lab_file; + img_file.open(binPathimg,ios::in); + lab_file.open(binPathlab,ios::in); + if(!img_file) + { + cout << "Creating the training data at: " << binaryPath << ". " << endl; + char* a0 = (char*)malloc(1024); + strcpy(a0, headerPath); + char a1[] = "image"; + char a2[] = "label"; + char* headerPathimg = (char*)malloc(1024); + strcpy(headerPathimg, a0); + strcat(headerPathimg,a1); + char* headerPathlab = (char*)malloc(1024); + strcpy(headerPathlab, a0); + strcat(headerPathlab,a2); + createHeader(num_item, 64, 64, binaryPath); + img_file.open(binPathimg,ios::out|ios::binary|ios::app); + lab_file.open(binPathlab,ios::out|ios::binary|ios::app); + for (int r = 0; r < ImgforBin.rows; r++) + { + img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); + } + signed char templab = (signed char)label_class; + lab_file << templab << (signed char)x << (signed char)y << (signed char)z; + } + else + { + img_file.close(); + lab_file.close(); + img_file.open(binPathimg,ios::out|ios::binary|ios::app); + lab_file.open(binPathlab,ios::out|ios::binary|ios::app); + cout <<"Concatenating the training data at: " << binaryPath << ". " << endl; + for (int r = 0; r < ImgforBin.rows; r++) + { + img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); + } + signed char templab = (signed char)label_class; + lab_file << templab << (signed char)x << (signed char)y << (signed char)z; + } + img_file.close(); + lab_file.close(); + }; }}