From cabd5d4041330986bcfa24d8aabcf32e5df0d65f Mon Sep 17 00:00:00 2001
From: Wangyida
Date: Sun, 16 Aug 2015 21:46:51 +0800
Subject: [PATCH] add RGB as an option for data generation for triplet training

---
 modules/cnn_3dobj/README.md | 6 +-
 .../cnn_3dobj/include/opencv2/cnn_3dobj.hpp | 40 ++--
 .../cnn_3dobj/samples/classifyIMG_demo.cpp | 83 ++++----
 .../samples/sphereview_3dobj_demo.cpp | 6 +-
 modules/cnn_3dobj/src/cnn_feature.cpp | 182 ++++++++----------
 modules/cnn_3dobj/src/cnn_sphereview.cpp | 41 +++-
 .../test/test_cnn_3dobj_feature_extract.cpp | 20 +-
 7 files changed, 189 insertions(+), 189 deletions(-)

diff --git a/modules/cnn_3dobj/README.md b/modules/cnn_3dobj/README.md
index 09512c0b1..a50428b7c 100644
--- a/modules/cnn_3dobj/README.md
+++ b/modules/cnn_3dobj/README.md
@@ -53,7 +53,7 @@ $ make
 =============
 #Demos
 ##Demo1: training data generation
-###Imagas generation from different pose, by default there are 4 models used, there will be 276 images in all which each class contains 69 iamges, if you want to use additional .ply models, it is necessary to change the class number parameter to the new class number and also give it a new class label.
+###Image generation from different poses: by default 4 models are used and 276 images are generated in all, where each class contains 69 images. If you want to use additional .ply models, change the class number parameter to the new total and give each new model its own class label. If you want to train the network and extract features from RGB images, set the parameter rgb_use to 1.
 ```
 $ ./sphereview_test -plymodel=../3Dmodel/ape.ply -label_class=0
 ```
@@ -91,4 +91,8 @@ $ cd /modules/cnn_3dobj/samples/build
 ```
 $ ./classify_test
 ```
+###If the classification and pose estimation should use the mean file computed from all training images, you can run this:
+```
+$ ./classify_test -mean_file=../data/images_mean/triplet_mean.binaryproto
+```
 ==============================================
diff --git a/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp b/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp
index 2b0d1ba52..11914703d 100644
--- a/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp
+++ b/modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp
@@ -128,7 +128,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
     CV_WRAP static void createHeader(int num_item, int rows, int cols, const char* headerPath);
     /** @brief Create header in binary files collecting the image data and label.
     */
-    CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z);
+    CV_WRAP static void writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z, int isrgb);
     /** @brief Write binary files used for training in other open source project.
     */
 };
@@ -136,39 +136,37 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
 class CV_EXPORTS_W descriptorExtractor
 {
     private:
-    caffe::Net* net_;
-    cv::Size input_geometry_;
-    int num_channels_;
+    caffe::Net* convnet;
+    cv::Size input_geometry;
+    int num_channels;
+    bool net_set;
+    int net_ready;
     cv::Mat mean_;
+    std::vector device_info;
     void setMean(const string& mean_file);
-    /** @brief Load the mean file in binaryproto format.
+    /** @brief Load the mean file in binaryproto format if it is needed.
    */
-    void wrapInputLayer(std::vector* input_channels);
+    void wrapInput(std::vector* input_channels);
     /** @brief Wrap the input layer of the network in separate cv::Mat objects(one per channel). This way we save one memcpy operation and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer.
     */
-    void preprocess(const cv::Mat& img, std::vector* input_channels, int net_ready);
+    void preprocess(const cv::Mat& img, std::vector* input_channels);
     /** @brief Convert the input image to the input image format of the network.
     */
     public:
-    std::vector labels_;
-    descriptorExtractor();
-    void listDir(const char *path,std::vector& files,bool r);
-    /** @brief Get the file name from a root dictionary.
+    descriptorExtractor(const string& device_type, int device_id);
+    /** @brief Set the device for feature extraction.
     */
-    bool setNet(const string& cpu_only, int device_id);
-    /** @brief Initiate a classification structure.
+    std::vector getDevice();
+    /** @brief Get device information for feature extraction.
     */
-    int loadNet(bool netsetter, const string& model_file, const string& trained_file);
-    /** @brief Initiate a classification structure.
+    void setDevice(const string& device_type, const string& device_id = "");
+    /** @brief Set device information for feature extraction.
     */
-    int loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file);
+    void loadNet(const string& model_file, const string& trained_file, string mean_file = "");
     /** @brief Initiate a classification structure.
     */
-    void getLabellist(const std::vector& name_gallery);
-    /** @brief Get the label of the gallery images for result displaying in prediction.
-    */
-    void extract(int net_ready, InputArray inputimg, OutputArray feature, std::string feature_blob);
-    /** @brief Extract a single featrue of one image.
+    void extract(InputArrayOfArrays inputimg, OutputArray feature, std::string feature_blob);
+    /** @brief Extract features from a set of images.
     */
 };
 //! @}
diff --git a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp b/modules/cnn_3dobj/samples/classifyIMG_demo.cpp
index 5c07e20bc..44045cbf4 100644
--- a/modules/cnn_3dobj/samples/classifyIMG_demo.cpp
+++ b/modules/cnn_3dobj/samples/classifyIMG_demo.cpp
@@ -34,43 +34,40 @@
 */
 #define HAVE_CAFFE
 #include
+#include
 #include
 using namespace cv;
 using namespace std;
 using namespace cv::cnn_3dobj;
-/* Return the indices of the top N values of vector v. */
-std::vector argmax(const std::vector& v, int N)
+/* Get the file names from a root directory. */
+void listDir(const char *path, std::vector& files, bool r)
 {
-    std::vector > pairs;
-    for (size_t i = 0; i < v.size(); ++i)
-        pairs.push_back(std::make_pair(v[i], i));
-    std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end());
-    std::vector result;
-    for (int i = 0; i < N; ++i)
-        result.push_back(pairs[i].second);
-    return result;
-};
-
-/* Return the indices of the top N values of vector v. */
-std::vector > classify(const cv::Mat& reference, const cv::Mat& target, int N, std::vector labels_)
-{
-    std::vector output;
-    for (int i = 0; i < reference.rows; i++)
-    {
-        cv::Mat f1 = reference.row(i);
-        cv::Mat f2 = target;
-        cv::Mat output_temp = f1-f2;
-        output.push_back(cv::norm(output_temp));
-    }
-    std::vector maxN = argmax(output, N);
-    std::vector > predictions;
-    for (int i = 0; i < N; ++i)
+    DIR *pDir;
+    struct dirent *ent;
+    char childpath[512];
+    pDir = opendir(path);
+    memset(childpath, 0, sizeof(childpath));
+    while ((ent = readdir(pDir)) != NULL)
     {
-        int idx = maxN[i];
-        predictions.push_back(std::make_pair(labels_[idx], output[idx]));
+        if (ent->d_type & DT_DIR)
+        {
+            if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
+            {
+                continue;
+            }
+            if(r)
+            {
+                sprintf(childpath, "%s/%s", path, ent->d_name);
+                listDir(childpath,files,false);
+            }
+        }
+        else
+        {
+            files.push_back(ent->d_name);
+        }
     }
-    return predictions;
+    sort(files.begin(),files.end());
 };

 int main(int argc, char** argv)
@@ -82,7 +79,7 @@ int main(int argc, char** argv)
 "{mean_file | no | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images. If you want to use the mean file, you can set this as ../data/images_mean/triplet_mean.binaryproto.}"
 "{target_img | ../data/images_all/3_13.png | Path of image waiting to be classified.}"
 "{feature_blob | feat | Name of layer which will represent as the feature, in this network, ip1 or feat is well.}"
-"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"
+"{num_candidate | 15 | Number of candidates in gallery as the prediction result.}"
 "{device | CPU | device}"
 "{dev_id | 0 | dev_id}";
     cv::CommandLineParser parser(argc, argv, keys);
@@ -102,16 +99,15 @@ int main(int argc, char** argv)
     string device = parser.get("device");
     int dev_id = parser.get("dev_id");
-    cv::cnn_3dobj::descriptorExtractor descriptor;
-    bool set_succeed = descriptor.setNet(device, dev_id);
-    int net_ready;
+    cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id);
+    std::vector device_info = descriptor.getDevice();
+    std::cout << "Using " << device_info[0] << std::endl;
     if (strcmp(mean_file.c_str(), "no") == 0)
-        net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel);
+        descriptor.loadNet(network_forIMG, caffemodel);
     else
-        net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file);
+        descriptor.loadNet(network_forIMG, caffemodel, mean_file);
     std::vector name_gallery;
-    descriptor.listDir(src_dir.c_str(), name_gallery, false);
-    descriptor.getLabellist(name_gallery);
+    listDir(src_dir.c_str(), name_gallery, false);
     for (unsigned int i = 0; i < name_gallery.size(); i++) {
         name_gallery[i] = src_dir + name_gallery[i];
     }
@@ -120,7 +116,7 @@ int main(int argc, char** argv)
     for (unsigned int i = 0; i < name_gallery.size(); i++) {
         img_gallery.push_back(cv::imread(name_gallery[i], -1));
     }
-    descriptor.extract(net_ready, img_gallery, feature_reference, feature_blob);
+    descriptor.extract(img_gallery, feature_reference, feature_blob);
     std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
@@ -131,14 +127,15 @@ int main(int argc, char** argv)
     for (unsigned int i = 0; i < feature_reference.rows; i++)
         std::cout << feature_reference.row(i) << endl;
     cv::Mat feature_test;
-    descriptor.extract(net_ready, img, feature_test, feature_blob);
+    descriptor.extract(img, feature_test, feature_blob);
+    cv::BFMatcher
matcher(NORM_L2); + std::vector > matches; + matcher.knnMatch(feature_test, feature_reference, matches, num_candidate); std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl; - prediction = classify(feature_reference, feature_test, num_candidate, descriptor.labels_); // Print the top N prediction. std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl; - for (size_t i = 0; i < prediction.size(); ++i) { - std::pair p = prediction[i]; - std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" << p.first << "\"" << std::endl; + for (size_t i = 0; i < matches[0].size(); ++i) { + std::cout << i << " - " << std::fixed << std::setprecision(2) << name_gallery[matches[0][i].trainIdx] << " - \"" << matches[0][i].distance << "\"" << std::endl; } return 0; } diff --git a/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp b/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp index 4e348dc14..f52c766e9 100644 --- a/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp +++ b/modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp @@ -48,7 +48,8 @@ int main(int argc, char *argv[]) "{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }" "{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }" "{num_class | 4 | total number of classes of models}" -"{label_class | 0 | class label of current .ply model}"; +"{label_class | 0 | class label of current .ply model}" +"{rgb_use | 0 | use RGB image or grayscale}"; cv::CommandLineParser parser(argc, argv, keys); parser.about("Demo for Sphere View data generation"); if (parser.has("help")) @@ -62,6 +63,7 @@ int main(int argc, char *argv[]) string labeldir = parser.get("labeldir"); int num_class = parser.get("num_class"); int label_class = parser.get("label_class"); + int rgb_use = parser.get("rgb_use"); cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth); std::vector campos = ViewSphere.CameraPos; std::fstream imglabel; @@ -122,7 +124,7 @@ int main(int argc, char *argv[]) if (camera_pov) myWindow.setViewerPose(cam_pose); myWindow.saveScreenshot(filename); - ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100)); + ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100), rgb_use); } imglabel.close(); return 1; diff --git a/modules/cnn_3dobj/src/cnn_feature.cpp b/modules/cnn_3dobj/src/cnn_feature.cpp index b0879f0a7..f83192041 100644 --- a/modules/cnn_3dobj/src/cnn_feature.cpp +++ b/modules/cnn_3dobj/src/cnn_feature.cpp @@ -6,117 +6,100 @@ namespace cv { namespace cnn_3dobj { - descriptorExtractor::descriptorExtractor(){}; - void descriptorExtractor::listDir(const char *path,vector& files,bool r) + descriptorExtractor::descriptorExtractor(const string& device_type, int device_id) { - DIR *pDir; - struct dirent *ent; - char childpath[512]; - pDir = opendir(path); - memset(childpath, 0, sizeof(childpath)); - while ((ent = readdir(pDir)) != NULL) + if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0) { - if (ent->d_type & DT_DIR) + if (strcmp(device_type.c_str(), "CPU") == 0) { - if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 
0) - { - continue; - } - if(r) - { - sprintf(childpath, "%s/%s", path, ent->d_name); - descriptorExtractor::listDir(childpath,files,false); - } + caffe::Caffe::set_mode(caffe::Caffe::CPU); + device_info.push_back("CPU"); + std::cout << "Using CPU" << std::endl; } else { - files.push_back(ent->d_name); + caffe::Caffe::set_mode(caffe::Caffe::GPU); + caffe::Caffe::SetDevice(device_id); + device_info.push_back("GPU"); + std::cout << "Using GPU" << std::endl; + std::cout << "Using Device_id=" << device_id << std::endl; } + net_set = true; } - sort(files.begin(),files.end()); + else + { + std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl; + net_set = false; + } + }; + + std::vector descriptorExtractor::getDevice() + { + std::vector device_info_out; + device_info_out = device_info; + return device_info_out; }; - bool descriptorExtractor::setNet(const string& cpu_only, int device_id) + void descriptorExtractor::setDevice(const string& device_type, const string& device_id) { - if (strcmp(cpu_only.c_str(), "CPU") == 0 || strcmp(cpu_only.c_str(), "GPU") == 0) + if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0) { - if (strcmp(cpu_only.c_str(), "CPU") == 0) + if (strcmp(device_type.c_str(), "CPU") == 0) { caffe::Caffe::set_mode(caffe::Caffe::CPU); + device_info.push_back("CPU"); + std::cout << "Using CPU" << std::endl; } else { + int dev_id = atoi(device_id.c_str()); caffe::Caffe::set_mode(caffe::Caffe::GPU); - caffe::Caffe::SetDevice(device_id); - std::cout << "Using Device_id=" << device_id << std::endl; + caffe::Caffe::SetDevice(dev_id); + device_info.push_back("GPU"); + std::cout << "Using GPU" << std::endl; + std::cout << "Using Device_id=" << dev_id << std::endl; } - return true; + net_set = true; } else { std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl; - return false; + net_set = false; } }; - int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file) + void descriptorExtractor::loadNet(const string& model_file, const string& trained_file, string mean_file) { - int net_ready = 0; - if (netsetter) + net_ready = 0; + if (net_set) { /* Load the network. */ - net_ = new Net(model_file, TEST); - net_->CopyTrainedLayersFrom(trained_file); - if (net_->num_inputs() != 1) + convnet = new Net(model_file, TEST); + convnet->CopyTrainedLayersFrom(trained_file); + if (convnet->num_inputs() != 1) std::cout << "Network should have exactly one input." << std::endl; - if (net_->num_outputs() != 1) + if (convnet->num_outputs() != 1) std::cout << "Network should have exactly one output." << std::endl; - Blob* input_layer = net_->input_blobs()[0]; - num_channels_ = input_layer->channels(); - if (num_channels_ != 3 && num_channels_ != 1) + Blob* input_layer = convnet->input_blobs()[0]; + num_channels = input_layer->channels(); + if (num_channels != 3 && num_channels != 1) std::cout << "Input layer should have 1 or 3 channels." << std::endl; - input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); + input_geometry = cv::Size(input_layer->width(), input_layer->height()); /* Load the binaryproto mean file. 
*/ - setMean(mean_file); - net_ready = 2; - } - else - { - std::cout << "Error: Device must be set in advance using SetNet function" << std::endl; - } - return net_ready; - }; - - int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file) - { - int net_ready = 0; - if (netsetter) - { - /* Load the network. */ - net_ = new Net(model_file, TEST); - net_->CopyTrainedLayersFrom(trained_file); - if (net_->num_inputs() != 1) - std::cout << "Network should have exactly one input." << std::endl; - if (net_->num_outputs() != 1) - std::cout << "Network should have exactly one output." << std::endl; - Blob* input_layer = net_->input_blobs()[0]; - num_channels_ = input_layer->channels(); - if (num_channels_ != 3 && num_channels_ != 1) - std::cout << "Input layer should have 1 or 3 channels." << std::endl; - input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); - net_ready = 1; + if (!mean_file.empty()) + { + setMean(mean_file); + net_ready = 2; + } + else + { + net_ready = 1; + } } else { std::cout << "Error: Device must be set in advance using SetNet function" << std::endl; } - return net_ready; - }; - - void descriptorExtractor::getLabellist(const std::vector& name_gallery) - { - for (unsigned int i = 0; i < name_gallery.size(); ++i) - labels_.push_back(name_gallery[i]); }; /* Load the mean file in binaryproto format. */ @@ -127,12 +110,12 @@ namespace cnn_3dobj /* Convert from BlobProto to Blob */ Blob mean_blob; mean_blob.FromProto(blob_proto); - if (mean_blob.channels() != num_channels_) + if (mean_blob.channels() != num_channels) std::cout << "Number of channels of mean file doesn't match input layer." << std::endl; /* The format of the mean file is planar 32-bit float BGR or grayscale. */ std::vector channels; float* data = mean_blob.mutable_cpu_data(); - for (int i = 0; i < num_channels_; ++i) + for (int i = 0; i < num_channels; ++i) { /* Extract an individual channel. */ cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); @@ -145,27 +128,27 @@ namespace cnn_3dobj /* Compute the global mean pixel value and create a mean image * filled with this value. */ cv::Scalar channel_mean = cv::mean(mean); - mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); + mean_ = cv::Mat(input_geometry, mean.type(), channel_mean); }; - void descriptorExtractor::extract(int net_ready, InputArray inputimg, OutputArray feature, std::string featrue_blob) + void descriptorExtractor::extract(InputArrayOfArrays inputimg, OutputArray feature, std::string feature_blob) { if (net_ready) { - Blob* input_layer = net_->input_blobs()[0]; - input_layer->Reshape(1, num_channels_, - input_geometry_.height, input_geometry_.width); + Blob* input_layer = convnet->input_blobs()[0]; + input_layer->Reshape(1, num_channels, + input_geometry.height, input_geometry.width); /* Forward dimension change to all layers. 
*/ - net_->Reshape(); + convnet->Reshape(); std::vector input_channels; - wrapInputLayer(&input_channels); + wrapInput(&input_channels); if (inputimg.kind() == 65536) {/* this is a Mat */ Mat img = inputimg.getMat(); - preprocess(img, &input_channels, net_ready); - net_->ForwardPrefilled(); + preprocess(img, &input_channels); + convnet->ForwardPrefilled(); /* Copy the output layer to a std::vector */ - Blob* output_layer = net_->blob_by_name(featrue_blob).get(); + Blob* output_layer = convnet->blob_by_name(feature_blob).get(); const float* begin = output_layer->cpu_data(); const float* end = begin + output_layer->channels(); std::vector featureVec = std::vector(begin, end); @@ -179,10 +162,10 @@ namespace cnn_3dobj Mat feature_vector; for (unsigned int i = 0; i < img.size(); ++i) { - preprocess(img[i], &input_channels, net_ready); - net_->ForwardPrefilled(); + preprocess(img[i], &input_channels); + convnet->ForwardPrefilled(); /* Copy the output layer to a std::vector */ - Blob* output_layer = net_->blob_by_name(featrue_blob).get(); + Blob* output_layer = convnet->blob_by_name(feature_blob).get(); const float* begin = output_layer->cpu_data(); const float* end = begin + output_layer->channels(); std::vector featureVec = std::vector(begin, end); @@ -206,9 +189,9 @@ namespace cnn_3dobj * don't need to rely on cudaMemcpy2D. The last preprocessing * operation will write the separate channels directly to the input * layer. */ - void descriptorExtractor::wrapInputLayer(std::vector* input_channels) + void descriptorExtractor::wrapInput(std::vector* input_channels) { - Blob* input_layer = net_->input_blobs()[0]; + Blob* input_layer = convnet->input_blobs()[0]; int width = input_layer->width(); int height = input_layer->height(); float* input_data = input_layer->mutable_cpu_data(); @@ -220,28 +203,27 @@ namespace cnn_3dobj } }; - void descriptorExtractor::preprocess(const cv::Mat& img, -std::vector* input_channels, int net_ready) + void descriptorExtractor::preprocess(const cv::Mat& img, std::vector* input_channels) { /* Convert the input image to the input image format of the network. */ cv::Mat sample; - if (img.channels() == 3 && num_channels_ == 1) + if (img.channels() == 3 && num_channels == 1) cv::cvtColor(img, sample, CV_BGR2GRAY); - else if (img.channels() == 4 && num_channels_ == 1) + else if (img.channels() == 4 && num_channels == 1) cv::cvtColor(img, sample, CV_BGRA2GRAY); - else if (img.channels() == 4 && num_channels_ == 3) + else if (img.channels() == 4 && num_channels == 3) cv::cvtColor(img, sample, CV_BGRA2BGR); - else if (img.channels() == 1 && num_channels_ == 3) + else if (img.channels() == 1 && num_channels == 3) cv::cvtColor(img, sample, CV_GRAY2BGR); else sample = img; cv::Mat sample_resized; - if (sample.size() != input_geometry_) - cv::resize(sample, sample_resized, input_geometry_); + if (sample.size() != input_geometry) + cv::resize(sample, sample_resized, input_geometry); else sample_resized = sample; cv::Mat sample_float; - if (num_channels_ == 3) + if (num_channels == 3) sample_resized.convertTo(sample_float, CV_32FC3); else sample_resized.convertTo(sample_float, CV_32FC1); @@ -255,7 +237,7 @@ std::vector* input_channels, int net_ready) * objects in input_channels. */ cv::split(sample_normalized, *input_channels); if (reinterpret_cast(input_channels->at(0).data) - != net_->input_blobs()[0]->cpu_data()) + != convnet->input_blobs()[0]->cpu_data()) std::cout << "Input channels are not wrapping the input layer of the network." 
<< std::endl; }; } diff --git a/modules/cnn_3dobj/src/cnn_sphereview.cpp b/modules/cnn_3dobj/src/cnn_sphereview.cpp index 29d44827e..080881a28 100644 --- a/modules/cnn_3dobj/src/cnn_sphereview.cpp +++ b/modules/cnn_3dobj/src/cnn_sphereview.cpp @@ -175,9 +175,8 @@ namespace cnn_3dobj headerLabel.close(); }; - void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z) + void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z, int isrgb) { - int isrgb = 0; cv::Mat ImgforBin = cv::imread(filenameImg, isrgb); char* A0 = (char*)malloc(1024); strcpy(A0, binaryPath); @@ -208,9 +207,24 @@ namespace cnn_3dobj createHeader(num_item, 64, 64, binaryPath); img_file.open(binPathimg,ios::out|ios::binary|ios::app); lab_file.open(binPathlab,ios::out|ios::binary|ios::app); - for (int r = 0; r < ImgforBin.rows; r++) + if (isrgb == 0) { - img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); + for (int r = 0; r < ImgforBin.rows; r++) + { + img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); + } + } + else + { + std::vector Img3forBin; + cv::split(ImgforBin,Img3forBin); + for (unsigned int i = 0; i < Img3forBin.size(); i++) + { + for (int r = 0; r < Img3forBin[i].rows; r++) + { + img_file.write(reinterpret_cast(Img3forBin[i].ptr(r)), Img3forBin[i].cols*Img3forBin[i].elemSize()); + } + } } signed char templab = (signed char)label_class; lab_file << templab << (signed char)x << (signed char)y << (signed char)z; @@ -222,9 +236,24 @@ namespace cnn_3dobj img_file.open(binPathimg,ios::out|ios::binary|ios::app); lab_file.open(binPathlab,ios::out|ios::binary|ios::app); cout <<"Concatenating the training data at: " << binaryPath << ". 
" << endl; - for (int r = 0; r < ImgforBin.rows; r++) + if (isrgb == 0) + { + for (int r = 0; r < ImgforBin.rows; r++) + { + img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); + } + } + else { - img_file.write(reinterpret_cast(ImgforBin.ptr(r)), ImgforBin.cols*ImgforBin.elemSize()); + std::vector Img3forBin; + cv::split(ImgforBin,Img3forBin); + for (unsigned int i = 0; i < Img3forBin.size(); i++) + { + for (int r = 0; r < Img3forBin[i].rows; r++) + { + img_file.write(reinterpret_cast(Img3forBin[i].ptr(r)), Img3forBin[i].cols*Img3forBin[i].elemSize()); + } + } } signed char templab = (signed char)label_class; lab_file << templab << (signed char)x << (signed char)y << (signed char)z; diff --git a/modules/cnn_3dobj/test/test_cnn_3dobj_feature_extract.cpp b/modules/cnn_3dobj/test/test_cnn_3dobj_feature_extract.cpp index 15fa87d6d..5d17eac93 100644 --- a/modules/cnn_3dobj/test/test_cnn_3dobj_feature_extract.cpp +++ b/modules/cnn_3dobj/test/test_cnn_3dobj_feature_extract.cpp @@ -34,23 +34,11 @@ void CV_CNN_Feature_Test::run(int) string device = "CPU"; int dev_id = 0; - cv::cnn_3dobj::descriptorExtractor descriptor; - bool set_succeed = descriptor.setNet(device, dev_id); - if (!set_succeed) { - ts->printf(cvtest::TS::LOG, "Net parameters which is GPU or CPU could not be set"); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - int net_ready; + cv::cnn_3dobj::descriptorExtractor descriptor(device, dev_id); if (strcmp(mean_file.c_str(), "no") == 0) - net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel); + descriptor.loadNet(network_forIMG, caffemodel); else - net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file); - if (!net_ready) { - ts->printf(cvtest::TS::LOG, "No model loaded"); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } + descriptor.loadNet(network_forIMG, caffemodel, mean_file); cv::Mat img = cv::imread(target_img, -1); if (img.empty()) { ts->printf(cvtest::TS::LOG, "could not read image %s\n", target_img.c_str()); @@ -58,7 +46,7 @@ void CV_CNN_Feature_Test::run(int) return; } cv::Mat feature_test; - descriptor.extract(net_ready, img, feature_test, feature_blob); + descriptor.extract(img, feature_test, feature_blob); if (feature_test.empty()) { ts->printf(cvtest::TS::LOG, "could not extract feature from image %s\n", target_img.c_str()); ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA);