parent d6cb8889b4
commit 6f38d89160
17 changed files with 481 additions and 134 deletions
@@ -0,0 +1,112 @@
/*
 * Software License Agreement (BSD License)
 *
 * Copyright (c) 2009, Willow Garage, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *  * Neither the name of Willow Garage, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <opencv2/cnn_3dobj.hpp>
#include <iomanip>

using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;

int main(int argc, char** argv)
{
    const String keys = "{help | | This demo converts a set of images under a given path into a leveldb database for feature extraction with Caffe.}"
        "{src_dir | ../data/images_all/ | Source directory of the images to be converted into the leveldb dataset.}"
        "{src_dst | ../data/dbfile | Destination directory of the converted leveldb dataset.}"
        "{attach_dir | ../data/dbfile | Path for saving additional files which describe the conversion results.}"
        "{channel | 1 | Number of channels of the images.}"
        "{width | 64 | Width of the images.}"
        "{height | 64 | Height of the images.}"
        "{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | Caffe model used for feature extraction.}"
        "{network_forDB | ../data/3d_triplet_galleryIMG.prototxt | Network definition file used for extracting features from the levelDB data. Caution: the path of the levelDB training samples must be written into the .prototxt file for the TEST phase.}"
        "{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | Output file storing the extracted features as binary, alongside the vector<cv::Mat> of features.}"
        "{extract_feature_blob_names | feat | CNN layer used for feature extraction.}"
        "{num_mini_batches | 4 | Number of mini-batches; together with the batch size defined in the .prototxt it must cover all images so that features are extracted for every image.}"
        "{device | CPU | Device: CPU or GPU.}"
        "{dev_id | 0 | ID of the GPU.}"
        "{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting the feature of a single image and classifying it.}"
        "{mean_file | ../data/images_mean/triplet_mean.binaryproto | Mean file generated by Caffe from all gallery images; it can be used for mean-value subtraction on all images.}"
        "{label_file | ../data/dbfileimage_filename | Name list of all gallery images.}"
        "{target_img | ../data/images_all/2_13.png | Path of the image to be classified.}"
        "{num_candidate | 6 | Number of gallery candidates returned as the prediction result.}";
    cv::CommandLineParser parser(argc, argv, keys);
    parser.about("Demo for leveldb conversion, feature extraction and classification with the cnn_3dobj module");
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
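    /* A hypothetical invocation of this demo (the binary name "demo_classify" is an
     * assumption; every key shown below simply repeats its default value from the
     * keys string above):
     *   ./demo_classify -src_dir=../data/images_all/ -src_dst=../data/dbfile \
     *       -caffemodel=../data/3d_triplet_iter_10000.caffemodel \
     *       -target_img=../data/images_all/2_13.png -device=CPU -num_candidate=6
     */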
    string src_dir = parser.get<string>("src_dir");
    string src_dst = parser.get<string>("src_dst");
    string attach_dir = parser.get<string>("attach_dir");
    int channel = parser.get<int>("channel");
    int width = parser.get<int>("width");
    int height = parser.get<int>("height");
    string caffemodel = parser.get<string>("caffemodel");
    string network_forDB = parser.get<string>("network_forDB");
    string save_feature_dataset_names = parser.get<string>("save_feature_dataset_names");
    string extract_feature_blob_names = parser.get<string>("extract_feature_blob_names");
    int num_mini_batches = parser.get<int>("num_mini_batches");
    string device = parser.get<string>("device");
    int dev_id = parser.get<int>("dev_id");
    string network_forIMG = parser.get<string>("network_forIMG");
    string mean_file = parser.get<string>("mean_file");
    string label_file = parser.get<string>("label_file");
    string target_img = parser.get<string>("target_img");
    int num_candidate = parser.get<int>("num_candidate");
    // Convert the gallery images into a leveldb database, then extract a feature
    // descriptor for every gallery image with the Caffe model.
    cv::cnn_3dobj::DataTrans transTemp;
    transTemp.convert(src_dir, src_dst, attach_dir, channel, width, height);
    std::vector<cv::Mat> feature_reference = transTemp.feature_extraction_pipeline(caffemodel, network_forDB, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id);

    // Start of the classification part of the demo.
    cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file);
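    /* feature_reference is expected to hold one descriptor per gallery image, in the
     * same order as the name list passed as label_file, since Classification::Classify
     * pairs distances with labels by index (see the Classification implementation
     * later in this commit). */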

    std::cout << std::endl << "---------- Prediction for "
              << target_img << " ----------" << std::endl;

    cv::Mat img = cv::imread(target_img, -1);
    // CHECK(!img.empty()) << "Unable to decode image " << target_img;

    std::cout << std::endl << "---------- Feature of gallery images ----------" << std::endl;
    std::vector<std::pair<string, float> > prediction;
    for (unsigned int i = 0; i < feature_reference.size(); i++)
        std::cout << feature_reference[i] << endl;
    cv::Mat feature_test = classifier.feature_extract(img, false);
    std::cout << std::endl << "---------- Feature of target image: " << target_img << " ----------" << endl << feature_test.t() << std::endl;
    prediction = classifier.Classify(feature_reference, img, num_candidate, false);

    // Print the top N predictions (smallest feature distance first).
    std::cout << std::endl << "---------- Prediction result (distance - file name in gallery) ----------" << std::endl;
    for (size_t i = 0; i < prediction.size(); ++i) {
        std::pair<string, float> p = prediction[i];
        std::cout << std::fixed << std::setprecision(2) << p.second << " - \""
                  << p.first << "\"" << std::endl;
    }
    return 0;
}
@@ -0,0 +1,86 @@
name: "3d_triplet"
input: "data"
input_dim: 1
input_dim: 1
input_dim: 64
input_dim: 64
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 16
    kernel_size: 8
    stride: 1
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "pool1"
  top: "pool1"
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  convolution_param {
    num_output: 7
    kernel_size: 5
    stride: 1
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "pool2"
  top: "pool2"
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  inner_product_param {
    num_output: 256
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "feat"
  type: "InnerProduct"
  bottom: "ip1"
  top: "feat"
  inner_product_param {
    num_output: 4
  }
}
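# A reading of the layers above (summary only, not part of the Caffe definition):
# 1x1x64x64 input -> conv1 (16 maps, 8x8 kernel, stride 1) -> 2x2 max pool -> ReLU
# -> conv2 (7 maps, 5x5 kernel, stride 1) -> 2x2 max pool -> ReLU
# -> ip1 (InnerProduct, 256 outputs) -> ReLU -> feat (InnerProduct, 4 outputs),
# so the "feat" blob used for feature extraction is a 4-dimensional descriptor.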
Binary file not shown.
@@ -0,0 +1,197 @@
#include "precomp.hpp"
using namespace caffe;
using std::string;

namespace cv
{
namespace cnn_3dobj
{
Classification::Classification(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file) {
#ifdef CPU_ONLY
  caffe::Caffe::set_mode(caffe::Caffe::CPU);
#else
  caffe::Caffe::set_mode(caffe::Caffe::GPU);
#endif

  /* Load the network. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(trained_file);

  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  Blob<float>* input_layer = net_->input_blobs()[0];
  num_channels_ = input_layer->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
    << "Input layer should have 1 or 3 channels.";
  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

  /* Load the binaryproto mean file. */
  SetMean(mean_file);

  /* Load labels. */
  std::ifstream labels(label_file.c_str());
  CHECK(labels) << "Unable to open labels file " << label_file;
  string line;
  while (std::getline(labels, line))
    labels_.push_back(string(line));

  /* Blob<float>* output_layer = net_->output_blobs()[0];
  CHECK_EQ(labels_.size(), output_layer->channels())
    << "Number of labels is different from the output layer dimension."; */
}

/* bool Classification::PairCompare(const std::pair<float, int>& lhs,
                                    const std::pair<float, int>& rhs) {
  return lhs.first > rhs.first;
} */

/* Return the indices of the N smallest values of vector v (std::partial_sort with the
 * default comparator sorts ascending, so for the feature distances computed below this
 * yields the N closest gallery entries). */
std::vector<int> Classification::Argmax(const std::vector<float>& v, int N) {
  std::vector<std::pair<float, int> > pairs;
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], i));
  std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end());

  std::vector<int> result;
  for (int i = 0; i < N; ++i)
    result.push_back(pairs[i].second);
  return result;
}

/* Return the top N predictions, i.e. the N gallery entries whose features are closest
 * (in L2 distance) to the feature extracted from img. */
std::vector<std::pair<string, float> > Classification::Classify(const std::vector<cv::Mat>& reference, const cv::Mat& img, int N, bool mean_substract) {
  cv::Mat feature = feature_extract(img, mean_substract);
  std::vector<float> output;
  for (unsigned int i = 0; i < reference.size(); i++) {
    cv::Mat f1 = reference.at(i);
    cv::Mat f2 = feature;
    cv::Mat output_temp = f1.t() - f2;
    output.push_back(cv::norm(output_temp));
  }
  std::vector<int> maxN = Argmax(output, N);
  std::vector<std::pair<string, float> > predictions;
  for (int i = 0; i < N; ++i) {
    int idx = maxN[i];
    predictions.push_back(std::make_pair(labels_[idx], output[idx]));
  }

  return predictions;
}
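
/* A minimal usage sketch, mirroring the demo earlier in this commit (file paths are
 * the demo defaults and would need to exist on disk; gallery_features stands for a
 * std::vector<cv::Mat> of reference descriptors, e.g. the output of
 * DataTrans::feature_extraction_pipeline):
 *
 *   cv::cnn_3dobj::Classification classifier("../data/3d_triplet_testIMG.prototxt",
 *                                            "../data/3d_triplet_iter_10000.caffemodel",
 *                                            "../data/images_mean/triplet_mean.binaryproto",
 *                                            "../data/dbfileimage_filename");
 *   cv::Mat img = cv::imread("../data/images_all/2_13.png", -1);
 *   std::vector<std::pair<string, float> > top =
 *       classifier.Classify(gallery_features, img, 6, false);
 */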

/* Load the mean file in binaryproto format. */
void Classification::SetMean(const string& mean_file) {
  BlobProto blob_proto;
  ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);

  /* Convert from BlobProto to Blob<float>. */
  Blob<float> mean_blob;
  mean_blob.FromProto(blob_proto);
  CHECK_EQ(mean_blob.channels(), num_channels_)
    << "Number of channels of mean file doesn't match input layer.";

  /* The format of the mean file is planar 32-bit float BGR or grayscale. */
  std::vector<cv::Mat> channels;
  float* data = mean_blob.mutable_cpu_data();
  for (int i = 0; i < num_channels_; ++i) {
    /* Extract an individual channel. */
    cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
    channels.push_back(channel);
    data += mean_blob.height() * mean_blob.width();
  }

  /* Merge the separate channels into a single image. */
  cv::Mat mean;
  cv::merge(channels, mean);

  /* Compute the global mean pixel value and create a mean image
   * filled with this value. */
  cv::Scalar channel_mean = cv::mean(mean);
  mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
}
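
/* Note: mean_ is a constant image filled with the per-channel average of the mean
 * file, so the optional subtraction in Preprocess removes only the global average
 * intensity per channel rather than a per-pixel mean. */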

cv::Mat Classification::feature_extract(const cv::Mat& img, bool mean_subtract) {
  Blob<float>* input_layer = net_->input_blobs()[0];
  input_layer->Reshape(1, num_channels_,
                       input_geometry_.height, input_geometry_.width);
  /* Forward dimension change to all layers. */
  net_->Reshape();

  std::vector<cv::Mat> input_channels;
  WrapInputLayer(&input_channels);

  Preprocess(img, &input_channels, mean_subtract);

  net_->ForwardPrefilled();

  /* Copy the output layer to a std::vector. */
  Blob<float>* output_layer = net_->output_blobs()[0];
  const float* begin = output_layer->cpu_data();
  const float* end = begin + output_layer->channels();
  // return std::vector<float>(begin, end);
  std::vector<float> featureVec = std::vector<float>(begin, end);
  cv::Mat feature = cv::Mat(featureVec, true);
  return feature;
}
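
/* The feature returned above is a single-column CV_32F cv::Mat whose length equals
 * the channel count of the network's output blob (4 for the "feat" layer of the
 * 3d_triplet definition added in this commit). */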

/* Wrap the input layer of the network in separate cv::Mat objects
 * (one per channel). This way we save one memcpy operation and we
 * don't need to rely on cudaMemcpy2D. The last preprocessing
 * operation will write the separate channels directly to the input
 * layer. */
void Classification::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
  Blob<float>* input_layer = net_->input_blobs()[0];

  int width = input_layer->width();
  int height = input_layer->height();
  float* input_data = input_layer->mutable_cpu_data();
  for (int i = 0; i < input_layer->channels(); ++i) {
    cv::Mat channel(height, width, CV_32FC1, input_data);
    input_channels->push_back(channel);
    input_data += width * height;
  }
}

void Classification::Preprocess(const cv::Mat& img,
                                std::vector<cv::Mat>* input_channels, bool mean_subtract) {
  /* Convert the input image to the input image format of the network. */
  cv::Mat sample;
  if (img.channels() == 3 && num_channels_ == 1)
    cv::cvtColor(img, sample, CV_BGR2GRAY);
  else if (img.channels() == 4 && num_channels_ == 1)
    cv::cvtColor(img, sample, CV_BGRA2GRAY);
  else if (img.channels() == 4 && num_channels_ == 3)
    cv::cvtColor(img, sample, CV_BGRA2BGR);
  else if (img.channels() == 1 && num_channels_ == 3)
    cv::cvtColor(img, sample, CV_GRAY2BGR);
  else
    sample = img;

  cv::Mat sample_resized;
  if (sample.size() != input_geometry_)
    cv::resize(sample, sample_resized, input_geometry_);
  else
    sample_resized = sample;

  cv::Mat sample_float;
  if (num_channels_ == 3)
    sample_resized.convertTo(sample_float, CV_32FC3);
  else
    sample_resized.convertTo(sample_float, CV_32FC1);

  cv::Mat sample_normalized;
  if (mean_subtract)
    cv::subtract(sample_float, mean_, sample_normalized);
  else
    sample_normalized = sample_float;

  /* This operation will write the separate BGR planes directly to the
   * input layer of the network because it is wrapped by the cv::Mat
   * objects in input_channels. */
  cv::split(sample_normalized, *input_channels);

  CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
        == net_->input_blobs()[0]->cpu_data())
    << "Input channels are not wrapping the input layer of the network.";
}
}}