parent
d6cb8889b4
commit
6f38d89160
17 changed files with 481 additions and 134 deletions
@ -0,0 +1,112 @@ |
||||
/*
|
||||
* Software License Agreement (BSD License) |
||||
* |
||||
* Copyright (c) 2009, Willow Garage, Inc. |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions |
||||
* are met: |
||||
* |
||||
* * Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* * Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* * Neither the name of Willow Garage, Inc. nor the names of its |
||||
* contributors may be used to endorse or promote products derived |
||||
* from this software without specific prior written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
#include <opencv2/cnn_3dobj.hpp> |
||||
#include <iomanip> |
||||
using namespace cv; |
||||
using namespace std; |
||||
using namespace cv::cnn_3dobj; |
||||
int main(int argc, char** argv) |
||||
{ |
||||
const String keys = "{help | | this demo will convert a set of images in a particular path into leveldb database for feature extraction using Caffe.}" |
||||
"{src_dir | ../data/images_all/ | Source direction of the images ready for being converted to leveldb dataset.}" |
||||
"{src_dst | ../data/dbfile | Aim direction of the converted to leveldb dataset. }" |
||||
"{attach_dir | ../data/dbfile | Path for saving additional files which describe the transmission results. }" |
||||
"{channel | 1 | Channel of the images. }" |
||||
"{width | 64 | Width of images}" |
||||
"{height | 64 | Height of images}" |
||||
"{caffemodel | ../data/3d_triplet_iter_10000.caffemodel | caffe model for feature exrtaction.}" |
||||
"{network_forDB | ../data/3d_triplet_galleryIMG.prototxt | Network definition file used for extracting feature from levelDB data, causion: the path of levelDB training samples must be wrotten in in .prototxt files in Phase TEST}" |
||||
"{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | Output of the extracted feature in form of binary files together with the vector<cv::Mat> features as the feature.}" |
||||
"{extract_feature_blob_names | feat | Layer used for feature extraction in CNN.}" |
||||
"{num_mini_batches | 4 | Batches suit for the batches defined in the .proto for the aim of extracting feature from all images.}" |
||||
"{device | CPU | Device: CPU or GPU.}" |
||||
"{dev_id | 0 | ID of GPU.}" |
||||
"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting feature from a single image and making a classification}" |
||||
"{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images, this could be used for mean value substraction from all images.}" |
||||
"{label_file | ../data/dbfileimage_filename | A namelist including all gallery images.}" |
||||
"{target_img | ../data/images_all/2_13.png | Path of image waiting to be classified.}" |
||||
"{num_candidate | 6 | Number of candidates in gallery as the prediction result.}"; |
||||
cv::CommandLineParser parser(argc, argv, keys); |
||||
parser.about("Demo for Sphere View data generation"); |
||||
if (parser.has("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
string src_dir = parser.get<string>("src_dir"); |
||||
string src_dst = parser.get<string>("src_dst"); |
||||
string attach_dir = parser.get<string>("attach_dir"); |
||||
int channel = parser.get<int>("channel"); |
||||
int width = parser.get<int>("width"); |
||||
int height = parser.get<int>("height"); |
||||
string caffemodel = parser.get<string>("caffemodel"); |
||||
string network_forDB = parser.get<string>("network_forDB"); |
||||
string save_feature_dataset_names = parser.get<string>("save_feature_dataset_names"); |
||||
string extract_feature_blob_names = parser.get<string>("extract_feature_blob_names"); |
||||
int num_mini_batches = parser.get<int>("num_mini_batches"); |
||||
string device = parser.get<string>("device"); |
||||
int dev_id = parser.get<int>("dev_id"); |
||||
string network_forIMG = parser.get<string>("network_forIMG"); |
||||
string mean_file = parser.get<string>("mean_file"); |
||||
string label_file = parser.get<string>("label_file"); |
||||
string target_img = parser.get<string>("target_img"); |
||||
int num_candidate = parser.get<int>("num_candidate"); |
||||
cv::cnn_3dobj::DataTrans transTemp; |
||||
transTemp.convert(src_dir,src_dst,attach_dir,channel,width,height); |
||||
std::vector<cv::Mat> feature_reference = transTemp.feature_extraction_pipeline(caffemodel, network_forDB, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id); |
||||
////start another demo
|
||||
cv::cnn_3dobj::Classification classifier(network_forIMG, caffemodel, mean_file, label_file); |
||||
|
||||
std::cout << std::endl << "---------- Prediction for " |
||||
<< target_img << " ----------" << std::endl; |
||||
|
||||
cv::Mat img = cv::imread(target_img, -1); |
||||
// CHECK(!img.empty()) << "Unable to decode image " << target_img;
|
||||
std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl; |
||||
std::vector<std::pair<string, float> > prediction; |
||||
for (unsigned int i = 0; i < feature_reference.size(); i++) |
||||
std::cout << feature_reference[i] << endl; |
||||
cv::Mat feature_test = classifier.feature_extract(img, false); |
||||
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test.t() << std::endl; |
||||
prediction = classifier.Classify(feature_reference, img, num_candidate, false); |
||||
// Print the top N prediction.
|
||||
std::cout << std::endl << "---------- Prediction result(distance - file name in gallery) ----------" << std::endl; |
||||
for (size_t i = 0; i < prediction.size(); ++i) { |
||||
std::pair<string, float> p = prediction[i]; |
||||
std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" |
||||
<< p.first << "\"" << std::endl; |
||||
} |
||||
return 0; |
||||
} |
@ -0,0 +1,86 @@ |
||||
name: "3d_triplet" |
||||
input: "data" |
||||
input_dim: 1 |
||||
input_dim: 1 |
||||
input_dim: 64 |
||||
input_dim: 64 |
||||
layer { |
||||
name: "conv1" |
||||
type: "Convolution" |
||||
bottom: "data" |
||||
top: "conv1" |
||||
convolution_param { |
||||
num_output: 16 |
||||
kernel_size: 8 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool1" |
||||
type: "Pooling" |
||||
bottom: "conv1" |
||||
top: "pool1" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1" |
||||
type: "ReLU" |
||||
bottom: "pool1" |
||||
top: "pool1" |
||||
} |
||||
layer { |
||||
name: "conv2" |
||||
type: "Convolution" |
||||
bottom: "pool1" |
||||
top: "conv2" |
||||
convolution_param { |
||||
num_output: 7 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool2" |
||||
type: "Pooling" |
||||
bottom: "conv2" |
||||
top: "pool2" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu2" |
||||
type: "ReLU" |
||||
bottom: "pool2" |
||||
top: "pool2" |
||||
} |
||||
layer { |
||||
name: "ip1" |
||||
type: "InnerProduct" |
||||
bottom: "pool2" |
||||
top: "ip1" |
||||
inner_product_param { |
||||
num_output: 256 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3" |
||||
type: "ReLU" |
||||
bottom: "ip1" |
||||
top: "ip1" |
||||
} |
||||
layer { |
||||
name: "feat" |
||||
type: "InnerProduct" |
||||
bottom: "ip1" |
||||
top: "feat" |
||||
inner_product_param { |
||||
num_output: 4 |
||||
} |
||||
} |
Binary file not shown.
@ -0,0 +1,197 @@ |
||||
#include "precomp.hpp" |
||||
using namespace caffe; |
||||
using std::string; |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace cnn_3dobj |
||||
{ |
||||
/*
 * Construct a classifier.
 *
 *   model_file   - network definition, loaded in TEST phase
 *   trained_file - trained weights copied into the network
 *   mean_file    - binaryproto mean file, handed to SetMean()
 *   label_file   - text file; every line becomes one entry of labels_
 *
 * Aborts through the CHECK macros when the network does not have exactly one
 * input and one output, when the input has neither 1 nor 3 channels, or when
 * the label file cannot be opened.
 */
Classification::Classification(const string& model_file, const string& trained_file, const string& mean_file, const string& label_file) {
    /* The execution mode is fixed at compile time by CPU_ONLY. */
#ifdef CPU_ONLY
    caffe::Caffe::set_mode(caffe::Caffe::CPU);
#else
    caffe::Caffe::set_mode(caffe::Caffe::GPU);
#endif

    /* Instantiate the network, then fill it with the trained weights. */
    net_.reset(new Net<float>(model_file, TEST));
    net_->CopyTrainedLayersFrom(trained_file);

    CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
    CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

    /* Remember the channel count and spatial size expected by the input blob. */
    Blob<float>* in_blob = net_->input_blobs()[0];
    num_channels_ = in_blob->channels();
    CHECK(num_channels_ == 3 || num_channels_ == 1)
        << "Input layer should have 1 or 3 channels.";
    input_geometry_ = cv::Size(in_blob->width(), in_blob->height());

    /* The mean image depends on num_channels_ and input_geometry_ set above. */
    SetMean(mean_file);

    /* Read the labels, one per line. */
    std::ifstream label_stream(label_file.c_str());
    CHECK(label_stream) << "Unable to open labels file " << label_file;
    for (string entry; std::getline(label_stream, entry); )
        labels_.push_back(entry);

    /* Disabled consistency check between label count and output size:
    Blob<float>* output_layer = net_->output_blobs()[0];
    CHECK_EQ(labels_.size(), output_layer->channels())
        << "Number of labels is different from the output layer dimension.";*/
}
||||
|
||||
/*bool Classifier::PairCompare(const std::pair<float, int>& lhs,
|
||||
const std::pair<float, int>& rhs) { |
||||
return lhs.first > rhs.first; |
||||
}*/ |
||||
|
||||
/* Return the indices of the top N values of vector v. */ |
||||
std::vector<int> Classification::Argmax(const std::vector<float>& v, int N) { |
||||
std::vector<std::pair<float, int> > pairs; |
||||
for (size_t i = 0; i < v.size(); ++i) |
||||
pairs.push_back(std::make_pair(v[i], i)); |
||||
std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end()); |
||||
|
||||
std::vector<int> result; |
||||
for (int i = 0; i < N; ++i) |
||||
result.push_back(pairs[i].second); |
||||
return result; |
||||
} |
||||
|
||||
//Return the top N predictions.
|
||||
std::vector<std::pair<string, float> > Classification::Classify(const std::vector<cv::Mat>& reference, const cv::Mat& img, int N, bool mean_substract) { |
||||
cv::Mat feature = feature_extract(img, mean_substract); |
||||
std::vector<float> output; |
||||
for (unsigned int i = 0; i < reference.size(); i++) { |
||||
cv::Mat f1 = reference.at(i); |
||||
cv::Mat f2 = feature; |
||||
cv::Mat output_temp = f1.t()-f2; |
||||
output.push_back(cv::norm(output_temp)); |
||||
} |
||||
std::vector<int> maxN = Argmax(output, N); |
||||
std::vector<std::pair<string, float> > predictions; |
||||
for (int i = 0; i < N; ++i) { |
||||
int idx = maxN[i]; |
||||
predictions.push_back(std::make_pair(labels_[idx], output[idx])); |
||||
} |
||||
|
||||
return predictions; |
||||
} |
||||
|
||||
/* Load the mean file in binaryproto format. */ |
||||
void Classification::SetMean(const string& mean_file) { |
||||
BlobProto blob_proto; |
||||
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); |
||||
|
||||
/* Convert from BlobProto to Blob<float> */ |
||||
Blob<float> mean_blob; |
||||
mean_blob.FromProto(blob_proto); |
||||
CHECK_EQ(mean_blob.channels(), num_channels_) |
||||
<< "Number of channels of mean file doesn't match input layer."; |
||||
|
||||
/* The format of the mean file is planar 32-bit float BGR or grayscale. */ |
||||
std::vector<cv::Mat> channels; |
||||
float* data = mean_blob.mutable_cpu_data(); |
||||
for (int i = 0; i < num_channels_; ++i) { |
||||
/* Extract an individual channel. */ |
||||
cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); |
||||
channels.push_back(channel); |
||||
data += mean_blob.height() * mean_blob.width(); |
||||
} |
||||
|
||||
/* Merge the separate channels into a single image. */ |
||||
cv::Mat mean; |
||||
cv::merge(channels, mean); |
||||
|
||||
/* Compute the global mean pixel value and create a mean image
|
||||
* filled with this value. */ |
||||
cv::Scalar channel_mean = cv::mean(mean); |
||||
mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); |
||||
} |
||||
|
||||
/*
 * Run img through the network and return the first output blob as a
 * single-column float cv::Mat with one row per output channel.
 *
 *   img           - input image; converted and resized by Preprocess()
 *   mean_subtract - when true, Preprocess() subtracts mean_ from the image
 */
cv::Mat Classification::feature_extract(const cv::Mat& img, bool mean_subtract) {
    /* Size the input blob for one image and propagate the shape to all layers. */
    Blob<float>* in_blob = net_->input_blobs()[0];
    in_blob->Reshape(1, num_channels_,
                     input_geometry_.height, input_geometry_.width);
    net_->Reshape();

    /* Preprocess() writes straight into the input blob through these headers. */
    std::vector<cv::Mat> planes;
    WrapInputLayer(&planes);
    Preprocess(img, &planes, mean_subtract);

    net_->ForwardPrefilled();

    /* Copy the output values out of the blob so the returned Mat owns its data. */
    Blob<float>* out_blob = net_->output_blobs()[0];
    const float* first = out_blob->cpu_data();
    const std::vector<float> values(first, first + out_blob->channels());
    return cv::Mat(values, true);
}
||||
|
||||
/* Wrap the input layer of the network in separate cv::Mat objects
|
||||
* (one per channel). This way we save one memcpy operation and we |
||||
* don't need to rely on cudaMemcpy2D. The last preprocessing |
||||
* operation will write the separate channels directly to the input |
||||
* layer. */ |
||||
void Classification::WrapInputLayer(std::vector<cv::Mat>* input_channels) { |
||||
Blob<float>* input_layer = net_->input_blobs()[0]; |
||||
|
||||
int width = input_layer->width(); |
||||
int height = input_layer->height(); |
||||
float* input_data = input_layer->mutable_cpu_data(); |
||||
for (int i = 0; i < input_layer->channels(); ++i) { |
||||
cv::Mat channel(height, width, CV_32FC1, input_data); |
||||
input_channels->push_back(channel); |
||||
input_data += width * height; |
||||
} |
||||
} |
||||
|
||||
/*
 * Bring img into the format of the network input and write it into
 * input_channels:
 *   1. convert the colour layout to num_channels_ channels where a
 *      conversion is defined (3->1, 4->1, 4->3, 1->3), otherwise use img as is;
 *   2. resize to input_geometry_ if the size differs;
 *   3. convert to 32-bit float;
 *   4. subtract mean_ when mean_subtract is true;
 *   5. split the channels into *input_channels.
 * Finally a CHECK verifies that the first channel still points at the input
 * blob's memory.
 */
void Classification::Preprocess(const cv::Mat& img,
                                std::vector<cv::Mat>* input_channels, bool mean_subtract) {
    /* Choose the colour conversion, if any; -1 means "no conversion". */
    const int src_channels = img.channels();
    int conversion = -1;
    if (num_channels_ == 1) {
        if (src_channels == 3)
            conversion = CV_BGR2GRAY;
        else if (src_channels == 4)
            conversion = CV_BGRA2GRAY;
    } else if (num_channels_ == 3) {
        if (src_channels == 4)
            conversion = CV_BGRA2BGR;
        else if (src_channels == 1)
            conversion = CV_GRAY2BGR;
    }

    cv::Mat converted;
    if (conversion >= 0)
        cv::cvtColor(img, converted, conversion);
    else
        converted = img;

    /* Match the spatial size of the input layer. */
    cv::Mat resized;
    if (converted.size() == input_geometry_)
        resized = converted;
    else
        cv::resize(converted, resized, input_geometry_);

    /* The network consumes 32-bit floats. */
    cv::Mat as_float;
    resized.convertTo(as_float, num_channels_ == 3 ? CV_32FC3 : CV_32FC1);

    cv::Mat normalized;
    if (mean_subtract)
        cv::subtract(as_float, mean_, normalized);
    else
        normalized = as_float;

    /* input_channels wraps the input blob, so this split writes the
     * separate planes directly into the network's input layer. */
    cv::split(normalized, *input_channels);

    CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
          == net_->input_blobs()[0]->cpu_data())
        << "Input channels are not wrapping the input layer of the network.";
}
||||
}} |
Loading…
Reference in new issue