parent 5072cc690f
commit d6cb8889b4
18 changed files with 265 additions and 621 deletions
@@ -1,3 +1,3 @@
 set(the_description "CNN for 3D object recognition and pose estimation including a completed Sphere View on 3D objects")
 ocv_define_module(cnn_3dobj opencv_core opencv_imgproc opencv_viz opencv_highgui caffe protobuf leveldb glog OPTIONAL WRAP python)
-target_link_libraries(cnn_3dobj caffe protobuf leveldb glog)
+target_link_libraries(opencv_cnn_3dobj caffe protobuf leveldb glog)
@ -1,25 +0,0 @@ |
||||
# The train/test net protocol buffer definition |
||||
net: "examples/triplet/lfw_triplet_train_test.prototxt" |
||||
# test_iter specifies how many forward passes the test should carry out. |
||||
# In the case of lfw, we have test batch size 100 and 100 test iterations, |
||||
# covering the full 10,000 testing images. |
||||
test_iter: 100 |
||||
# Carry out testing every 500 training iterations. |
||||
test_interval: 500 |
||||
# The base learning rate, momentum and the weight decay of the network. |
||||
base_lr: 0.01 |
||||
momentum: 0.9 |
||||
weight_decay: 0.0000 |
||||
# The learning rate policy |
||||
lr_policy: "inv" |
||||
gamma: 0.0001 |
||||
power: 0.75 |
||||
# Display every 100 iterations |
||||
display: 100 |
||||
# The maximum number of iterations |
||||
max_iter: 50000 |
||||
# snapshot intermediate results |
||||
snapshot: 5000 |
||||
snapshot_prefix: "examples/triplet/lfw_triplet" |
||||
# solver mode: CPU or GPU |
||||
solver_mode: CPU |
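
For reference, Caffe's "inv" policy decays the learning rate as base_lr * (1 + gamma * iter)^(-power), so with these settings the rate glides down smoothly from 0.01 over the 50000 iterations. A minimal C++ sketch of that schedule (inv_lr is an illustrative helper, not part of Caffe):

    #include <cmath>
    #include <cstdio>

    // Caffe's "inv" learning rate policy:
    // lr = base_lr * (1 + gamma * iter)^(-power).
    // Constants below mirror the solver settings above.
    static double inv_lr(double base_lr, double gamma, double power, int iter)
    {
        return base_lr * std::pow(1.0 + gamma * iter, -power);
    }

    int main()
    {
        for (int iter = 0; iter <= 50000; iter += 10000)
            std::printf("iter %5d -> lr %.6f\n", iter, inv_lr(0.01, 0.0001, 0.75, iter));
        return 0;
    }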
@ -1,500 +0,0 @@ |
||||
name: "lfw_triplet_train_test" |
||||
layer { |
||||
name: "triplet_data" |
||||
type: "Data" |
||||
top: "triplet_data" |
||||
top: "sim" |
||||
include { |
||||
phase: TRAIN |
||||
} |
||||
transform_param { |
||||
scale: 0.00390625 |
||||
} |
||||
data_param { |
||||
source: "examples/triplet/lfw_triplet_train_leveldb" |
||||
batch_size: 64 |
||||
} |
||||
} |
||||
layer { |
||||
name: "triplet_data" |
||||
type: "Data" |
||||
top: "triplet_data" |
||||
top: "sim" |
||||
include { |
||||
phase: TEST |
||||
} |
||||
transform_param { |
||||
scale: 0.00390625 |
||||
} |
||||
data_param { |
||||
source: "examples/triplet/lfw_triplet_test_leveldb" |
||||
batch_size: 100 |
||||
} |
||||
} |
||||
layer { |
||||
name: "slice_triplet" |
||||
type: "Slice" |
||||
bottom: "triplet_data" |
||||
top: "data" |
||||
top: "data_true" |
||||
top: "data_false" |
||||
slice_param { |
||||
slice_dim: 1 |
||||
slice_point: 1 |
||||
slice_point: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv1" |
||||
type: "Convolution" |
||||
bottom: "data" |
||||
top: "conv1" |
||||
param { |
||||
name: "conv1_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "conv1_b" |
||||
lr_mult: 2 |
||||
} |
||||
convolution_param { |
||||
num_output: 20 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool1" |
||||
type: "Pooling" |
||||
bottom: "conv1" |
||||
top: "pool1" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv2" |
||||
type: "Convolution" |
||||
bottom: "pool1" |
||||
top: "conv2" |
||||
param { |
||||
name: "conv2_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "conv2_b" |
||||
lr_mult: 2 |
||||
} |
||||
convolution_param { |
||||
num_output: 50 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool2" |
||||
type: "Pooling" |
||||
bottom: "conv2" |
||||
top: "pool2" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "ip1" |
||||
type: "InnerProduct" |
||||
bottom: "pool2" |
||||
top: "ip1" |
||||
param { |
||||
name: "ip1_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "ip1_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 500 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1" |
||||
type: "ReLU" |
||||
bottom: "ip1" |
||||
top: "ip1" |
||||
} |
||||
layer { |
||||
name: "ip2" |
||||
type: "InnerProduct" |
||||
bottom: "ip1" |
||||
top: "ip2" |
||||
param { |
||||
name: "ip2_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "ip2_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 10 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "feat" |
||||
type: "InnerProduct" |
||||
bottom: "ip2" |
||||
top: "feat" |
||||
param { |
||||
name: "feat_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "feat_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 2 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv1_true" |
||||
type: "Convolution" |
||||
bottom: "data_true" |
||||
top: "conv1_true" |
||||
param { |
||||
name: "conv1_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "conv1_b" |
||||
lr_mult: 2 |
||||
} |
||||
convolution_param { |
||||
num_output: 20 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool1_true" |
||||
type: "Pooling" |
||||
bottom: "conv1_true" |
||||
top: "pool1_true" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv2_true" |
||||
type: "Convolution" |
||||
bottom: "pool1_true" |
||||
top: "conv2_true" |
||||
param { |
||||
name: "conv2_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "conv2_b" |
||||
lr_mult: 2 |
||||
} |
||||
convolution_param { |
||||
num_output: 50 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool2_true" |
||||
type: "Pooling" |
||||
bottom: "conv2_true" |
||||
top: "pool2_true" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "ip1_true" |
||||
type: "InnerProduct" |
||||
bottom: "pool2_true" |
||||
top: "ip1_true" |
||||
param { |
||||
name: "ip1_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "ip1_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 500 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1_true" |
||||
type: "ReLU" |
||||
bottom: "ip1_true" |
||||
top: "ip1_true" |
||||
} |
||||
layer { |
||||
name: "ip2_true" |
||||
type: "InnerProduct" |
||||
bottom: "ip1_true" |
||||
top: "ip2_true" |
||||
param { |
||||
name: "ip2_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "ip2_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 10 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "feat_true" |
||||
type: "InnerProduct" |
||||
bottom: "ip2_true" |
||||
top: "feat_true" |
||||
param { |
||||
name: "feat_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "feat_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 2 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv1_false" |
||||
type: "Convolution" |
||||
bottom: "data_false" |
||||
top: "conv1_false" |
||||
param { |
||||
name: "conv1_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "conv1_b" |
||||
lr_mult: 2 |
||||
} |
||||
convolution_param { |
||||
num_output: 20 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool1_false" |
||||
type: "Pooling" |
||||
bottom: "conv1_false" |
||||
top: "pool1_false" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv2_false" |
||||
type: "Convolution" |
||||
bottom: "pool1_false" |
||||
top: "conv2_false" |
||||
param { |
||||
name: "conv2_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "conv2_b" |
||||
lr_mult: 2 |
||||
} |
||||
convolution_param { |
||||
num_output: 50 |
||||
kernel_size: 5 |
||||
stride: 1 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "pool2_false" |
||||
type: "Pooling" |
||||
bottom: "conv2_false" |
||||
top: "pool2_false" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "ip1_false" |
||||
type: "InnerProduct" |
||||
bottom: "pool2_false" |
||||
top: "ip1_false" |
||||
param { |
||||
name: "ip1_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "ip1_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 500 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1_false" |
||||
type: "ReLU" |
||||
bottom: "ip1_false" |
||||
top: "ip1_false" |
||||
} |
||||
layer { |
||||
name: "ip2_false" |
||||
type: "InnerProduct" |
||||
bottom: "ip1_false" |
||||
top: "ip2_false" |
||||
param { |
||||
name: "ip2_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "ip2_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 10 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "feat_false" |
||||
type: "InnerProduct" |
||||
bottom: "ip2_false" |
||||
top: "feat_false" |
||||
param { |
||||
name: "feat_w" |
||||
lr_mult: 1 |
||||
} |
||||
param { |
||||
name: "feat_b" |
||||
lr_mult: 2 |
||||
} |
||||
inner_product_param { |
||||
num_output: 2 |
||||
weight_filler { |
||||
type: "xavier" |
||||
} |
||||
bias_filler { |
||||
type: "constant" |
||||
} |
||||
} |
||||
} |
||||
layer { |
||||
name: "loss" |
||||
type: "TripletLoss" |
||||
bottom: "feat" |
||||
bottom: "feat_true" |
||||
bottom: "feat_false" |
||||
bottom: "sim" |
||||
top: "loss" |
||||
triplet_loss_param { |
||||
margin: 0.2 |
||||
} |
||||
} |
||||
|
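
TripletLoss is a custom layer from the cnn_triplet Caffe branch rather than stock Caffe, so its exact code lives in that branch. With margin: 0.2 applied over the three shared-weight "feat" outputs, the standard hinge formulation is the natural reading: the anchor embedding must sit closer to the positive than to the negative by at least the margin. A self-contained C++ sketch under that assumption (not the layer's actual source):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Hinge-style triplet loss for one sample: anchor ("feat") vs.
    // positive ("feat_true") and negative ("feat_false") embeddings,
    // compared by squared Euclidean distance.
    static double triplet_loss(const std::vector<double>& anchor,
                               const std::vector<double>& pos,
                               const std::vector<double>& neg,
                               double margin)
    {
        double d_pos = 0.0, d_neg = 0.0;
        for (std::size_t i = 0; i < anchor.size(); ++i)
        {
            d_pos += (anchor[i] - pos[i]) * (anchor[i] - pos[i]);
            d_neg += (anchor[i] - neg[i]) * (anchor[i] - neg[i]);
        }
        // Zero once the negative is farther than the positive by the margin.
        return std::max(0.0, margin + d_pos - d_neg);
    }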
@ -1,37 +0,0 @@ |
||||
#!/usr/bin/env sh |
||||
# This script converts the lfw data into leveldb format. |
||||
|
||||
git clone https://github.com/Wangyida/caffe/tree/cnn_triplet |
||||
cd caffe |
||||
mkdir build |
||||
cd build |
||||
cmake -DCMAKE_INSTALL_PREFIX=/usr/local .. |
||||
make -j4 |
||||
make test |
||||
sudo make install |
||||
cd .. |
||||
cmake .. |
||||
make -j4 |
||||
|
||||
./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/ape.ply -imagedir=../data/images_ape/ -labeldir=../data/label_ape.txt -num_class=4 -label_class=0 |
||||
./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/ant.ply -imagedir=../data/images_ant/ -labeldir=../data/label_ant.txt -num_class=4 -label_class=1 |
||||
./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/cow.ply -imagedir=../data/images_cow/ -labeldir=../data/label_cow.txt -num_class=4 -label_class=2 |
||||
./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/plane.ply -imagedir=../data/images_plane/ -labeldir=../data/label_plane.txt -num_class=4 -label_class=3 |
||||
|
||||
echo "Creating leveldb..." |
||||
|
||||
rm -rf ./linemod_triplet_train_leveldb |
||||
rm -rf ./linemod_triplet_test_leveldb |
||||
|
||||
convert_lfw_triplet_data \ |
||||
./binary_image_train \ |
||||
./binary_label_train \ |
||||
./linemod_triplet_train_leveldb |
||||
convert_lfw_triplet_data \ |
||||
./binary_image_test \ |
||||
./binary_image_test \ |
||||
./linemod_triplet_test_leveldb |
||||
|
||||
echo "Done." |
||||
|
||||
caffe train --solver=examples/triplet/lfw_triplet_solver.prototxt |
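
In the sphereview_test calls above, -ite_depth sets how many times the viewing icosahedron around each model is subdivided. Assuming the standard icosphere construction, the number of vertices, and hence candidate camera positions, is 10 * 4^depth + 2 (12, 42, 162, ...); the sketch below illustrates only that count, and the module's actual camera placement may differ:

    #include <cstdio>

    // Icosphere vertex count after `depth` subdivisions: each subdivision
    // quadruples the face count, leaving 10 * 4^depth + 2 vertices
    // (an assumption about how sphereview_test places its views).
    static int icosphere_viewpoints(int depth)
    {
        int n = 10;
        for (int i = 0; i < depth; ++i)
            n *= 4;
        return n + 2;
    }

    int main()
    {
        for (int depth = 0; depth <= 3; ++depth)
            std::printf("ite_depth=%d -> %d viewpoints\n", depth, icosphere_viewpoints(depth));
        return 0;
    }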
@@ -0,0 +1,99 @@
/*
 * Software License Agreement (BSD License)
 *
 * Copyright (c) 2009, Willow Garage, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *  * Neither the name of Willow Garage, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <opencv2/cnn_3dobj.hpp>
#include <stdio.h>  // for snprintf
#include <tr1/memory>
#include <string>
#include <vector>
#include "google/protobuf/text_format.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#define CPU_ONLY
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
//using boost::shared_ptr;
using std::string;
//namespace db = caffe::db;
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
int main(int argc, char* argv[])
{
    const String keys = "{help | | This demo converts a set of images from a given path into a leveldb database for feature extraction with Caffe.}"
        "{src_dir | ../data/images_all/ | Source directory of the images to be converted into the leveldb dataset.}"
        "{src_dst | ../data/dbfile | Destination directory of the converted leveldb dataset.}"
        "{attach_dir | ../data/dbfile | Path for saving additional files which describe the conversion results.}"
        "{channel | 1 | Number of channels of the images.}"
        "{width | 64 | Width of the images.}"
        "{height | 64 | Height of the images.}"
        "{pretrained_binary_proto | ../data/3d_triplet_iter_10000.caffemodel | Caffe model for feature extraction.}"
        "{feature_extraction_proto | ../data/3d_triplet_train_test.prototxt | Network definition; the path of the test samples must be written into the .prototxt file under phase TEST.}"
        "{save_feature_dataset_names | ../data/feature/feature_iter_10000.bin | Output file for the extracted features, stored as binary alongside the vector<cv::Mat> features.}"
        "{extract_feature_blob_names | feat | The layer used for feature extraction in the CNN.}"
        "{num_mini_batches | 6 | Number of mini-batches, matching the batch size defined in the .prototxt, so that features are extracted from all images.}"
        "{device | CPU | Device: CPU or GPU.}"
        "{dev_id | 0 | Device id.}";
    cv::CommandLineParser parser(argc, argv, keys);
    parser.about("Demo for converting images to leveldb and extracting CNN features");
    if (parser.has("help"))
    {
        parser.printMessage();
        return 0;
    }
    string src_dir = parser.get<string>("src_dir");
    string src_dst = parser.get<string>("src_dst");
    string attach_dir = parser.get<string>("attach_dir");
    int channel = parser.get<int>("channel");
    int width = parser.get<int>("width");
    int height = parser.get<int>("height");
    string pretrained_binary_proto = parser.get<string>("pretrained_binary_proto");
    string feature_extraction_proto = parser.get<string>("feature_extraction_proto");
    string save_feature_dataset_names = parser.get<string>("save_feature_dataset_names");
    string extract_feature_blob_names = parser.get<string>("extract_feature_blob_names");
    int num_mini_batches = parser.get<int>("num_mini_batches");
    string device = parser.get<string>("device");
    int dev_id = parser.get<int>("dev_id");
    cv::cnn_3dobj::DataTrans transTemp;
    transTemp.convert(src_dir, src_dst, attach_dir, channel, width, height);
    std::vector<cv::Mat> extractedFeature = transTemp.feature_extraction_pipeline(pretrained_binary_proto, feature_extraction_proto, save_feature_dataset_names, extract_feature_blob_names, num_mini_batches, device, dev_id);
    return 0;
}