make the mean file of images optional

pull/276/head
Wangyida 9 years ago
parent 736638710e
commit ba770cd524
  1. modules/cnn_3dobj/include/opencv2/cnn_3dobj.hpp (39 lines changed)
  2. modules/cnn_3dobj/samples/classifyIMG_demo.cpp (159 lines changed)
  3. modules/cnn_3dobj/samples/sphereview_3dobj_demo.cpp (167 lines changed)
  4. modules/cnn_3dobj/src/cnn_feature.cpp (94 lines changed)
  5. modules/cnn_3dobj/src/cnn_sphereview.cpp (24 lines changed)

@@ -91,7 +91,7 @@ namespace cnn_3dobj
The class creates some sphere views of a camera towards a 3D object meshed from .ply files @cite hinterstoisser2008panter .
*/
class CV_EXPORTS_W IcoSphere
class CV_EXPORTS_W icoSphere
{
private:
float X;
@@ -104,7 +104,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
std::vector<cv::Point3d> CameraPos_temp;
float radius;
float diff;
IcoSphere(float radius_in, int depth_in);
icoSphere(float radius_in, int depth_in);
/** @brief Make all view points have the same distance from the focal point used by the camera view.
*/
CV_WRAP void norm(float v[]);
@@ -116,7 +116,7 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
CV_WRAP void subdivide(float v1[], float v2[], float v3[], int depth);
/** @brief Make all view points have the same distance from the focal point used by the camera view.
*/
CV_WRAP static uint32_t swap_endian(uint32_t val);
CV_WRAP static uint32_t swapEndian(uint32_t val);
/** @brief Arrange the byte order of a 4-byte value to suit the particular system (endianness).
*/
CV_WRAP cv::Point3d getCenter(cv::Mat cloud);
@@ -133,52 +133,47 @@ The class create some sphere views of camera towards a 3D object meshed from .pl
*/
};
class CV_EXPORTS_W DescriptorExtractor
class CV_EXPORTS_W descriptorExtractor
{
private:
caffe::Net<float>* net_;
cv::Size input_geometry_;
int num_channels_;
cv::Mat mean_;
std::vector<string> labels_;
void SetMean(const string& mean_file);
void setMean(const string& mean_file);
/** @brief Load the mean file in binaryproto format.
*/
void WrapInputLayer(std::vector<cv::Mat>* input_channels);
void wrapInputLayer(std::vector<cv::Mat>* input_channels);
/** @brief Wrap the input layer of the network in separate cv::Mat objects (one per channel). This way we save one memcpy operation, and we don't need to rely on cudaMemcpy2D. The last preprocessing operation will write the separate channels directly to the input layer.
*/
void Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels, bool mean_subtract);
void preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels, int net_ready);
/** @brief Convert the input image to the input image format of the network.
*/
public:
DescriptorExtractor();
void list_dir(const char *path,std::vector<string>& files,bool r);
std::vector<string> labels_;
descriptorExtractor();
void listDir(const char *path,std::vector<string>& files,bool r);
/** @brief Get the file names from a root directory.
*/
bool SetNet(const string& cpu_only, int device_id);
bool setNet(const string& cpu_only, int device_id);
/** @brief Initiate a classification structure.
*/
bool LoadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file);
int loadNet(bool netsetter, const string& model_file, const string& trained_file);
/** @brief Initiate a classification structure.
*/
void GetLabellist(const std::vector<string>& name_gallery);
/** @brief Get the label of the gallery images for result displaying in prediction.
int loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file);
/** @brief Initiate a classification structure.
*/
std::vector<std::pair<string, float> > Classify(const cv::Mat& reference, const cv::Mat& target, int N);
/** @brief Make a classification.
void getLabellist(const std::vector<string>& name_gallery);
/** @brief Get the label of the gallery images for result displaying in prediction.
*/
void Extract(bool net_ready, InputArray inputimg, OutputArray feature, bool mean_subtract, std::string feature_blob);
void extract(int net_ready, InputArray inputimg, OutputArray feature, std::string feature_blob);
/** @brief Extract a single feature of one image.
*/
std::vector<int> Argmax(const std::vector<float>& v, int N);
/** @brief Find the N largest numbers.
*/
};
//! @}
}
}
#endif /* CNN_3DOBJ_HPP_ */
#endif
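For orientation, a minimal sketch of how the renamed API is meant to be driven after this commit; paths are the sample defaults from the demos below, and error handling is elided:

#include <opencv2/cnn_3dobj.hpp>
#include <opencv2/imgcodecs.hpp>
using namespace cv::cnn_3dobj;

int main()
{
    descriptorExtractor descriptor;
    bool device_ok = descriptor.setNet("CPU", 0);
    /* loadNet() now reports readiness as an int rather than a bool;
       calling the two-argument overload skips mean subtraction later on. */
    int net_ready = descriptor.loadNet(device_ok,
                                       "../data/3d_triplet_testIMG.prototxt",
                                       "../data/3d_triplet_iter_20000.caffemodel");
    cv::Mat img = cv::imread("../data/images_all/3_13.png", -1);
    cv::Mat feature;
    descriptor.extract(net_ready, img, feature, "feat");
    return 0;
}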

@@ -38,70 +38,107 @@
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
/* Return the indices of the N smallest values of vector v (std::partial_sort orders ascending). */
std::vector<int> argmax(const std::vector<float>& v, int N)
{
std::vector<std::pair<float, int> > pairs;
for (size_t i = 0; i < v.size(); ++i)
pairs.push_back(std::make_pair(v[i], i));
std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end());
std::vector<int> result;
for (int i = 0; i < N; ++i)
result.push_back(pairs[i].second);
return result;
};
/* Rank gallery features by L2 distance to the target and return the N closest as (label, distance) pairs. */
std::vector<std::pair<string, float> > classify(const cv::Mat& reference, const cv::Mat& target, int N, std::vector<string> labels_)
{
std::vector<float> output;
for (int i = 0; i < reference.rows; i++)
{
cv::Mat f1 = reference.row(i);
cv::Mat f2 = target;
cv::Mat output_temp = f1-f2;
output.push_back(cv::norm(output_temp));
}
std::vector<int> maxN = argmax(output, N);
std::vector<std::pair<string, float> > predictions;
for (int i = 0; i < N; ++i)
{
int idx = maxN[i];
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
}
return predictions;
};
int main(int argc, char** argv)
{
const String keys = "{help | | this demo will convert a set of images in a particular path into a leveldb database for feature extraction using Caffe.}"
"{src_dir | ../data/images_all/ | Source directory of the images used as the gallery for feature extraction.}"
"{caffemodel | ../data/3d_triplet_iter_20000.caffemodel | caffe model for feature extraction.}"
"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting a feature from a single image and making a classification.}"
"{mean_file | ../data/images_mean/triplet_mean.binaryproto | The mean file generated by Caffe from all gallery images; this could be used for mean value subtraction from all images.}"
"{target_img | ../data/images_all/3_13.png | Path of the image waiting to be classified.}"
"{feature_blob | feat | Name of the layer which will represent the feature; in this network, ip1 or feat works well.}"
"{num_candidate | 6 | Number of candidates in the gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for object data classification and pose estimation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string src_dir = parser.get<string>("src_dir");
string caffemodel = parser.get<string>("caffemodel");
string network_forIMG = parser.get<string>("network_forIMG");
string mean_file = parser.get<string>("mean_file");
string target_img = parser.get<string>("target_img");
string feature_blob = parser.get<string>("feature_blob");
int num_candidate = parser.get<int>("num_candidate");
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
const String keys = "{help | | this demo will convert a set of images in a particular path into a leveldb database for feature extraction using Caffe. If there is little variance in the data, such as human faces, you can add a mean_file; otherwise it is not so useful.}"
"{src_dir | ../data/images_all/ | Source directory of the images used as the gallery for feature extraction.}"
"{caffemodel | ../data/3d_triplet_iter_20000.caffemodel | caffe model for feature extraction.}"
"{network_forIMG | ../data/3d_triplet_testIMG.prototxt | Network definition file used for extracting a feature from a single image and making a classification.}"
"{mean_file | no | The mean file generated by Caffe from all gallery images; this could be used for mean value subtraction from all images. If you want to use the mean file, set this to ../data/images_mean/triplet_mean.binaryproto.}"
"{target_img | ../data/images_all/3_13.png | Path of the image waiting to be classified.}"
"{feature_blob | feat | Name of the layer which will represent the feature; in this network, ip1 or feat works well.}"
"{num_candidate | 6 | Number of candidates in the gallery as the prediction result.}"
"{device | CPU | device}"
"{dev_id | 0 | dev_id}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for object data classification and pose estimation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
string src_dir = parser.get<string>("src_dir");
string caffemodel = parser.get<string>("caffemodel");
string network_forIMG = parser.get<string>("network_forIMG");
string mean_file = parser.get<string>("mean_file");
string target_img = parser.get<string>("target_img");
string feature_blob = parser.get<string>("feature_blob");
int num_candidate = parser.get<int>("num_candidate");
string device = parser.get<string>("device");
int dev_id = parser.get<int>("dev_id");
cv::cnn_3dobj::DescriptorExtractor descriptor;
bool set_succeed = descriptor.SetNet(device, dev_id);
descriptor.LoadNet(set_succeed, network_forIMG, caffemodel, mean_file);
std::vector<string> name_gallery;
descriptor.list_dir(src_dir.c_str(), name_gallery, false);
descriptor.GetLabellist(name_gallery);
for (unsigned int i = 0; i < name_gallery.size(); i++) {
name_gallery[i] = src_dir + name_gallery[i];
}
std::vector<cv::Mat> img_gallery;
cv::Mat feature_reference;
for (unsigned int i = 0; i < name_gallery.size(); i++) {
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
descriptor.FeatureExtract(img_gallery, feature_reference, false, feature_blob);
cv::cnn_3dobj::descriptorExtractor descriptor;
bool set_succeed = descriptor.setNet(device, dev_id);
int net_ready;
if (strcmp(mean_file.c_str(), "no") == 0)
net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel);
else
net_ready = descriptor.loadNet(set_succeed, network_forIMG, caffemodel, mean_file);
std::vector<string> name_gallery;
descriptor.listDir(src_dir.c_str(), name_gallery, false);
descriptor.getLabellist(name_gallery);
for (unsigned int i = 0; i < name_gallery.size(); i++) {
name_gallery[i] = src_dir + name_gallery[i];
}
std::vector<cv::Mat> img_gallery;
cv::Mat feature_reference;
for (unsigned int i = 0; i < name_gallery.size(); i++) {
img_gallery.push_back(cv::imread(name_gallery[i], -1));
}
descriptor.extract(net_ready, img_gallery, feature_reference, feature_blob);
std::cout << std::endl << "---------- Prediction for "
<< target_img << " ----------" << std::endl;
std::cout << std::endl << "---------- Prediction for " << target_img << " ----------" << std::endl;
cv::Mat img = cv::imread(target_img, -1);
// CHECK(!img.empty()) << "Unable to decode image " << target_img;
std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl;
std::vector<std::pair<string, float> > prediction;
for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
descriptor.FeatureExtract(img, feature_test, false, feature_blob);
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl;
prediction = descriptor.Classify(feature_reference, feature_test, num_candidate);
// Print the top N prediction.
std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < prediction.size(); ++i) {
std::pair<string, float> p = prediction[i];
std::cout << std::fixed << std::setprecision(2) << p.second << " - \""
<< p.first << "\"" << std::endl;
}
return 0;
cv::Mat img = cv::imread(target_img, -1);
// CHECK(!img.empty()) << "Unable to decode image " << target_img;
std::cout << std::endl << "---------- Featrue of gallery images ----------" << std::endl;
std::vector<std::pair<string, float> > prediction;
for (unsigned int i = 0; i < feature_reference.rows; i++)
std::cout << feature_reference.row(i) << endl;
cv::Mat feature_test;
descriptor.extract(net_ready, img, feature_test, feature_blob);
std::cout << std::endl << "---------- Featrue of target image: " << target_img << "----------" << endl << feature_test << std::endl;
prediction = classify(feature_reference, feature_test, num_candidate, descriptor.labels_);
// Print the top N prediction.
std::cout << std::endl << "---------- Prediction result(Distance - File Name in Gallery) ----------" << std::endl;
for (size_t i = 0; i < prediction.size(); ++i) {
std::pair<string, float> p = prediction[i];
std::cout << std::fixed << std::setprecision(2) << p.second << " - \"" << p.first << "\"" << std::endl;
}
return 0;
}
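With mean_file defaulting to "no", the demo can now be invoked either way; hypothetical command lines, assuming the built sample binary is named classify_test:

./classify_test
./classify_test -mean_file=../data/images_mean/triplet_mean.binaryproto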

@@ -40,89 +40,90 @@
using namespace cv;
using namespace std;
using namespace cv::cnn_3dobj;
int main(int argc, char *argv[]){
const String keys = "{help | | demo :$ ./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/ape.ply -imagedir=../data/images_ape/ -labeldir=../data/label_ape.txt -num_class=4 -label_class=0, then press 'q' to run the demo for images generation when you see the gray background and a coordinate.}"
"{ite_depth | 2 | Iteration of sphere generation.}"
"{plymodel | ../3Dmodel/ape.ply | path of the '.ply' file for image rendering. }"
"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
"{num_class | 4 | total number of classes of models}"
"{label_class | 0 | class label of current .ply model}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
int ite_depth = parser.get<int>("ite_depth");
string plymodel = parser.get<string>("plymodel");
string imagedir = parser.get<string>("imagedir");
string labeldir = parser.get<string>("labeldir");
int num_class = parser.get<int>("num_class");
int label_class = parser.get<int>("label_class");
cv::cnn_3dobj::IcoSphere ViewSphere(10,ite_depth);
std::vector<cv::Point3d> campos = ViewSphere.CameraPos;
std::fstream imglabel;
char* p=(char*)labeldir.data();
imglabel.open(p, fstream::app|fstream::out);
bool camera_pov = (true);
/// Create a window
viz::Viz3d myWindow("Coordinate Frame");
myWindow.setWindowSize(Size(64,64));
/// Add coordinate axes
myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
/// Set background color
/// Let's assume camera has the following properties
/// Create a cloud widget.
viz::Mesh objmesh = viz::Mesh::load(plymodel);
Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
Point3d cam_y_dir(0.0f,0.0f,1.0f);
const char* headerPath = "./header_for_";
const char* binaryPath = "./binary_";
ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath);
for(int pose = 0; pose < (int)campos.size(); pose++){
char temp[16]; /* buffer sized for an int; a single `new char` byte would overflow under sprintf */
sprintf (temp,"%d",label_class);
string filename = temp;
filename += "_";
sprintf (temp,"%d",pose);
filename += temp;
filename += ".png";
imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl;
filename = imagedir + filename;
/// We can get the pose of the cam using makeCameraPoses
Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
/// We can get the transformation matrix from camera coordinate system to global using
/// - makeTransformToGlobal. We need the axes of the camera
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
viz::WMesh mesh_widget(objmesh);
/// Pose of the widget in camera frame
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
/// Pose of the widget in global frame
Affine3f cloud_pose_global = transform * cloud_pose;
/// Visualize camera frame
if (!camera_pov)
{
viz::WCameraPosition cpw(1); // Coordinate axes
viz::WCameraPosition cpw_frustum(Vec2f(0.5, 0.5)); // Camera frustum
myWindow.showWidget("CPW", cpw, cam_pose);
myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
}
int main(int argc, char *argv[])
{
const String keys = "{help | | demo: $ ./sphereview_test -ite_depth=2 -plymodel=../3Dmodel/ape.ply -imagedir=../data/images_ape/ -labeldir=../data/label_ape.txt -num_class=4 -label_class=0, then press 'q' to run the demo for image generation when you see the gray background and a coordinate frame.}"
"{ite_depth | 2 | Iteration of sphere generation.}"
"{plymodel | ../3Dmodel/ape.ply | path of the '.ply' file for image rendering. }"
"{imagedir | ../data/images_all/ | path of the generated images for one particular .ply model. }"
"{labeldir | ../data/label_all.txt | path of the generated images for one particular .ply model. }"
"{num_class | 4 | total number of classes of models}"
"{label_class | 0 | class label of current .ply model}";
cv::CommandLineParser parser(argc, argv, keys);
parser.about("Demo for Sphere View data generation");
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
int ite_depth = parser.get<int>("ite_depth");
string plymodel = parser.get<string>("plymodel");
string imagedir = parser.get<string>("imagedir");
string labeldir = parser.get<string>("labeldir");
int num_class = parser.get<int>("num_class");
int label_class = parser.get<int>("label_class");
cv::cnn_3dobj::icoSphere ViewSphere(10,ite_depth);
std::vector<cv::Point3d> campos = ViewSphere.CameraPos;
std::fstream imglabel;
char* p=(char*)labeldir.data();
imglabel.open(p, fstream::app|fstream::out);
bool camera_pov = (true);
/// Create a window
viz::Viz3d myWindow("Coordinate Frame");
myWindow.setWindowSize(Size(64,64));
/// Add coordinate axes
myWindow.showWidget("Coordinate Widget", viz::WCoordinateSystem());
myWindow.setBackgroundColor(viz::Color::gray());
myWindow.spin();
/// Set background color
/// Let's assume camera has the following properties
/// Create a cloud widget.
viz::Mesh objmesh = viz::Mesh::load(plymodel);
Point3d cam_focal_point = ViewSphere.getCenter(objmesh.cloud);
float radius = ViewSphere.getRadius(objmesh.cloud, cam_focal_point);
Point3d cam_y_dir(0.0f,0.0f,1.0f);
const char* headerPath = "./header_for_";
const char* binaryPath = "./binary_";
ViewSphere.createHeader((int)campos.size(), 64, 64, headerPath);
for(int pose = 0; pose < (int)campos.size(); pose++){
char temp[16]; /* buffer sized for an int; a single `new char` byte would overflow under sprintf */
sprintf (temp,"%d",label_class);
string filename = temp;
filename += "_";
sprintf (temp,"%d",pose);
filename += temp;
filename += ".png";
imglabel << filename << ' ' << (int)(campos.at(pose).x*100) << ' ' << (int)(campos.at(pose).y*100) << ' ' << (int)(campos.at(pose).z*100) << endl;
filename = imagedir + filename;
/// We can get the pose of the cam using makeCameraPoses
Affine3f cam_pose = viz::makeCameraPose(campos.at(pose)*radius+cam_focal_point, cam_focal_point, cam_y_dir*radius+cam_focal_point);
/// We can get the transformation matrix from camera coordinate system to global using
/// - makeTransformToGlobal. We need the axes of the camera
Affine3f transform = viz::makeTransformToGlobal(Vec3f(1.0f,0.0f,0.0f), Vec3f(0.0f,1.0f,0.0f), Vec3f(0.0f,0.0f,1.0f), campos.at(pose));
viz::WMesh mesh_widget(objmesh);
/// Pose of the widget in camera frame
Affine3f cloud_pose = Affine3f().translate(Vec3f(1.0f,1.0f,1.0f));
/// Pose of the widget in global frame
Affine3f cloud_pose_global = transform * cloud_pose;
/// Visualize camera frame
if (!camera_pov)
{
viz::WCameraPosition cpw(1); // Coordinate axes
viz::WCameraPosition cpw_frustum(Vec2f(0.5, 0.5)); // Camera frustum
myWindow.showWidget("CPW", cpw, cam_pose);
myWindow.showWidget("CPW_FRUSTUM", cpw_frustum, cam_pose);
}
/// Visualize widget
mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0);
myWindow.showWidget("ape", mesh_widget, cloud_pose_global);
/// Visualize widget
mesh_widget.setRenderingProperty(viz::LINE_WIDTH, 4.0);
myWindow.showWidget("ape", mesh_widget, cloud_pose_global);
/// Set the viewer pose to that of camera
if (camera_pov)
myWindow.setViewerPose(cam_pose);
myWindow.saveScreenshot(filename);
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100));
}
imglabel.close();
return 1;
/// Set the viewer pose to that of camera
if (camera_pov)
myWindow.setViewerPose(cam_pose);
myWindow.saveScreenshot(filename);
ViewSphere.writeBinaryfile(filename, binaryPath, headerPath,(int)campos.size()*num_class, label_class, (int)(campos.at(pose).x*100), (int)(campos.at(pose).y*100), (int)(campos.at(pose).z*100));
}
imglabel.close();
return 1;
};
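For reference, each line the loop appends to the label file pairs a rendered image name with the camera position scaled by 100 and truncated to ints; a hypothetical entry for class 0, pose 13, campos (0.18, -0.95, 0.25) would read:

0_13.png 18 -95 25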

@@ -6,8 +6,8 @@ namespace cv
{
namespace cnn_3dobj
{
DescriptorExtractor::DescriptorExtractor(){};
void DescriptorExtractor::list_dir(const char *path,vector<string>& files,bool r)
descriptorExtractor::descriptorExtractor(){};
void descriptorExtractor::listDir(const char *path,vector<string>& files,bool r)
{
DIR *pDir;
struct dirent *ent;
@@ -25,7 +25,7 @@ namespace cnn_3dobj
if(r)
{
sprintf(childpath, "%s/%s", path, ent->d_name);
DescriptorExtractor::list_dir(childpath,files,false);
descriptorExtractor::listDir(childpath,files,false);
}
}
else
@@ -36,7 +36,7 @@ namespace cnn_3dobj
sort(files.begin(),files.end());
};
bool DescriptorExtractor::SetNet(const string& cpu_only, int device_id)
bool descriptorExtractor::setNet(const string& cpu_only, int device_id)
{
if (strcmp(cpu_only.c_str(), "CPU") == 0 || strcmp(cpu_only.c_str(), "GPU") == 0)
{
@@ -59,9 +59,9 @@ namespace cnn_3dobj
}
};
bool DescriptorExtractor::LoadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file)
int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file, const string& mean_file)
{
bool net_ready = false;
int net_ready = 0;
if (netsetter)
{
/* Load the network. */
@@ -77,58 +77,50 @@ namespace cnn_3dobj
std::cout << "Input layer should have 1 or 3 channels." << std::endl;
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
/* Load the binaryproto mean file. */
SetMean(mean_file);
net_ready = true;
return net_ready;
setMean(mean_file);
net_ready = 2;
}
else
{
std::cout << "Error: Device must be set in advance using SetNet function" << std::endl;
return net_ready;
}
return net_ready;
};
void DescriptorExtractor::GetLabellist(const std::vector<string>& name_gallery)
int descriptorExtractor::loadNet(bool netsetter, const string& model_file, const string& trained_file)
{
for (unsigned int i = 0; i < name_gallery.size(); ++i)
labels_.push_back(name_gallery[i]);
};
/* Return the indices of the top N values of vector v. */
std::vector<int> DescriptorExtractor::Argmax(const std::vector<float>& v, int N)
{
std::vector<std::pair<float, int> > pairs;
for (size_t i = 0; i < v.size(); ++i)
pairs.push_back(std::make_pair(v[i], i));
std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end());
std::vector<int> result;
for (int i = 0; i < N; ++i)
result.push_back(pairs[i].second);
return result;
};
std::vector<std::pair<string, float> > DescriptorExtractor::Classify(const cv::Mat& reference, const cv::Mat& target, int N)
{
std::vector<float> output;
for (int i = 0; i < reference.rows; i++)
int net_ready = 0;
if (netsetter)
{
cv::Mat f1 = reference.row(i);
cv::Mat f2 = target;
cv::Mat output_temp = f1-f2;
output.push_back(cv::norm(output_temp));
/* Load the network. */
net_ = new Net<float>(model_file, TEST);
net_->CopyTrainedLayersFrom(trained_file);
if (net_->num_inputs() != 1)
std::cout << "Network should have exactly one input." << std::endl;
if (net_->num_outputs() != 1)
std::cout << "Network should have exactly one output." << std::endl;
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
if (num_channels_ != 3 && num_channels_ != 1)
std::cout << "Input layer should have 1 or 3 channels." << std::endl;
input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
net_ready = 1;
}
std::vector<int> maxN = Argmax(output, N);
std::vector<std::pair<string, float> > predictions;
for (int i = 0; i < N; ++i)
else
{
int idx = maxN[i];
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
std::cout << "Error: Device must be set in advance using SetNet function" << std::endl;
}
return predictions;
return net_ready;
};
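The int returned by both loadNet() overloads doubles as the preprocessing mode that extract() and preprocess() consult later; a hypothetical set of named constants restating the convention this commit uses (the code itself works with bare ints):

enum NetReady
{
    NET_NOT_READY       = 0, /* device not set or network not loaded; extract() does nothing */
    NET_READY_NO_MEAN   = 1, /* loadNet(model, weights): preprocess() skips mean subtraction */
    NET_READY_WITH_MEAN = 2  /* loadNet(model, weights, mean): preprocess() subtracts mean_ */
};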
void descriptorExtractor::getLabellist(const std::vector<string>& name_gallery)
{
for (unsigned int i = 0; i < name_gallery.size(); ++i)
labels_.push_back(name_gallery[i]);
};
/* Load the mean file in binaryproto format. */
void DescriptorExtractor::SetMean(const string& mean_file)
void descriptorExtractor::setMean(const string& mean_file)
{
BlobProto blob_proto;
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
@@ -156,7 +148,7 @@ namespace cnn_3dobj
mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
};
void DescriptorExtractor::Extract(bool net_ready, InputArray inputimg, OutputArray feature, bool mean_subtract, std::string featrue_blob)
void descriptorExtractor::extract(int net_ready, InputArray inputimg, OutputArray feature, std::string featrue_blob)
{
if (net_ready)
{
@@ -166,11 +158,11 @@ namespace cnn_3dobj
/* Forward dimension change to all layers. */
net_->Reshape();
std::vector<cv::Mat> input_channels;
WrapInputLayer(&input_channels);
wrapInputLayer(&input_channels);
if (inputimg.kind() == 65536)
{/* this is a Mat */
Mat img = inputimg.getMat();
Preprocess(img, &input_channels, mean_subtract);
preprocess(img, &input_channels, net_ready);
net_->ForwardPrefilled();
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->blob_by_name(featrue_blob).get();
@@ -187,7 +179,7 @@ namespace cnn_3dobj
Mat feature_vector;
for (unsigned int i = 0; i < img.size(); ++i)
{
Preprocess(img[i], &input_channels, mean_subtract);
preprocess(img[i], &input_channels, net_ready);
net_->ForwardPrefilled();
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->blob_by_name(featrue_blob).get();
@@ -214,7 +206,7 @@ namespace cnn_3dobj
* don't need to rely on cudaMemcpy2D. The last preprocessing
* operation will write the separate channels directly to the input
* layer. */
void DescriptorExtractor::WrapInputLayer(std::vector<cv::Mat>* input_channels)
void descriptorExtractor::wrapInputLayer(std::vector<cv::Mat>* input_channels)
{
Blob<float>* input_layer = net_->input_blobs()[0];
int width = input_layer->width();
@@ -228,8 +220,8 @@ namespace cnn_3dobj
}
};
void DescriptorExtractor::Preprocess(const cv::Mat& img,
std::vector<cv::Mat>* input_channels, bool mean_subtract)
void descriptorExtractor::preprocess(const cv::Mat& img,
std::vector<cv::Mat>* input_channels, int net_ready)
{
/* Convert the input image to the input image format of the network. */
cv::Mat sample;
@@ -254,7 +246,7 @@ std::vector<cv::Mat>* input_channels, bool mean_subtract)
else
sample_resized.convertTo(sample_float, CV_32FC1);
cv::Mat sample_normalized;
if (mean_subtract)
if (net_ready == 2)
cv::subtract(sample_float, mean_, sample_normalized);
else
sample_normalized = sample_float;

@@ -6,7 +6,7 @@ namespace cv
{
namespace cnn_3dobj
{
IcoSphere::IcoSphere(float radius_in, int depth_in)
icoSphere::icoSphere(float radius_in, int depth_in)
{
X = 0.5f;
@@ -35,7 +35,7 @@ namespace cnn_3dobj
{
for (int k = 0; k<j; k++)
{
if (CameraPos.at(k).x-CameraPos.at(j).x<diff && CameraPos.at(k).y-CameraPos.at(j).y<diff && CameraPos.at(k).z-CameraPos.at(j).z<diff)
if (CameraPos.at(k).x-CameraPos.at(j).x < diff && CameraPos.at(k).y-CameraPos.at(j).y < diff && CameraPos.at(k).z-CameraPos.at(j).z < diff)
break;
if(k == j-1)
CameraPos_temp.push_back(CameraPos[j]);
@@ -49,7 +49,7 @@ namespace cnn_3dobj
cout << CameraPos.at(i).x <<' '<< CameraPos.at(i).y << ' ' << CameraPos.at(i).z << endl;
}
};
void IcoSphere::norm(float v[])
void icoSphere::norm(float v[])
{
float len = 0;
for (int i = 0; i < 3; ++i)
@@ -63,7 +63,7 @@ namespace cnn_3dobj
}
};
void IcoSphere::add(float v[])
void icoSphere::add(float v[])
{
Point3f temp_Campos;
std::vector<float>* temp = new std::vector<float>;
@@ -77,7 +77,7 @@ namespace cnn_3dobj
CameraPos.push_back(temp_Campos);
};
void IcoSphere::subdivide(float v1[], float v2[], float v3[], int depth)
void icoSphere::subdivide(float v1[], float v2[], float v3[], int depth)
{
norm(v1);
norm(v2);
@@ -107,13 +107,13 @@ namespace cnn_3dobj
subdivide(v12, v23, v31, depth - 1);
};
uint32_t IcoSphere::swap_endian(uint32_t val)
uint32_t icoSphere::swapEndian(uint32_t val)
{
val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
return (val << 16) | (val >> 16);
};
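As a quick sanity check of the byte swap that createHeader() relies on below, a standalone copy of the function with one worked value (assuming a little-endian host):

#include <cstdint>
#include <cstdio>

static uint32_t swapEndian(uint32_t val)
{
    val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0x00FF00FF);
    return (val << 16) | (val >> 16);
}

int main()
{
    /* 2051 (0x00000803) is the MNIST-style image magic number written by createHeader;
       swapping yields 0x03080000, whose little-endian byte layout is the big-endian form of 2051. */
    printf("0x%08X\n", swapEndian(2051u)); /* prints 0x03080000 */
    return 0;
}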
cv::Point3d IcoSphere::getCenter(cv::Mat cloud)
cv::Point3d icoSphere::getCenter(cv::Mat cloud)
{
Point3f* data = cloud.ptr<cv::Point3f>();
Point3d dataout;
@@ -129,7 +129,7 @@ namespace cnn_3dobj
return dataout;
};
float IcoSphere::getRadius(cv::Mat cloud, cv::Point3d center)
float icoSphere::getRadius(cv::Mat cloud, cv::Point3d center)
{
float radiusCam = 0;
Point3f* data = cloud.ptr<cv::Point3f>();
@@ -149,7 +149,7 @@ namespace cnn_3dobj
return radiusCam;
};
void IcoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath)
void icoSphere::createHeader(int num_item, int rows, int cols, const char* headerPath)
{
char* a0 = (char*)malloc(1024);
strcpy(a0, headerPath);
@@ -165,17 +165,17 @@ namespace cnn_3dobj
std::ofstream headerLabel(headerPathlab, ios::out|ios::binary);
int headerimg[4] = {2051,num_item,rows,cols};
for (int i=0; i<4; i++)
headerimg[i] = swap_endian(headerimg[i]);
headerimg[i] = swapEndian(headerimg[i]);
int headerlabel[2] = {2050,num_item};
for (int i=0; i<2; i++)
headerlabel[i] = swap_endian(headerlabel[i]);
headerlabel[i] = swapEndian(headerlabel[i]);
headerImg.write(reinterpret_cast<const char*>(headerimg), sizeof(int)*4);
headerImg.close();
headerLabel.write(reinterpret_cast<const char*>(headerlabel), sizeof(int)*2);
headerLabel.close();
};
void IcoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z)
void icoSphere::writeBinaryfile(string filenameImg, const char* binaryPath, const char* headerPath, int num_item, int label_class, int x, int y, int z)
{
int isrgb = 0;
cv::Mat ImgforBin = cv::imread(filenameImg, isrgb);
