diff --git a/modules/datasets/include/opencv2/datasets/or_imagenet.hpp b/modules/datasets/include/opencv2/datasets/or_imagenet.hpp
index 2c6b74465..56564adba 100644
--- a/modules/datasets/include/opencv2/datasets/or_imagenet.hpp
+++ b/modules/datasets/include/opencv2/datasets/or_imagenet.hpp
@@ -56,9 +56,8 @@ namespace datasets
 
 struct OR_imagenetObj : public Object
 {
-    std::string wnid;
-    int id2;
-    std::string imageUrl;
+    int id;            // numeric class label (1000 is used as "unknown" for unlabeled test images)
+    std::string image; // image path relative to the dataset folder, e.g. "train/<synset>/<file>"
 };
 
 class CV_EXPORTS OR_imagenet : public Dataset
diff --git a/modules/datasets/samples/or_imagenet.cpp b/modules/datasets/samples/or_imagenet.cpp
index 13b9d8b01..4fb61d9f1 100644
--- a/modules/datasets/samples/or_imagenet.cpp
+++ b/modules/datasets/samples/or_imagenet.cpp
@@ -58,7 +58,7 @@ int main(int argc, char *argv[])
 {
     const char *keys =
             "{ help h usage ? | | show this message }"
-            "{ path p |true| path to file with urls: fall11_urls.txt }";
+            "{ path p |true| path to folder with dataset }";
     CommandLineParser parser(argc, argv, keys);
     string path(parser.get<string>("path"));
     if (parser.has("help") || path=="true")
@@ -71,13 +71,25 @@ int main(int argc, char *argv[])
     dataset->load(path);
 
     // ***************
-    // dataset contains for each object its id & image url.
-    // For example, let output dataset size and first object.
-    printf("dataset size: %u\n", (unsigned int)dataset->getTrain().size());
-    OR_imagenetObj *example = static_cast<OR_imagenetObj *>(dataset->getTrain()[0].get());
-    printf("first object url: %s\n", example->imageUrl.c_str());
-    printf("first object wnid: %s\n", example->wnid.c_str());
-    printf("first object id2: %u\n", example->id2);
+    // dataset contains for each object its id & image path.
+    // For example, let output the size and the first image of the
+    // train/test/validation splits.
+    const char *splitNames[] = { "train", "test", "validation" };
+    vector< Ptr<Object> > *splits[] = { &dataset->getTrain(), &dataset->getTest(), &dataset->getValidation() };
+    for (int i=0; i<3; ++i)
+    {
+        vector< Ptr<Object> > &curr = *splits[i];
+        printf("%s:\nsize: %u\n", splitNames[i], (unsigned int)curr.size());
+        if (curr.empty())
+        {
+            continue; // nothing was loaded for this split: curr[0] would be out of range
+        }
+
+        OR_imagenetObj *example = static_cast<OR_imagenetObj *>(curr[0].get());
+        printf("first image:\n");
+        printf("image: %s\n", example->image.c_str());
+        printf("id: %d\n", example->id); // id is a signed int, so %d rather than %u
+    }
 
     return 0;
 }
diff --git a/modules/datasets/src/or_imagenet.cpp b/modules/datasets/src/or_imagenet.cpp
index 0fbebf976..9d257da29 100644
--- a/modules/datasets/src/or_imagenet.cpp
+++ b/modules/datasets/src/or_imagenet.cpp
@@ -43,6 +43,8 @@
 #include "opencv2/datasets/util.hpp"
 #include "precomp.hpp"
 
+#include <map>
+
 namespace cv
 {
 namespace datasets
@@ -61,6 +63,8 @@ public:
 
 private:
     void loadDataset(const string &path);
+
+    void numberToString(int number, string &out);
 };
 
 /*OR_imagenetImp::OR_imagenetImp(const string &path)
@@ -73,30 +77,96 @@ void OR_imagenetImp::load(const string &path)
     loadDataset(path);
 }
 
+// Appends `number` to `out` as a decimal string, zero-padded on the left
+// to at least 8 characters (e.g. 42 -> "00000042"); longer values are
+// appended in full.
+void OR_imagenetImp::numberToString(int number, string &out)
+{
+    // 32 bytes hold the "%08d" rendering of any int up to 64 bits wide plus
+    // the terminating NUL, so sprintf cannot overrun the buffer.
+    char numberStr[32];
+    sprintf(numberStr, "%08d", number);
+    out += numberStr;
+}
+
+// Appends one new split to train/test/validation and fills it from the
+// dataset folder `path`. `path` is concatenated directly with file names, so
+// it is expected to end with a path separator. Unreadable files read as empty.
 void OR_imagenetImp::loadDataset(const string &path)
 {
     train.push_back(vector< Ptr<Object> >());
     test.push_back(vector< Ptr<Object> >());
     validation.push_back(vector< Ptr<Object> >());
 
-    ifstream infile((path + "fall11_urls.txt").c_str());
+    // labels.txt: one "<synset>,<label>" pair per line.
+    map<string, int> labels;
+    ifstream infile((path + "labels.txt").c_str());
     string line;
     while (getline(infile, line))
     {
         vector<string> elems;
-        split(line, elems, '\t');
+        split(line, elems, ',');
+        if (elems.size() < 2)
+        {
+            continue; // blank or malformed line: elems[1] would be out of range
+        }
+
+        labels.insert(make_pair(elems[0], atoi(elems[1].c_str())));
+    }
 
+    // train/<synset>/<image>: every entry listed under train/ is one class.
+    string pathTrain(path + "train/");
+    vector<string> fileNames;
+    getDirList(pathTrain, fileNames);
+    for (vector<string>::iterator it=fileNames.begin(); it!=fileNames.end(); ++it)
+    {
+        string pathSyn((*it) + "/");
+        // Looked up once per directory. operator[] yields 0 for a directory
+        // that is not listed in labels.txt.
+        const int synId = labels[*it];
+        vector<string> fileNamesSyn;
+        getDirList((pathTrain + pathSyn), fileNamesSyn);
+        for (vector<string>::iterator itSyn=fileNamesSyn.begin(); itSyn!=fileNamesSyn.end(); ++itSyn)
+        {
+            Ptr<OR_imagenetObj> curr(new OR_imagenetObj);
+            curr->image = "train/" + pathSyn + *itSyn;
+            curr->id = synId;
+
+            train.back().push_back(curr);
+        }
+    }
+
+    // Validation labels: line N holds the label of val/ILSVRC2010_val_<N>.JPEG.
+    ifstream infileVal((path + "ILSVRC2010_validation_ground_truth.txt").c_str());
+    while (getline(infileVal, line))
+    {
         Ptr<OR_imagenetObj> curr(new OR_imagenetObj);
-        curr->imageUrl = elems[1];
+        curr->id = atoi(line.c_str());
+        numberToString(static_cast<int>(validation.back().size())+1, curr->image);
+        curr->image = "val/ILSVRC2010_val_" + curr->image + ".JPEG";
 
-        string id(elems[0]);
-        elems.clear();
-        split(id, elems, '_');
+        validation.back().push_back(curr);
+    }
+
+    vector<int> testGT;
+    ifstream infileTest((path + "ILSVRC2010_test_ground_truth.txt").c_str());
+    while (getline(infileTest, line))
+    {
+        testGT.push_back(atoi(line.c_str()));
+    }
+    if (testGT.empty()) // have no test labels, set them to 1000 - unknown
+    {
+        testGT.assign(150000, 1000);
+    }
 
-        curr->wnid = elems[0];
-        curr->id2 = atoi(elems[1].c_str());
+    for (vector<int>::iterator it=testGT.begin(); it!=testGT.end(); ++it)
+    {
+        Ptr<OR_imagenetObj> curr(new OR_imagenetObj);
+        curr->id = *it;
+        numberToString(static_cast<int>(test.back().size())+1, curr->image);
+        curr->image = "test/ILSVRC2010_test_" + curr->image + ".JPEG";
 
-        train.back().push_back(curr);
+        test.back().push_back(curr);
     }
 }
 