#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace cv::dnn;

#include <iostream>
#include <vector>
#include <cstdlib>
using namespace std;

const size_t width = 300;
const size_t height = 300;

// Build a 300x300 3-channel mean image from the per-channel BGR mean values
// used when the SSD model was trained.
static Mat getMean(const size_t& imageHeight, const size_t& imageWidth)
{
    Mat mean;

    const int meanValues[3] = {104, 117, 123};
    vector<Mat> meanChannels;
    for (int i = 0; i < 3; i++)
    {
        Mat channel((int)imageHeight, (int)imageWidth, CV_32F, Scalar(meanValues[i]));
        meanChannels.push_back(channel);
    }
    cv::merge(meanChannels, mean);
    return mean;
}

static Mat preprocess(const Mat& frame)
{
    Mat preprocessed;
    frame.convertTo(preprocessed, CV_32F);
    resize(preprocessed, preprocessed, Size(width, height)); // SSD accepts 300x300 images

    Mat mean = getMean(width, height);
    cv::subtract(preprocessed, mean, preprocessed);

    return preprocessed;
}

const char* about = "This sample uses Single-Shot Detector "
                    "(https://arxiv.org/abs/1512.02325) "
                    "to detect objects in an image\n"; // TODO: link

const char* params
    = "{ help           | false | print usage         }"
      "{ proto          |       | model configuration }"
      "{ model          |       | model weights       }"
      "{ image          |       | image for detection }"
      "{ min_confidence | 0.5   | min confidence      }";

int main(int argc, char** argv)
{
    cv::CommandLineParser parser(argc, argv, params);

    if (parser.get<bool>("help"))
    {
        std::cout << about << std::endl;
        parser.printMessage();
        return 0;
    }

    String modelConfiguration = parser.get<String>("proto");
    String modelBinary = parser.get<String>("model");

    //! [Initialize network]
    dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
    //! [Initialize network]

    if (net.empty())
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt:   " << modelConfiguration << endl;
        cerr << "caffemodel: " << modelBinary << endl;
        cerr << "Models can be downloaded here:" << endl;
        cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl;
        exit(-1);
    }

    cv::Mat frame = cv::imread(parser.get<String>("image"), cv::IMREAD_UNCHANGED);

    if (frame.channels() == 4)
        cvtColor(frame, frame, COLOR_BGRA2BGR);

    //! [Prepare blob]
    Mat preprocessedFrame = preprocess(frame);

    Mat inputBlob = blobFromImage(preprocessedFrame, 1.0f, Size(), Scalar(), false); // Convert Mat to batch of images
    //! [Prepare blob]

    //! [Set input blob]
    net.setInput(inputBlob, "data");              // set the network input
    //! [Set input blob]

    //! [Make forward pass]
    Mat detection = net.forward("detection_out"); // compute output
    //! [Make forward pass]

    // Each row of detectionMat holds one detection:
    // [batchId, classId, confidence, left, top, right, bottom] (coordinates are relative).
    Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

    float confidenceThreshold = parser.get<float>("min_confidence");
    for (int i = 0; i < detectionMat.rows; i++)
    {
        float confidence = detectionMat.at<float>(i, 2);

        if (confidence > confidenceThreshold)
        {
            size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));

            float xLeftBottom = detectionMat.at<float>(i, 3) * frame.cols;
            float yLeftBottom = detectionMat.at<float>(i, 4) * frame.rows;
            float xRightTop   = detectionMat.at<float>(i, 5) * frame.cols;
            float yRightTop   = detectionMat.at<float>(i, 6) * frame.rows;

            std::cout << "Class: " << objectClass << std::endl;
            std::cout << "Confidence: " << confidence << std::endl;

            std::cout << " " << xLeftBottom
                      << " " << yLeftBottom
                      << " " << xRightTop
                      << " " << yRightTop << std::endl;

            Rect object((int)xLeftBottom, (int)yLeftBottom,
                        (int)(xRightTop - xLeftBottom),
                        (int)(yRightTop - yLeftBottom));

            rectangle(frame, object, Scalar(0, 255, 0));
        }
    }

    imshow("detections", frame);
    waitKey();

    return 0;
} // main
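
// Example invocation (a sketch, not part of the original sample): the binary name
// and file paths below are placeholders; substitute the SSD deploy prototxt and
// caffemodel downloaded from the link printed above and any test image.
//
//   ./ssd_object_detection --proto=deploy.prototxt \
//                          --model=VGG_VOC0712_SSD_300x300.caffemodel \
//                          --image=example.jpg --min_confidence=0.5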