|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
// (3-clause BSD License)
|
|
|
|
//
|
|
|
|
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistributions of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * Neither the names of the copyright holders nor the names of the contributors
|
|
|
|
// may be used to endorse or promote products derived from this software
|
|
|
|
// without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall copyright holders or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
#include "test_precomp.hpp"
|
|
|
|
#include <opencv2/dnn/shape_utils.hpp>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <opencv2/core/ocl.hpp>
|
|
|
|
#include <opencv2/ts/ocl_test.hpp>
|
|
|
|
|
|
|
|
namespace cvtest
|
|
|
|
{
|
|
|
|
|
|
|
|
using namespace cv;
|
|
|
|
using namespace cv::dnn;
|
|
|
|
|
|
|
|
template<typename TString>
|
|
|
|
static std::string _tf(TString filename)
|
|
|
|
{
|
|
|
|
return (getOpenCVExtraDir() + "/dnn/") + filename;
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST(Test_Darknet, read_tiny_yolo_voc)
|
|
|
|
{
|
|
|
|
Net net = readNetFromDarknet(_tf("tiny-yolo-voc.cfg"));
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST(Test_Darknet, read_yolo_voc)
|
|
|
|
{
|
|
|
|
Net net = readNetFromDarknet(_tf("yolo-voc.cfg"));
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
OCL_TEST(Reproducibility_TinyYoloVoc, Accuracy)
|
|
|
|
{
|
|
|
|
Net net;
|
|
|
|
{
|
|
|
|
const string cfg = findDataFile("dnn/tiny-yolo-voc.cfg", false);
|
|
|
|
const string model = findDataFile("dnn/tiny-yolo-voc.weights", false);
|
|
|
|
net = readNetFromDarknet(cfg, model);
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
net.setPreferableBackend(DNN_BACKEND_DEFAULT);
|
|
|
|
net.setPreferableTarget(DNN_TARGET_OPENCL);
|
|
|
|
|
|
|
|
// dog416.png is dog.jpg that resized to 416x416 in the lossless PNG format
|
|
|
|
Mat sample = imread(_tf("dog416.png"));
|
|
|
|
ASSERT_TRUE(!sample.empty());
|
|
|
|
|
|
|
|
Size inputSize(416, 416);
|
|
|
|
|
|
|
|
if (sample.size() != inputSize)
|
|
|
|
resize(sample, sample, inputSize);
|
|
|
|
|
|
|
|
net.setInput(blobFromImage(sample, 1 / 255.F), "data");
|
|
|
|
Mat out = net.forward("detection_out");
|
|
|
|
|
|
|
|
Mat detection;
|
|
|
|
const float confidenceThreshold = 0.24;
|
|
|
|
|
|
|
|
for (int i = 0; i < out.rows; i++) {
|
|
|
|
const int probability_index = 5;
|
|
|
|
const int probability_size = out.cols - probability_index;
|
|
|
|
float *prob_array_ptr = &out.at<float>(i, probability_index);
|
|
|
|
size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
|
|
|
|
float confidence = out.at<float>(i, (int)objectClass + probability_index);
|
|
|
|
|
|
|
|
if (confidence > confidenceThreshold)
|
|
|
|
detection.push_back(out.row(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
// obtained by: ./darknet detector test ./cfg/voc.data ./cfg/tiny-yolo-voc.cfg ./tiny-yolo-voc.weights -thresh 0.24 ./dog416.png
|
|
|
|
// There are 2 objects (6-car, 11-dog) with 25 values for each:
|
|
|
|
// { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
|
|
|
|
float ref_array[] = {
|
|
|
|
0.736762F, 0.239551F, 0.315440F, 0.160779F, 0.761977F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.761967F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
|
|
|
|
0.287486F, 0.653731F, 0.315579F, 0.534527F, 0.782737F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.780595F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
|
|
|
|
};
|
|
|
|
|
|
|
|
const int number_of_objects = 2;
|
|
|
|
Mat ref(number_of_objects, sizeof(ref_array) / (number_of_objects * sizeof(float)), CV_32FC1, &ref_array);
|
|
|
|
|
|
|
|
normAssert(ref, detection);
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST(Reproducibility_TinyYoloVoc, Accuracy)
|
|
|
|
{
|
|
|
|
Net net;
|
|
|
|
{
|
|
|
|
const string cfg = findDataFile("dnn/tiny-yolo-voc.cfg", false);
|
|
|
|
const string model = findDataFile("dnn/tiny-yolo-voc.weights", false);
|
|
|
|
net = readNetFromDarknet(cfg, model);
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
// dog416.png is dog.jpg that resized to 416x416 in the lossless PNG format
|
|
|
|
Mat sample = imread(_tf("dog416.png"));
|
|
|
|
ASSERT_TRUE(!sample.empty());
|
|
|
|
|
|
|
|
Size inputSize(416, 416);
|
|
|
|
|
|
|
|
if (sample.size() != inputSize)
|
|
|
|
resize(sample, sample, inputSize);
|
|
|
|
|
|
|
|
net.setInput(blobFromImage(sample, 1 / 255.F), "data");
|
|
|
|
Mat out = net.forward("detection_out");
|
|
|
|
|
|
|
|
Mat detection;
|
|
|
|
const float confidenceThreshold = 0.24;
|
|
|
|
|
|
|
|
for (int i = 0; i < out.rows; i++) {
|
|
|
|
const int probability_index = 5;
|
|
|
|
const int probability_size = out.cols - probability_index;
|
|
|
|
float *prob_array_ptr = &out.at<float>(i, probability_index);
|
|
|
|
size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
|
|
|
|
float confidence = out.at<float>(i, (int)objectClass + probability_index);
|
|
|
|
|
|
|
|
if (confidence > confidenceThreshold)
|
|
|
|
detection.push_back(out.row(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
// obtained by: ./darknet detector test ./cfg/voc.data ./cfg/tiny-yolo-voc.cfg ./tiny-yolo-voc.weights -thresh 0.24 ./dog416.png
|
|
|
|
// There are 2 objects (6-car, 11-dog) with 25 values for each:
|
|
|
|
// { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
|
|
|
|
float ref_array[] = {
|
|
|
|
0.736762F, 0.239551F, 0.315440F, 0.160779F, 0.761977F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.761967F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
|
|
|
|
0.287486F, 0.653731F, 0.315579F, 0.534527F, 0.782737F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.780595F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
|
|
|
|
};
|
|
|
|
|
|
|
|
const int number_of_objects = 2;
|
|
|
|
Mat ref(number_of_objects, sizeof(ref_array) / (number_of_objects * sizeof(float)), CV_32FC1, &ref_array);
|
|
|
|
|
|
|
|
normAssert(ref, detection);
|
|
|
|
}
|
|
|
|
|
|
|
|
OCL_TEST(Reproducibility_YoloVoc, Accuracy)
|
|
|
|
{
|
|
|
|
Net net;
|
|
|
|
{
|
|
|
|
const string cfg = findDataFile("dnn/yolo-voc.cfg", false);
|
|
|
|
const string model = findDataFile("dnn/yolo-voc.weights", false);
|
|
|
|
net = readNetFromDarknet(cfg, model);
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
net.setPreferableBackend(DNN_BACKEND_DEFAULT);
|
|
|
|
net.setPreferableTarget(DNN_TARGET_OPENCL);
|
|
|
|
|
|
|
|
// dog416.png is dog.jpg that resized to 416x416 in the lossless PNG format
|
|
|
|
Mat sample = imread(_tf("dog416.png"));
|
|
|
|
ASSERT_TRUE(!sample.empty());
|
|
|
|
|
|
|
|
Size inputSize(416, 416);
|
|
|
|
|
|
|
|
if (sample.size() != inputSize)
|
|
|
|
resize(sample, sample, inputSize);
|
|
|
|
|
|
|
|
net.setInput(blobFromImage(sample, 1 / 255.F), "data");
|
|
|
|
Mat out = net.forward("detection_out");
|
|
|
|
|
|
|
|
Mat detection;
|
|
|
|
const float confidenceThreshold = 0.24;
|
|
|
|
|
|
|
|
for (int i = 0; i < out.rows; i++) {
|
|
|
|
const int probability_index = 5;
|
|
|
|
const int probability_size = out.cols - probability_index;
|
|
|
|
float *prob_array_ptr = &out.at<float>(i, probability_index);
|
|
|
|
size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
|
|
|
|
float confidence = out.at<float>(i, (int)objectClass + probability_index);
|
|
|
|
|
|
|
|
if (confidence > confidenceThreshold)
|
|
|
|
detection.push_back(out.row(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
// obtained by: ./darknet detector test ./cfg/voc.data ./cfg/yolo-voc.cfg ./yolo-voc.weights -thresh 0.24 ./dog416.png
|
|
|
|
// There are 3 objects (6-car, 1-bicycle, 11-dog) with 25 values for each:
|
|
|
|
// { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
|
|
|
|
float ref_array[] = {
|
|
|
|
0.740161F, 0.214100F, 0.325575F, 0.173418F, 0.750769F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.750469F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
|
|
|
|
0.501618F, 0.504757F, 0.461713F, 0.481310F, 0.783550F, 0.000000F, 0.780879F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
|
|
|
|
0.279968F, 0.638651F, 0.282737F, 0.600284F, 0.901864F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.901615F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
|
|
|
|
};
|
|
|
|
|
|
|
|
const int number_of_objects = 3;
|
|
|
|
Mat ref(number_of_objects, sizeof(ref_array) / (number_of_objects * sizeof(float)), CV_32FC1, &ref_array);
|
|
|
|
|
|
|
|
normAssert(ref, detection);
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST(Reproducibility_YoloVoc, Accuracy)
|
|
|
|
{
|
|
|
|
Net net;
|
|
|
|
{
|
|
|
|
const string cfg = findDataFile("dnn/yolo-voc.cfg", false);
|
|
|
|
const string model = findDataFile("dnn/yolo-voc.weights", false);
|
|
|
|
net = readNetFromDarknet(cfg, model);
|
|
|
|
ASSERT_FALSE(net.empty());
|
|
|
|
}
|
|
|
|
|
|
|
|
// dog416.png is dog.jpg that resized to 416x416 in the lossless PNG format
|
|
|
|
Mat sample = imread(_tf("dog416.png"));
|
|
|
|
ASSERT_TRUE(!sample.empty());
|
|
|
|
|
|
|
|
Size inputSize(416, 416);
|
|
|
|
|
|
|
|
if (sample.size() != inputSize)
|
|
|
|
resize(sample, sample, inputSize);
|
|
|
|
|
|
|
|
net.setInput(blobFromImage(sample, 1 / 255.F), "data");
|
|
|
|
Mat out = net.forward("detection_out");
|
|
|
|
|
|
|
|
Mat detection;
|
|
|
|
const float confidenceThreshold = 0.24;
|
|
|
|
|
|
|
|
for (int i = 0; i < out.rows; i++) {
|
|
|
|
const int probability_index = 5;
|
|
|
|
const int probability_size = out.cols - probability_index;
|
|
|
|
float *prob_array_ptr = &out.at<float>(i, probability_index);
|
|
|
|
size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
|
|
|
|
float confidence = out.at<float>(i, (int)objectClass + probability_index);
|
|
|
|
|
|
|
|
if (confidence > confidenceThreshold)
|
|
|
|
detection.push_back(out.row(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
// obtained by: ./darknet detector test ./cfg/voc.data ./cfg/yolo-voc.cfg ./yolo-voc.weights -thresh 0.24 ./dog416.png
|
|
|
|
// There are 3 objects (6-car, 1-bicycle, 11-dog) with 25 values for each:
|
|
|
|
// { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
|
|
|
|
float ref_array[] = {
|
|
|
|
0.740161F, 0.214100F, 0.325575F, 0.173418F, 0.750769F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.750469F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
|
|
|
|
0.501618F, 0.504757F, 0.461713F, 0.481310F, 0.783550F, 0.000000F, 0.780879F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
|
|
|
|
0.279968F, 0.638651F, 0.282737F, 0.600284F, 0.901864F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.901615F,
|
|
|
|
0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
|
|
|
|
};
|
|
|
|
|
|
|
|
const int number_of_objects = 3;
|
|
|
|
Mat ref(number_of_objects, sizeof(ref_array) / (number_of_objects * sizeof(float)), CV_32FC1, &ref_array);
|
|
|
|
|
|
|
|
normAssert(ref, detection);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|