parent
645260af34
commit
dc93eedecf
26 changed files with 3138 additions and 7 deletions
@ -0,0 +1,141 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
namespace cvtest |
||||
{ |
||||
|
||||
#ifdef HAVE_HALIDE |
||||
using namespace cv; |
||||
using namespace dnn; |
||||
|
||||
static void loadNet(const std::string& weights, const std::string& proto, |
||||
const std::string& scheduler, int inWidth, int inHeight, |
||||
const std::string& outputLayer, const std::string& framework, |
||||
int targetId, Net* net, int* outputLayerId) |
||||
{ |
||||
Mat input(inHeight, inWidth, CV_32FC3); |
||||
randu(input, 0.0f, 1.0f); |
||||
|
||||
if (framework == "caffe") |
||||
{ |
||||
*net = cv::dnn::readNetFromCaffe(proto, weights); |
||||
} |
||||
else if (framework == "torch") |
||||
{ |
||||
*net = cv::dnn::readNetFromTorch(weights); |
||||
} |
||||
else if (framework == "tensorflow") |
||||
{ |
||||
*net = cv::dnn::readNetFromTensorflow(weights); |
||||
} |
||||
else |
||||
CV_Error(Error::StsNotImplemented, "Unknown framework " + framework); |
||||
|
||||
net->setBlob("", cv::dnn::blobFromImage(input, 1.0, false)); |
||||
net->setPreferableBackend(DNN_BACKEND_HALIDE); |
||||
net->compileHalide(scheduler); |
||||
*outputLayerId = net->getLayerId(outputLayer); |
||||
net->forward(*outputLayerId); |
||||
} |
||||
|
||||
// Times forward passes of the Caffe GoogLeNet model on the Halide backend.
// An empty scheduler string is handed to loadNet().
PERF_TEST(GoogLeNet, HalidePerfTest)
{
    Net model;
    int probLayerId;

    const std::string weightsPath = findDataFile("dnn/bvlc_googlenet.caffemodel");
    const std::string protoPath = findDataFile("dnn/bvlc_googlenet.prototxt");
    loadNet(weightsPath, protoPath, "", 227, 227, "prob", "caffe",
            DNN_TARGET_CPU, &model, &probLayerId);

    TEST_CYCLE_N(10)
    {
        model.forward(probLayerId);
    }
    SANITY_CHECK_NOTHING();
}
||||
|
||||
// Times forward passes of the Caffe AlexNet model on the Halide backend,
// using the scheduling file shipped with the test data.
PERF_TEST(AlexNet, HalidePerfTest)
{
    Net model;
    int probLayerId;

    const std::string weightsPath = findDataFile("dnn/bvlc_alexnet.caffemodel");
    const std::string protoPath = findDataFile("dnn/bvlc_alexnet.prototxt");
    const std::string schedulerPath = findDataFile("dnn/halide_scheduler_alexnet.yml");
    loadNet(weightsPath, protoPath, schedulerPath, 227, 227, "prob", "caffe",
            DNN_TARGET_CPU, &model, &probLayerId);

    TEST_CYCLE_N(10)
    {
        model.forward(probLayerId);
    }
    SANITY_CHECK_NOTHING();
}
||||
|
||||
// PERF_TEST(ResNet50, HalidePerfTest)
|
||||
// {
|
||||
// Net net;
|
||||
// int outputLayerId;
|
||||
// loadNet(findDataFile("dnn/ResNet-50-model.caffemodel"),
|
||||
// findDataFile("dnn/ResNet-50-deploy.prototxt"),
|
||||
// findDataFile("dnn/halide_scheduler_resnet_50.yml"),
|
||||
// 224, 224, "prob", "caffe", DNN_TARGET_CPU, &net, &outputLayerId);
|
||||
//
|
||||
// TEST_CYCLE_N(10)
|
||||
// {
|
||||
// net.forward(outputLayerId);
|
||||
// }
|
||||
// SANITY_CHECK_NOTHING();
|
||||
// }
|
||||
|
||||
// PERF_TEST(SqueezeNet_v1_1, HalidePerfTest)
|
||||
// {
|
||||
// Net net;
|
||||
// int outputLayerId;
|
||||
// loadNet(findDataFile("dnn/squeezenet_v1_1.caffemodel"),
|
||||
// findDataFile("dnn/squeezenet_v1_1.prototxt"),
|
||||
// findDataFile("dnn/halide_scheduler_squeezenet_v1_1.yml"),
|
||||
// 227, 227, "prob", "caffe", DNN_TARGET_CPU, &net, &outputLayerId);
|
||||
//
|
||||
// TEST_CYCLE_N(10)
|
||||
// {
|
||||
// net.forward(outputLayerId);
|
||||
// }
|
||||
// SANITY_CHECK_NOTHING();
|
||||
// }
|
||||
|
||||
// Times forward passes of the TensorFlow Inception 5h graph on the Halide
// backend. TensorFlow models are read from a single file, so the prototxt
// argument of loadNet() is left empty.
PERF_TEST(Inception_5h, HalidePerfTest)
{
    Net model;
    int softmaxLayerId;

    const std::string graphPath = findDataFile("dnn/tensorflow_inception_graph.pb");
    const std::string schedulerPath = findDataFile("dnn/halide_scheduler_inception_5h.yml");
    loadNet(graphPath, "", schedulerPath, 224, 224, "softmax2", "tensorflow",
            DNN_TARGET_CPU, &model, &softmaxLayerId);

    TEST_CYCLE_N(10)
    {
        model.forward(softmaxLayerId);
    }
    SANITY_CHECK_NOTHING();
}
||||
|
||||
// Times forward passes of the Torch ENet model on the Halide backend.
// Torch models are read from a single file, so the prototxt argument of
// loadNet() is left empty. The input is 512 wide and 256 high.
PERF_TEST(ENet, HalidePerfTest)
{
    Net model;
    int lastLayerId;

    const std::string modelPath = findDataFile("dnn/Enet-model-best.net");
    const std::string schedulerPath = findDataFile("dnn/halide_scheduler_enet.yml");
    loadNet(modelPath, "", schedulerPath, 512, 256, "l367_Deconvolution", "torch",
            DNN_TARGET_CPU, &model, &lastLayerId);

    TEST_CYCLE_N(10)
    {
        model.forward(lastLayerId);
    }
    SANITY_CHECK_NOTHING();
}
||||
#endif // HAVE_HALIDE
|
||||
|
||||
} // namespace cvtest
|
@ -1,3 +1,12 @@ |
||||
#include "perf_precomp.hpp" |
||||
|
||||
CV_PERF_TEST_MAIN(dnn) |
||||
static const char* extraTestDataPath = |
||||
#ifdef WINRT |
||||
NULL; |
||||
#else |
||||
getenv("OPENCV_DNN_TEST_DATA_PATH"); |
||||
#endif |
||||
|
||||
CV_PERF_TEST_MAIN(dnn, |
||||
extraTestDataPath ? (void)cvtest::addDataSearchPath(extraTestDataPath) : (void)0 |
||||
) |
||||
|
@ -0,0 +1,126 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
// Sample of using Halide backend in OpenCV deep learning module.
|
||||
// Based on dnn/samples/caffe_googlenet.cpp.
|
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
|
||||
/* Find best class for the blob (i. e. class with maximal probability) */ |
||||
void getMaxClass(const Mat &probBlob, int *classId, double *classProb) |
||||
{ |
||||
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
|
||||
Point classNumber; |
||||
|
||||
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber); |
||||
*classId = classNumber.x; |
||||
} |
||||
|
||||
// Reads class labels from a text file, one label per line.
//
// Everything up to and including the first space of a line is dropped (in
// synset files that is the class identifier); a line without a space is kept
// whole. Empty lines are skipped.
//
// filename - path of the labels file.
// Returns the labels in file order.
// Prints a message to std::cerr and terminates the process with exit(-1)
// when the file cannot be opened.
std::vector<std::string> readClassNames(const char *filename = "synset_words.txt")
{
    std::ifstream fp(filename);
    if (!fp.is_open())
    {
        std::cerr << "File with classes labels not found: " << filename << std::endl;
        exit(-1);
    }

    std::vector<std::string> classNames;
    std::string name;
    // Loop on the result of getline() instead of eof(): the loop then also
    // stops when the stream fails for a reason other than end-of-file.
    while (std::getline(fp, name))
    {
        if (name.empty())
            continue;
        // find() yields npos when there is no space; npos + 1 wraps to 0, so
        // the whole line is kept in that case.
        classNames.push_back(name.substr(name.find(' ') + 1));
    }

    // The stream is closed by its destructor.
    return classNames;
}
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
initModule(); // Required if OpenCV is built as static libs.
|
||||
|
||||
std::string modelTxt = "train_val.prototxt"; |
||||
std::string modelBin = "squeezenet_v1.1.caffemodel"; |
||||
std::string imageFile = (argc > 1) ? argv[1] : "space_shuttle.jpg"; |
||||
|
||||
//! [Read and initialize network]
|
||||
Net net = dnn::readNetFromCaffe(modelTxt, modelBin); |
||||
//! [Read and initialize network]
|
||||
|
||||
//! [Check that network was read successfully]
|
||||
if (net.empty()) |
||||
{ |
||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
||||
std::cerr << "prototxt: " << modelTxt << std::endl; |
||||
std::cerr << "caffemodel: " << modelBin << std::endl; |
||||
std::cerr << "SqueezeNet v1.1 can be downloaded from:" << std::endl; |
||||
std::cerr << "https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1" << std::endl; |
||||
exit(-1); |
||||
} |
||||
//! [Check that network was read successfully]
|
||||
|
||||
//! [Prepare blob]
|
||||
Mat img = imread(imageFile); |
||||
if (img.empty()) |
||||
{ |
||||
std::cerr << "Can't read image from the file: " << imageFile << std::endl; |
||||
exit(-1); |
||||
} |
||||
if (img.channels() != 3) |
||||
{ |
||||
std::cerr << "Image " << imageFile << " isn't 3-channel" << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
resize(img, img, Size(227, 227)); // SqueezeNet v1.1 predict class by 3x227x227 input image.
|
||||
Mat inputBlob = blobFromImage(img, 1.0, false); // Convert Mat to 4-dimensional batch.
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setBlob("", inputBlob); // Set the network input.
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Enable Halide backend]
|
||||
net.setPreferableBackend(DNN_BACKEND_HALIDE); // Tell engine to use Halide where it possible.
|
||||
//! [Enable Halide backend]
|
||||
|
||||
//! [Compile Halide pipeline]
|
||||
net.compileHalide(); // Compile Halide pipeline.
|
||||
//! [Compile Halide pipeline]
|
||||
|
||||
//! [Make forward pass]
|
||||
net.forward(); // Compute output.
|
||||
//! [Make forward pass]
|
||||
|
||||
//! [Gather output]
|
||||
Mat prob = net.getBlob("prob"); // Gather output of "prob" layer.
|
||||
|
||||
int classId; |
||||
double classProb; |
||||
getMaxClass(prob, &classId, &classProb); // Find the best class.
|
||||
//! [Gather output]
|
||||
|
||||
//! [Print results]
|
||||
std::vector<std::string> classNames = readClassNames(); |
||||
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl; |
||||
std::cout << "Probability: " << classProb * 100 << "%" << std::endl; |
||||
//! [Print results]
|
||||
|
||||
return 0; |
||||
} //main
|
@ -0,0 +1,260 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "halide_scheduler.hpp" |
||||
#include "op_halide.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
#ifdef HAVE_HALIDE |
||||
static void applySplit(const FileNode& directive, Halide::Func& func, |
||||
const FileNode& params) |
||||
{ |
||||
for (const auto& varNode : directive) |
||||
{ |
||||
const std::string varName = varNode.name(); |
||||
const std::string factorName = (std::string)varNode; |
||||
Halide::Var var(varName); |
||||
Halide::Var outerVar(varName + "o"); |
||||
Halide::Var innerVar(varName + "i"); |
||||
// If split factor is integer or parameters map has parameter value.
|
||||
CV_Assert(varNode.isString() && !params[factorName].empty() || |
||||
varNode.isInt()); |
||||
int factor = (int)(varNode.isInt() ? varNode : params[factorName]); |
||||
func.split(var, outerVar, innerVar, factor); |
||||
} |
||||
} |
||||
|
||||
static void applyReorder(const FileNode& directive, Halide::Func& func) |
||||
{ |
||||
std::string varName; |
||||
const int numVars = directive.size(); |
||||
std::vector<Halide::VarOrRVar> reorderedVars; |
||||
reorderedVars.reserve(numVars); |
||||
for (int i = 0; i < numVars; ++i) |
||||
{ |
||||
directive[i] >> varName; |
||||
reorderedVars.push_back(Halide::Var(varName)); |
||||
} |
||||
func.reorder(reorderedVars); |
||||
} |
||||
|
||||
static void applyFuse(const FileNode& directive, Halide::Func& func) |
||||
{ |
||||
CV_Assert(directive["src"].size() >= 2); |
||||
CV_Assert(directive["dst"].size() == 1); |
||||
|
||||
std::string str; |
||||
directive["src"][0] >> str; |
||||
Halide::Var firstVar(str); |
||||
directive["src"][1] >> str; |
||||
Halide::Var secondVar(str); |
||||
directive["dst"] >> str; |
||||
Halide::Var dstVar(str); |
||||
|
||||
func.fuse(firstVar, secondVar, dstVar); |
||||
for (int i = 2, n = directive["src"].size(); i < n; ++i) |
||||
{ |
||||
directive["src"][i] >> str; |
||||
func.fuse(Halide::Var(str), dstVar, dstVar); |
||||
} |
||||
} |
||||
|
||||
static void applyParallel(const FileNode& directive, Halide::Func& func) |
||||
{ |
||||
std::string varName; |
||||
for (int i = 0, n = directive.size(); i < n; ++i) |
||||
{ |
||||
directive[i] >> varName; |
||||
func.parallel(Halide::Var(varName)); |
||||
} |
||||
} |
||||
|
||||
static void applyUnroll(const FileNode& directive, Halide::Func& func) |
||||
{ |
||||
std::string varName; |
||||
for (int i = 0, n = directive.size(); i < n; ++i) |
||||
{ |
||||
directive[i] >> varName; |
||||
func.unroll(Halide::Var(varName)); |
||||
} |
||||
} |
||||
|
||||
static void applyVectorize(const FileNode& directive, Halide::Func& func, |
||||
const FileNode& params) |
||||
{ |
||||
for (const auto& varNode : directive) |
||||
{ |
||||
const std::string varName = varNode.name(); |
||||
const std::string factorName = (std::string)varNode; |
||||
// If split factor is integer or parameters map has parameter value.
|
||||
CV_Assert(varNode.isString() && !params[factorName].empty() || |
||||
varNode.isInt()); |
||||
int factor = (int)(varNode.isInt() ? varNode : params[factorName]); |
||||
Halide::Var var(varName); |
||||
Halide::Var inner(varName + "v"); |
||||
func.split(var, var, inner, factor); |
||||
func.vectorize(inner); |
||||
} |
||||
} |
||||
|
||||
static void applyStoreAt(const FileNode& directive, Halide::Func& func, |
||||
std::map<std::string, Halide::Func>& funcsMap) |
||||
{ |
||||
for (const auto& funcNode : directive) |
||||
{ |
||||
const std::string targetFuncName = funcNode.name(); |
||||
if (funcsMap.find(targetFuncName) == funcsMap.end()) |
||||
CV_Error(cv::Error::StsParseError, "Function " + targetFuncName + |
||||
" is not represented in Halide pipeline"); |
||||
Halide::Func targetFunc = funcsMap[targetFuncName]; |
||||
func.store_at(targetFunc, (std::string)funcNode); |
||||
break; |
||||
} |
||||
} |
||||
|
||||
static void applyComputeAt(const FileNode& directive, Halide::Func& func, |
||||
std::map<std::string, Halide::Func>& funcsMap) |
||||
{ |
||||
for (const auto& funcNode : directive) |
||||
{ |
||||
const std::string targetFuncName = funcNode.name(); |
||||
if (funcsMap.find(targetFuncName) == funcsMap.end()) |
||||
CV_Error(cv::Error::StsParseError, "Function " + targetFuncName + |
||||
" is not represented in Halide pipeline"); |
||||
Halide::Func targetFunc = funcsMap[targetFuncName]; |
||||
func.compute_at(targetFunc, (std::string)funcNode); |
||||
break; |
||||
} |
||||
} |
||||
|
||||
static void applyComputeRoot(const FileNode& directive, Halide::Func& func) |
||||
{ |
||||
bool compute_root; |
||||
directive >> compute_root; |
||||
if (compute_root) |
||||
func.compute_root(); |
||||
} |
||||
|
||||
static void apply(const FileNode& directives, Halide::Func& func, |
||||
std::map<std::string, Halide::Func>& funcsMap, |
||||
const FileNode& params) |
||||
{ |
||||
for (const auto& directive : directives) |
||||
{ |
||||
if (directive.name() == "split") |
||||
applySplit(directive, func, params); |
||||
else if (directive.name() == "reorder") |
||||
applyReorder(directive, func); |
||||
else if (directive.name() == "fuse") |
||||
applyFuse(directive, func); |
||||
else if (directive.name() == "parallel") |
||||
applyParallel(directive, func); |
||||
else if (directive.name() == "unroll") |
||||
applyUnroll(directive, func); |
||||
else if (directive.name() == "vectorize") |
||||
applyVectorize(directive, func, params); |
||||
else if (directive.name() == "store_at") |
||||
applyStoreAt(directive, func, funcsMap); |
||||
else if (directive.name() == "compute_at") |
||||
applyComputeAt(directive, func, funcsMap); |
||||
else if (directive.name() == "compute_root") |
||||
applyComputeRoot(directive, func); |
||||
else |
||||
CV_Error(Error::StsNotImplemented, "Scheduling directive " + |
||||
directive.name() + " is not implemented."); |
||||
} |
||||
} |
||||
|
||||
// Removes every '$' together with the run of decimal digits that follows it,
// e.g. "func$12" becomes "func".
//
// str - name to clean up (taken by value and edited in place).
// Returns the cleaned name.
static std::string Deunique(std::string str)
{
    // Positions are kept as size_type and compared against npos, so nothing
    // depends on narrowing npos to a signed int.
    std::string::size_type pos = str.find('$');
    while (pos != std::string::npos)
    {
        const std::string::size_type digitsEnd =
            str.find_first_not_of("0123456789", pos + 1);

        // When only digits follow the '$', everything up to the end is removed.
        const std::string::size_type count =
            (digitsEnd == std::string::npos) ? std::string::npos : digitsEnd - pos;
        str.erase(pos, count);

        // No '$' is left before <pos>, so the search can resume from there.
        pos = str.find('$', pos);
    }
    return str;
}
||||
#endif // HAVE_HALIDE
|
||||
|
||||
// Opens <configFile> for reading. With an empty path the storage stays
// closed, and process() then reports that nothing was scheduled.
HalideScheduler::HalideScheduler(const std::string& configFile)
{
    if (configFile.empty())
        return;
    fs = FileStorage(configFile, FileStorage::READ);
}
||||
|
||||
// Releases the scheduling file if it was opened.
HalideScheduler::~HalideScheduler()
{
    if (!fs.isOpened())
        return;
    fs.release();
}
||||
|
||||
// Applies the schedule from the configuration file to the functions of <node>.
//
// Functions are visited from the last (top) one down to the first. For each,
// the entry of the "scheduling" section with the function's de-uniqued name
// is looked up; it either refers to a named entry of "patterns" or lists the
// directives directly.
//
// Returns false when no file is open, or when a function has no entry and
// nothing has been registered before it (i.e. the top function is missing).
// Returns true otherwise. Without Halide support always returns false.
// Raises Error::StsParseError when the "scheduling" section is absent or a
// referenced pattern is not defined.
bool HalideScheduler::process(Ptr<BackendNode>& node)
{
#ifdef HAVE_HALIDE
    if (!fs.isOpened())
        return false;

    const FileNode scheduleNode = fs["scheduling"];
    if (scheduleNode.empty())
        CV_Error(cv::Error::StsParseError, "Scheduling file should has scheduling node");

    // Functions visited so far, keyed by their de-uniqued name.
    std::map<std::string, Halide::Func> funcsMap;

    CV_Assert(!node.empty());
    Ptr<HalideBackendNode> halideNode = node.dynamicCast<HalideBackendNode>();
    std::vector<Halide::Func>& funcs = halideNode->funcs;

    // For every function, from top to bottom, we try to find a scheduling node.
    // Scheduling is successful (return true) if the node for the first (top)
    // function is represented.
    for (std::vector<Halide::Func>::reverse_iterator it = funcs.rbegin();
         it != funcs.rend(); ++it)
    {
        Halide::Func& func = *it;

        // For functions with the same name Halide generates unique names,
        // for example func, func$1, func$2.
        // They are always formed with '$' and a number.
        const std::string funcName = Deunique(func.name());

        const FileNode funcNode = scheduleNode[funcName];
        if (funcNode.empty())
        {
            if (funcsMap.empty())
                return false;
        }
        else if (funcNode["pattern"].empty())
        {
            // Directives are listed directly under the function entry.
            apply(funcNode, func, funcsMap, funcNode["params"]);
        }
        else
        {
            std::string patternName;
            funcNode["pattern"] >> patternName;

            const FileNode patternNode = fs["patterns"][patternName];
            if (patternNode.empty())
                CV_Error(cv::Error::StsParseError, "Scheduling pattern " + patternName +
                         " is not defined");
            apply(patternNode, func, funcsMap, funcNode["params"]);
        }

        funcsMap[funcName] = func;
    }
    return true;
#else
    return false;
#endif  // HAVE_HALIDE
}
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
|
@ -0,0 +1,37 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#ifndef __OPENCV_DNN_HALIDE_SCHEDULER_HPP__ |
||||
#define __OPENCV_DNN_HALIDE_SCHEDULER_HPP__ |
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
class HalideScheduler |
||||
{ |
||||
public: |
||||
HalideScheduler(const std::string& configFile); |
||||
|
||||
~HalideScheduler(); |
||||
|
||||
// Returns true if pipeline found in scheduling file.
|
||||
// If more than one function, returns true if the top function scheduled.
|
||||
// Other functions are optional to scheduling.
|
||||
bool process(Ptr<BackendNode>& node); |
||||
|
||||
private: |
||||
FileStorage fs; |
||||
}; |
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
|
||||
|
||||
#endif // __OPENCV_DNN_HALIDE_SCHEDULER_HPP__
|
@ -0,0 +1,172 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "op_halide.hpp" |
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
|
||||
#ifdef HAVE_HALIDE |
||||
// Wraps <mat> into a Halide buffer whose extents are ordered
// (width, height, channels, batch), as derived by getCanonicalSize().
Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat)
{
    int batch, channels, width, height;
    getCanonicalSize(mat.size, &width, &height, &channels, &batch);

    const std::vector<int> extents = {width, height, channels, batch};
    return wrapToHalideBuffer(mat, extents);
}
||||
|
||||
// Wraps the data pointer of <mat> into a Halide buffer with the given
// extents. The buffer is built on top of mat.data.
Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat,
                                         const std::vector<int>& sizes)
{
    float* const hostData = (float*)mat.data;

    Halide::Buffer<float> wrapped(hostData, sizes);
    wrapped.set_host_dirty();  // Indicate that data is on CPU.
    return wrapped;
}
||||
|
||||
// Returns the Halide buffer stored in a backend wrapper.
// <ptr> must be non-empty; it is cast to HalideBackendWrapper without a
// check of the cast result.
Halide::Buffer<> halideBuffer(const Ptr<BackendWrapper>& ptr)
{
    CV_Assert(!ptr.empty());
    Ptr<HalideBackendWrapper> halideWrapper = ptr.dynamicCast<HalideBackendWrapper>();
    return halideWrapper->buffer;
}
||||
|
||||
// Returns the Halide buffers of all wrappers in <ptrs>, in the same order.
std::vector<Halide::Buffer<> > halideBuffers(const std::vector<Ptr<BackendWrapper> >& ptrs)
{
    const size_t count = ptrs.size();

    std::vector<Halide::Buffer<> > buffers;
    buffers.reserve(count);
    for (size_t idx = 0; idx < count; ++idx)
        buffers.push_back(halideBuffer(ptrs[idx]));
    return buffers;
}
||||
|
||||
// Reads the four extents of a Halide buffer.
// Dimension 0 is the width, 1 the height, 2 the channels and 3 the batch.
// The buffer must have exactly four dimensions.
void getCanonicalSize(const Halide::Buffer<>& buffer, int* width, int* height,
                      int* channels, int* batch)
{
    CV_Assert(buffer.dimensions() == 4);

    const int extentW = buffer.extent(0);
    const int extentH = buffer.extent(1);
    const int extentC = buffer.extent(2);
    const int extentN = buffer.extent(3);

    *width = extentW;
    *height = extentH;
    *channels = extentC;
    *batch = extentN;
}
||||
|
||||
// Node holding a single function.
HalideBackendNode::HalideBackendNode(const Halide::Func& func)
    : BackendNode(DNN_BACKEND_HALIDE),
      funcs(1, func)
{
}

// Node holding a copy of the given list of functions.
HalideBackendNode::HalideBackendNode(const std::vector<Halide::Func>& funcs)
    : BackendNode(DNN_BACKEND_HALIDE),
      funcs(funcs)
{
}

// Node that copies the functions of <base> and replaces the last one by <top>.
// NOTE(review): base->funcs is expected to be non-empty; back() is called
// without a check.
HalideBackendNode::HalideBackendNode(const Ptr<HalideBackendNode>& base,
                                     const Halide::Func& top)
    : BackendNode(DNN_BACKEND_HALIDE),
      funcs(base->funcs)
{
    funcs.back() = top;
}
||||
|
||||
// Wraps the data of <m> for the given target.
// Raises Error::StsNotImplemented for any target other than DNN_TARGET_CPU;
// the buffer is wrapped before the target is checked.
HalideBackendWrapper::HalideBackendWrapper(int targetId, const cv::Mat& m)
    : BackendWrapper(DNN_BACKEND_HALIDE, targetId)
{
    buffer = wrapToHalideBuffer(m);

    if (targetId == DNN_TARGET_CPU)
        return;
    CV_Error(Error::StsNotImplemented, "Unknown target identifier");
}
||||
|
||||
// Creates a wrapper over the host memory of <base>, viewed with another shape.
// The new buffer is built on the host pointer of the base buffer.
// Raises Error::StsNotImplemented for any target other than DNN_TARGET_CPU.
HalideBackendWrapper::HalideBackendWrapper(const Ptr<BackendWrapper>& base,
                                           const MatShape& shape)
    : BackendWrapper(DNN_BACKEND_HALIDE, base->targetId)
{
    if (base->targetId != DNN_TARGET_CPU)
        CV_Error(Error::StsNotImplemented, "Unknown target identifier");

    int width, height, channels, batch;
    getCanonicalSize(shape, &width, &height, &channels, &batch);

    Halide::Buffer<float> baseBuffer = halideBuffer(base);
    float* const hostData = (float*)baseBuffer.raw_buffer()->host;
    const std::vector<int> extents = {width, height, channels, batch};

    buffer = Halide::Buffer<float>(hostData, extents);
    buffer.set_host_dirty();  // Indicate that data is on CPU.
}
||||
#endif // HAVE_HALIDE
|
||||
|
||||
void getCanonicalSize(const MatSize& size, int* width, int* height, |
||||
int* channels, int* batch) |
||||
{ |
||||
const int dims = size.p[-1]; |
||||
CV_Assert(dims == 2 || dims == 4); |
||||
*batch = size[0]; |
||||
*channels = size[1]; |
||||
if (dims == 4) |
||||
{ |
||||
*width = size[3]; |
||||
*height = size[2]; |
||||
} |
||||
else |
||||
{ |
||||
*width = 1; |
||||
*height = 1; |
||||
} |
||||
} |
||||
|
||||
void getCanonicalSize(const MatShape& shape, int* width, int* height, |
||||
int* channels, int* batch) |
||||
{ |
||||
const int dims = shape.size(); |
||||
CV_Assert(dims == 2 || dims == 4); |
||||
*batch = shape[0]; |
||||
*channels = shape[1]; |
||||
if (dims == 4) |
||||
{ |
||||
*width = shape[3]; |
||||
*height = shape[2]; |
||||
} |
||||
else |
||||
{ |
||||
*width = 1; |
||||
*height = 1; |
||||
} |
||||
} |
||||
|
||||
// JIT-compiles the top (last) function of <node> for the host target.
//
// outputs  - output blobs of the layer; the size of the first one bounds the
//            x, y, c and n variables of the top function. Must not be empty
//            when Halide is available.
// node     - backend node; must be non-empty.
// targetId - not used by the body.
//
// Does nothing when OpenCV is built without Halide.
void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int targetId)
{
#ifdef HAVE_HALIDE
    CV_Assert(!node.empty());
    // outputs[0] is read below; fail with a clear error instead of indexing
    // an empty vector.
    CV_Assert(!outputs.empty());
    Halide::Func& top = node.dynamicCast<HalideBackendNode>()->funcs.back();

    int outW, outH, outC, outN;
    Halide::Var x("x"), y("y"), c("c"), n("n");
    getCanonicalSize(outputs[0].size, &outW, &outH, &outC, &outN);
    top.bound(x, 0, outW).bound(y, 0, outH)
       .bound(c, 0, outC).bound(n, 0, outN);

    // Compile for the host machine with Halide's runtime assertions disabled.
    Halide::Target target = Halide::get_host_target();
    target.set_feature(Halide::Target::NoAsserts);
    top.compile_jit(target);
#endif  // HAVE_HALIDE
}
||||
|
||||
// Realizes the top (last) function of <node> into the buffers of <outputs>.
// <node> must be non-empty. Does nothing when OpenCV is built without Halide.
void forwardHalide(std::vector<Ptr<BackendWrapper> > &outputs,
                   const Ptr<BackendNode>& node)
{
#ifdef HAVE_HALIDE
    CV_Assert(!node.empty());
    Halide::Func& topFunc = node.dynamicCast<HalideBackendNode>()->funcs.back();

    std::vector<Halide::Buffer<> > targets = halideBuffers(outputs);
    topFunc.realize(Halide::Realization(targets));
#endif  // HAVE_HALIDE
}
||||
|
||||
// Reports whether OpenCV was compiled with Halide support (HAVE_HALIDE).
bool haveHalide()
{
#ifdef HAVE_HALIDE
    const bool halideAvailable = true;
#else
    const bool halideAvailable = false;
#endif  // HAVE_HALIDE
    return halideAvailable;
}
||||
|
||||
} // namespace dnn
|
||||
} // namespace cv
|
@ -0,0 +1,82 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#ifndef __OPENCV_DNN_OP_HALIDE_HPP__ |
||||
#define __OPENCV_DNN_OP_HALIDE_HPP__ |
||||
|
||||
#include "precomp.hpp" |
||||
|
||||
#ifdef HAVE_HALIDE |
||||
#include <Halide.h> |
||||
#endif // HAVE_HALIDE
|
||||
|
||||
namespace cv |
||||
{ |
||||
namespace dnn |
||||
{ |
||||
#ifdef HAVE_HALIDE |
||||
// Returns a four-dimensional float32 buffer that wraps cv::Mat data.
|
||||
// No data copy here.
|
||||
Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat); |
||||
|
||||
Halide::Buffer<float> wrapToHalideBuffer(const Mat& mat, |
||||
const std::vector<int>& shape); |
||||
|
||||
// Extract batch size, number of channels, width and height from buffer.
|
||||
void getCanonicalSize(const Halide::Buffer<>& buffer, int* width, int* height, |
||||
int* channels, int* batch); |
||||
|
||||
// Cast pointer and create copy of Halide buffer. No data copy.
|
||||
Halide::Buffer<> halideBuffer(const Ptr<BackendWrapper>& ptr); |
||||
|
||||
std::vector<Halide::Buffer<> > halideBuffers(const std::vector<Ptr<BackendWrapper> >& ptrs); |
||||
|
||||
class HalideBackendNode : public BackendNode |
||||
{ |
||||
public: |
||||
HalideBackendNode(const Halide::Func& func); |
||||
|
||||
HalideBackendNode(const std::vector<Halide::Func>& funcs); |
||||
|
||||
// Initialize from the <base> node but replace last function to <top>.
|
||||
// It's using in case of layers fusing when we want to keep functions of
|
||||
// root layer but replace top by fused one (i.e. conv+padding to relu+padding).
|
||||
HalideBackendNode(const Ptr<HalideBackendNode>& base, const Halide::Func& top); |
||||
|
||||
std::vector<Halide::Func> funcs; |
||||
}; |
||||
|
||||
class HalideBackendWrapper : public BackendWrapper |
||||
{ |
||||
public: |
||||
HalideBackendWrapper(int targetId, const cv::Mat& m); |
||||
|
||||
HalideBackendWrapper(const Ptr<BackendWrapper>& base, const MatShape& shape); |
||||
|
||||
Halide::Buffer<float> buffer; |
||||
}; |
||||
#endif // HAVE_HALIDE
|
||||
|
||||
// Extract batch size, number of channels, width and height from MatSize.
|
||||
void getCanonicalSize(const MatSize& size, int* width, int* height, |
||||
int* channels, int* batch); |
||||
|
||||
void getCanonicalSize(const MatShape& shape, int* width, int* height, |
||||
int* channels, int* batch); |
||||
|
||||
// Realize Halide pipeline into output blobs.
|
||||
void forwardHalide(std::vector<Ptr<BackendWrapper> > &outputs, |
||||
const Ptr<BackendNode>& node); |
||||
|
||||
// Compile Halide pipeline to specific target. Use outputs to set bounds of functions.
|
||||
void compileHalide(std::vector<Mat> &outputs, Ptr<BackendNode>& node, int targetId); |
||||
|
||||
bool haveHalide(); |
||||
} // namespace dnn
|
||||
} // namespace cv
|
||||
|
||||
#endif // __OPENCV_DNN_OP_HALIDE_HPP__
|
@ -0,0 +1,637 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
// These tests don't require any external data. They just compare outputs of
|
||||
// layers using different computation backends. Input and parameters are random.
|
||||
|
||||
namespace cvtest |
||||
{ |
||||
|
||||
#ifdef HAVE_HALIDE |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
using namespace testing; |
||||
|
||||
// Runs a single-layer network on random input with the default backend and
// with the Halide backend, then checks both outputs with normAssert().
//
// params - layer description; its name and type are used to add the layer.
// input  - pre-sized blob; it is filled here with values drawn from [-1, 1).
static void test(LayerParams& params, Mat& input)
{
    randu(input, -1.0f, 1.0f);

    // Network with one layer connected to the network input.
    Net net;
    const int layerId = net.addLayer(params.name, params.type, params);
    net.connect(0, 0, layerId, 0);
    net.setBlob("", input);
    net.allocate();

    // Reference pass on the default backend.
    net.forward();
    const Mat referenceOutput = net.getBlob(params.name).clone();

    // Same network, Halide backend.
    net.setPreferableBackend(DNN_BACKEND_HALIDE);
    net.forward();
    const Mat halideOutput = net.getBlob(params.name).clone();

    normAssert(referenceOutput, halideOutput);
}
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Convolution
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Size, bool> > Convolution; |
||||
TEST_P(Convolution, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam())[0]; |
||||
int outChannels = get<0>(GetParam())[1]; |
||||
int group = get<0>(GetParam())[2]; |
||||
Size inSize = get<1>(GetParam()); |
||||
Size kernel = get<2>(GetParam()); |
||||
Size stride = get<3>(GetParam()); |
||||
Size pad = get<4>(GetParam()); |
||||
Size dilation = get<5>(GetParam()); |
||||
bool hasBias = get<6>(GetParam()); |
||||
|
||||
Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F); |
||||
randu(weights, -1.0f, 1.0f); |
||||
|
||||
LayerParams lp; |
||||
lp.set("kernel_w", kernel.width); |
||||
lp.set("kernel_h", kernel.height); |
||||
lp.set("pad_w", pad.width); |
||||
lp.set("pad_h", pad.height); |
||||
lp.set("stride_w", stride.width); |
||||
lp.set("stride_h", stride.height); |
||||
lp.set("dilation_w", dilation.width); |
||||
lp.set("dilation_h", dilation.height); |
||||
lp.set("num_output", outChannels); |
||||
lp.set("group", group); |
||||
lp.set("bias_term", hasBias); |
||||
lp.type = "Convolution"; |
||||
lp.name = "testLayer"; |
||||
lp.blobs.push_back(weights); |
||||
if (hasBias) |
||||
{ |
||||
Mat bias({outChannels}, CV_32F); |
||||
randu(bias, -1.0f, 1.0f); |
||||
lp.blobs.push_back(bias); |
||||
} |
||||
Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
// Every combination of the value sets below is instantiated as one test.
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
    // in channels, out channels, group
    Values(Vec3i(6, 4, 1), Vec3i(6, 9, 1),
           Vec3i(6, 4, 2), Vec3i(6, 9, 3)),
    // in size
    Values(Size(5, 6)),
    // kernel
    Values(Size(3, 1), Size(1, 3)),
    // stride
    Values(Size(1, 1), Size(2, 2)),
    // pad
    Values(Size(1, 0), Size(0, 1)),
    // dilation
    Values(Size(1, 1), Size(2, 2)),
    // has bias
    Bool()
));
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Deconvolution
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<Vec3i, Size, Size, Size, Size, Vec4i, bool> > Deconvolution; |
||||
TEST_P(Deconvolution, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam())[0]; |
||||
int outChannels = get<0>(GetParam())[1]; |
||||
int group = get<0>(GetParam())[2]; |
||||
Size inSize = get<1>(GetParam()); |
||||
Size kernel = get<2>(GetParam()); |
||||
Size pad = get<3>(GetParam()); |
||||
Size dilation = get<4>(GetParam()); |
||||
Size stride = Size(get<5>(GetParam())[0], get<5>(GetParam())[1]); |
||||
Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]); |
||||
bool hasBias = get<6>(GetParam()); |
||||
|
||||
Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F); |
||||
randu(weights, -1.0f, 1.0f); |
||||
|
||||
LayerParams lp; |
||||
lp.set("kernel_w", kernel.width); |
||||
lp.set("kernel_h", kernel.height); |
||||
lp.set("pad_w", pad.width); |
||||
lp.set("pad_h", pad.height); |
||||
lp.set("stride_w", stride.width); |
||||
lp.set("stride_h", stride.height); |
||||
lp.set("dilation_w", dilation.width); |
||||
lp.set("dilation_h", dilation.height); |
||||
lp.set("adj_w", adjPad.width); |
||||
lp.set("adj_h", adjPad.height); |
||||
lp.set("num_output", outChannels); |
||||
lp.set("group", group); |
||||
lp.set("bias_term", hasBias); |
||||
lp.type = "Deconvolution"; |
||||
lp.name = "testLayer"; |
||||
lp.blobs.push_back(weights); |
||||
if (hasBias) |
||||
{ |
||||
Mat bias({outChannels}, CV_32F); |
||||
randu(bias, -1.0f, 1.0f); |
||||
lp.blobs.push_back(bias); |
||||
} |
||||
Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine( |
||||
/*in channels, out channels, group*/ |
||||
Values(Vec3i(6, 4, 1), Vec3i(6, 9, 1)), |
||||
/*in size*/ Values(Size(5, 6)), |
||||
/*kernel*/ Values(Size(3, 1), Size(1, 3)), |
||||
/*pad*/ Values(Size(1, 0), Size(0, 1)), |
||||
/*dilation*/ Values(Size(1, 1), Size(2, 2)), |
||||
/*stride, adj. pad*/ Values(Vec4i(1,1, 0,0), Vec4i(2,2, 1,0), Vec4i(1,2, 0,1)), |
||||
/*has bias*/ Bool() |
||||
)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// LRN
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<Vec3i, int, Vec3f, bool, std::string> > LRN; |
||||
TEST_P(LRN, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam())[0]; |
||||
Size inSize = Size(get<0>(GetParam())[1], get<0>(GetParam())[2]); |
||||
int localSize = get<1>(GetParam()); |
||||
float alpha = get<2>(GetParam())[0]; |
||||
float beta = get<2>(GetParam())[1]; |
||||
float bias = get<2>(GetParam())[2]; |
||||
bool normBySize = get<3>(GetParam()); |
||||
std::string nrmType = get<4>(GetParam()); |
||||
|
||||
LayerParams lp; |
||||
lp.set("norm_region", nrmType); |
||||
lp.set("local_size", localSize); |
||||
lp.set("alpha", alpha); |
||||
lp.set("beta", beta); |
||||
lp.set("bias", bias); |
||||
lp.set("norm_by_size", normBySize); |
||||
lp.type = "LRN"; |
||||
lp.name = "testLayer"; |
||||
|
||||
Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, LRN, Combine( |
||||
/*input ch,w,h*/ Values(Vec3i(6, 5, 8), Vec3i(7, 11, 6)), |
||||
/*local size*/ Values(3, 5), |
||||
Values(Vec3f(0.9f, 1.0f, 1.1f), Vec3f(0.9f, 1.1f, 1.0f), |
||||
/*alpha, beta,*/ Vec3f(1.0f, 0.9f, 1.1f), Vec3f(1.0f, 1.1f, 0.9f), |
||||
/*bias */ Vec3f(1.1f, 0.9f, 1.0f), Vec3f(1.1f, 1.0f, 0.9f)), |
||||
/*norm_by_size*/ Bool(), |
||||
/*norm_type*/ Values("ACROSS_CHANNELS", "WITHIN_CHANNEL") |
||||
)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Average pooling
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, Size, Size, Size> > AvePooling; |
||||
TEST_P(AvePooling, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam()); |
||||
Size outSize = get<1>(GetParam());; // Input size will be computed from parameters.
|
||||
Size kernel = get<2>(GetParam()); |
||||
Size stride = get<3>(GetParam()); |
||||
|
||||
const int inWidth = (outSize.width - 1) * stride.width + kernel.width; |
||||
const int inHeight = (outSize.height - 1) * stride.height + kernel.height; |
||||
|
||||
LayerParams lp; |
||||
lp.set("pool", "ave"); |
||||
lp.set("kernel_w", kernel.width); |
||||
lp.set("kernel_h", kernel.height); |
||||
lp.set("stride_w", stride.width); |
||||
lp.set("stride_h", stride.height); |
||||
lp.type = "Pooling"; |
||||
lp.name = "testLayer"; |
||||
|
||||
Mat input({1, inChannels, inHeight, inWidth}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, AvePooling, Combine( |
||||
/*in channels*/ Values(3, 4), |
||||
/*out size*/ Values(Size(1, 1), Size(2, 2), Size(3, 2), Size(4, 7)), |
||||
/*kernel*/ Values(Size(1, 1), Size(2, 2), Size(3, 3), Size(3, 2)), |
||||
/*stride*/ Values(Size(1, 1), Size(2, 2), Size(3, 2)) |
||||
)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Maximum pooling
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, Size, Size, Size, Size> > MaxPooling; |
||||
TEST_P(MaxPooling, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam()); |
||||
Size inSize = get<1>(GetParam()); |
||||
Size kernel = get<2>(GetParam()); |
||||
Size stride = get<3>(GetParam()); |
||||
Size pad = get<4>(GetParam()); |
||||
|
||||
LayerParams lp; |
||||
lp.set("pool", "max"); |
||||
lp.set("kernel_w", kernel.width); |
||||
lp.set("kernel_h", kernel.height); |
||||
lp.set("stride_w", stride.width); |
||||
lp.set("stride_h", stride.height); |
||||
lp.set("pad_w", pad.width); |
||||
lp.set("pad_h", pad.height); |
||||
lp.type = "Pooling"; |
||||
lp.name = "testLayer"; |
||||
|
||||
Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, MaxPooling, Combine( |
||||
/*in channels*/ Values(3, 4), |
||||
/*in size*/ Values(Size(5, 5), Size(7, 6)), |
||||
/*kernel*/ Values(Size(2, 2), Size(3, 3), Size(3, 2)), |
||||
/*stride*/ Values(Size(1, 1), Size(2, 2), Size(3, 2)), |
||||
/*pad*/ Values(Size(0, 0), Size(1, 1), Size(0, 1)) |
||||
)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Fully-connected
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int, Size, int, bool> > FullyConnected; |
||||
TEST_P(FullyConnected, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam()); |
||||
Size inSize = get<1>(GetParam()); |
||||
int outChannels = get<2>(GetParam()); |
||||
bool hasBias = get<3>(GetParam()); |
||||
|
||||
Mat weights(outChannels, inChannels * inSize.height * inSize.width, CV_32F); |
||||
randu(weights, -1.0f, 1.0f); |
||||
|
||||
Mat bias(1, outChannels, CV_32F); |
||||
randu(bias, -1.0f, 1.0f); |
||||
|
||||
LayerParams lp; |
||||
lp.set("num_output", outChannels); |
||||
lp.set("bias_term", hasBias); |
||||
lp.blobs.push_back(weights); |
||||
lp.blobs.push_back(bias); |
||||
lp.type = "InnerProduct"; |
||||
lp.name = "testLayer"; |
||||
|
||||
Mat input({1, inChannels, inSize.height, inSize.width}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, FullyConnected, Combine( |
||||
/*in channels*/ Values(3, 4), |
||||
/*in size*/ Values(Size(5, 4), Size(4, 5), Size(1, 1)), |
||||
/*out channels*/ Values(3, 4), |
||||
/*has bias*/ Bool() |
||||
)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// SoftMax
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
typedef TestWithParam<tuple<int> > SoftMax; |
||||
TEST_P(SoftMax, Accuracy) |
||||
{ |
||||
int inChannels = get<0>(GetParam()); |
||||
LayerParams lp; |
||||
lp.type = "SoftMax"; |
||||
lp.name = "testLayer"; |
||||
|
||||
Mat input({1, inChannels, 1, 1}, CV_32F); |
||||
test(lp, input); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, SoftMax, Values(3, 4, 5, 1024)); |
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Max pooling - unpooling
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Chains a 2x2 max pooling layer with a MaxUnpool layer (fed by both pooling
// outputs) and checks that the default and Halide backends produce the same
// unpooled blob for a random 1x1x4x4 input.
TEST(MaxPoolUnpool_Halide, Accuracy)
{
    // Geometry shared by the pooling layer and its unpooling counterpart.
    const int kernelSize = 2;
    const int strideSize = 2;
    const int padSize = 0;

    LayerParams pool;
    pool.type = "Pooling";
    pool.name = "testPool";
    pool.set("pool", "max");
    pool.set("kernel_w", kernelSize);
    pool.set("kernel_h", kernelSize);
    pool.set("stride_w", strideSize);
    pool.set("stride_h", strideSize);
    pool.set("pad_w", padSize);
    pool.set("pad_h", padSize);

    LayerParams unpool;
    unpool.type = "MaxUnpool";
    unpool.name = "testUnpool";
    unpool.set("pool_k_w", kernelSize);
    unpool.set("pool_k_h", kernelSize);
    unpool.set("pool_stride_w", strideSize);
    unpool.set("pool_stride_h", strideSize);
    unpool.set("pool_pad_w", padSize);
    unpool.set("pool_pad_h", padSize);

    Net net;
    const int poolId = net.addLayer(pool.name, pool.type, pool);
    net.connect(0, 0, poolId, 0);

    // Both outputs of the pooling layer are wired into the unpooling layer.
    const int unpoolId = net.addLayer(unpool.name, unpool.type, unpool);
    net.connect(poolId, 0, unpoolId, 0);
    net.connect(poolId, 1, unpoolId, 1);

    Mat input({1, 1, 4, 4}, CV_32F);
    randu(input, -1.0f, 1.0f);

    // Reference run on the default backend.
    net.setBlob("", input);
    net.forward();
    const Mat outputDefault = net.getBlob("testUnpool").clone();

    // Same network and input on the Halide backend.
    net.setPreferableBackend(DNN_BACKEND_HALIDE);
    net.setBlob("", input);
    net.forward();
    const Mat outputHalide = net.getBlob("testUnpool").clone();

    normAssert(outputDefault, outputHalide);
}
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// AvePooling + in-place layers
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static const int kNumChannels = 3; |
||||
|
||||
void testInPlaceActivation(LayerParams& lp) |
||||
{ |
||||
EXPECT_FALSE(lp.name.empty()); |
||||
|
||||
LayerParams pool; |
||||
pool.set("pool", "ave"); |
||||
pool.set("kernel_w", 2); |
||||
pool.set("kernel_h", 2); |
||||
pool.set("stride_w", 2); |
||||
pool.set("stride_h", 2); |
||||
pool.type = "Pooling"; |
||||
|
||||
Net net; |
||||
int poolId = net.addLayer(pool.name, pool.type, pool); |
||||
net.connect(0, 0, poolId, 0); |
||||
net.addLayerToPrev(lp.name, lp.type, lp); |
||||
|
||||
Mat input({1, kNumChannels, 10, 10}, CV_32F); |
||||
randu(input, -1.0f, 1.0f); |
||||
net.setBlob("", input); |
||||
net.forward(); |
||||
Mat outputDefault = net.getBlob(lp.name).clone(); |
||||
|
||||
net.setBlob("", input); |
||||
net.setPreferableBackend(DNN_BACKEND_HALIDE); |
||||
net.forward(); |
||||
Mat outputHalide = net.getBlob(lp.name).clone(); |
||||
normAssert(outputDefault, outputHalide); |
||||
} |
||||
|
||||
typedef TestWithParam<tuple<bool, bool, float> > BatchNorm; |
||||
TEST_P(BatchNorm, Accuracy) |
||||
{ |
||||
bool hasWeights = get<0>(GetParam()); |
||||
bool hasBias = get<1>(GetParam()); |
||||
float epsilon = get<2>(GetParam()); |
||||
|
||||
LayerParams lp; |
||||
lp.set("has_weight", hasWeights); |
||||
lp.set("has_bias", hasBias); |
||||
lp.set("eps", epsilon); |
||||
lp.type = "BatchNorm"; |
||||
lp.name = "testLayer"; |
||||
|
||||
lp.blobs.reserve(4); |
||||
for (int i = 0; i < 3; ++i) |
||||
lp.blobs.push_back(Mat({kNumChannels}, CV_32F)); |
||||
if (hasBias || hasWeights) |
||||
lp.blobs.push_back(Mat({kNumChannels}, CV_32F)); |
||||
|
||||
for (Mat& m : lp.blobs) |
||||
randu(m, 0.0f, 1.0f); |
||||
|
||||
testInPlaceActivation(lp); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, BatchNorm, Combine( |
||||
/*has weights*/ Bool(), |
||||
/*has bias*/ Bool(), |
||||
/*epsilon*/ Values(1e-3f, 1e-5f) |
||||
)); |
||||
|
||||
typedef TestWithParam<tuple<float> > ReLU; |
||||
TEST_P(ReLU, Accuracy) |
||||
{ |
||||
float negativeSlope = get<0>(GetParam()); |
||||
|
||||
LayerParams lp; |
||||
lp.set("negative_slope", negativeSlope); |
||||
lp.type = "ReLU"; |
||||
lp.name = "testLayer"; |
||||
testInPlaceActivation(lp); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Values( |
||||
/*negative slope*/ 2.0f, 0.3f, -0.1f |
||||
)); |
||||
|
||||
typedef TestWithParam<tuple<std::string> > NoParamActivation; |
||||
TEST_P(NoParamActivation, Accuracy) |
||||
{ |
||||
LayerParams lp; |
||||
lp.type = get<0>(GetParam()); |
||||
lp.name = "testLayer"; |
||||
testInPlaceActivation(lp); |
||||
} |
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, NoParamActivation, Values( |
||||
/*type*/ "TanH", "Sigmoid", "AbsVal", "BNLL" |
||||
)); |
||||
|
||||
typedef TestWithParam<tuple<Vec3f> > Power; |
||||
TEST_P(Power, Accuracy) |
||||
{ |
||||
float power = get<0>(GetParam())[0]; |
||||
float scale = get<0>(GetParam())[1]; |
||||
float shift = get<0>(GetParam())[2]; |
||||
|
||||
LayerParams lp; |
||||
lp.set("power", power); |
||||
lp.set("scale", scale); |
||||
lp.set("shift", shift); |
||||
lp.type = "Power"; |
||||
lp.name = "testLayer"; |
||||
testInPlaceActivation(lp); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Power, |
||||
/*power, scale, shift*/ Values(Vec3f(0.9f, 1.0f, 1.1f), Vec3f(0.9f, 1.1f, 1.0f), |
||||
Vec3f(1.0f, 0.9f, 1.1f), Vec3f(1.0f, 1.1f, 0.9f), |
||||
Vec3f(1.1f, 0.9f, 1.0f), Vec3f(1.1f, 1.0f, 0.9f)) |
||||
); |
||||
|
||||
// Runs a ChannelsPReLU layer with one random blob of kNumChannels values
// through testInPlaceActivation().
TEST(ChannelsPReLU, Accuracy)
{
    Mat slopes({kNumChannels}, CV_32F);
    randu(slopes, -1.0f, 1.0f);

    LayerParams lp;
    lp.name = "testLayer";
    lp.type = "ChannelsPReLU";
    lp.blobs.push_back(slopes);

    testInPlaceActivation(lp);
}
||||
|
||||
typedef TestWithParam<tuple<bool> > Scale; |
||||
TEST_P(Scale, Accuracy) |
||||
{ |
||||
bool hasBias = get<0>(GetParam()); |
||||
|
||||
LayerParams lp; |
||||
lp.set("bias_term", hasBias); |
||||
lp.type = "Scale"; |
||||
lp.name = "testLayer"; |
||||
lp.blobs.push_back(Mat({kNumChannels}, CV_32F)); |
||||
randu(lp.blobs[0], -1.0f, 1.0f); |
||||
if (hasBias) |
||||
{ |
||||
lp.blobs.push_back(Mat({kNumChannels}, CV_32F)); |
||||
randu(lp.blobs[1], -1.0f, 1.0f); |
||||
} |
||||
testInPlaceActivation(lp); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Scale, Values(true, false)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Concat layer
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// input --- conv --- concat --- output
|
||||
// `--- conv ----^ ^ ^
|
||||
// `---- ... ------' '
|
||||
// `-----------------'
|
||||
typedef TestWithParam<tuple<Vec3i, Vec3i> > Concat; |
||||
TEST_P(Concat, Accuracy) |
||||
{ |
||||
Vec3i inSize = get<0>(GetParam()); |
||||
Vec3i numChannels = get<1>(GetParam()); |
||||
|
||||
Net net; |
||||
|
||||
LayerParams concatParam; |
||||
concatParam.type = "Concat"; |
||||
concatParam.name = "testLayer"; |
||||
int concatId = net.addLayer(concatParam.name, concatParam.type, concatParam); |
||||
net.connect(0, 0, concatId, 0); |
||||
|
||||
for (int i = 0, n = numChannels.channels; i < n; ++i) |
||||
{ |
||||
if (!numChannels[i]) |
||||
break; |
||||
|
||||
Mat weights({numChannels[i], inSize[0], 1, 1}, CV_32F); |
||||
randu(weights, -1.0f, 1.0f); |
||||
|
||||
LayerParams convParam; |
||||
convParam.set("kernel_w", 1); |
||||
convParam.set("kernel_h", 1); |
||||
convParam.set("num_output", numChannels[i]); |
||||
convParam.set("bias_term", false); |
||||
convParam.type = "Convolution"; |
||||
std::ostringstream ss; |
||||
ss << "convLayer" << i; |
||||
convParam.name = ss.str(); |
||||
convParam.blobs.push_back(weights); |
||||
|
||||
int convId = net.addLayer(convParam.name, convParam.type, convParam); |
||||
net.connect(0, 0, convId, 0); |
||||
net.connect(convId, 0, concatId, i + 1); |
||||
} |
||||
|
||||
Mat input({1, inSize[0], inSize[1], inSize[2]}, CV_32F); |
||||
randu(input, -1.0f, 1.0f); |
||||
|
||||
net.setBlob("", input); |
||||
net.forward(); |
||||
Mat outputDefault = net.getBlob(concatParam.name).clone(); |
||||
|
||||
net.setPreferableBackend(DNN_BACKEND_HALIDE); |
||||
net.forward(); |
||||
Mat outputHalide = net.getBlob(concatParam.name).clone(); |
||||
normAssert(outputDefault, outputHalide); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Concat, Combine( |
||||
/*input size*/ Values(Vec3i(1, 4, 5), Vec3i(2, 8, 6)), |
||||
/*channels*/ Values(Vec3i(2, 0, 0), Vec3i(3, 4, 0), Vec3i(1, 6, 2)) |
||||
)); |
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Element-wise layers
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// input --- conv --- eltwise --- output
|
||||
// `--- conv ----^ ^ ^
|
||||
// `---- ... ------' '
|
||||
// `-----------------'
|
||||
typedef TestWithParam<tuple<Vec3i, std::string, int> > Eltwise; |
||||
TEST_P(Eltwise, Accuracy) |
||||
{ |
||||
Vec3i inSize = get<0>(GetParam()); |
||||
std::string op = get<1>(GetParam()); |
||||
int numConv = get<2>(GetParam()); |
||||
|
||||
Net net; |
||||
|
||||
LayerParams eltwiseParam; |
||||
eltwiseParam.type = "Eltwise"; |
||||
eltwiseParam.name = "testLayer"; |
||||
int eltwiseId = net.addLayer(eltwiseParam.name, eltwiseParam.type, eltwiseParam); |
||||
net.connect(0, 0, eltwiseId, 0); |
||||
|
||||
for (int i = 0; i < numConv; ++i) |
||||
{ |
||||
Mat weights({inSize[0], inSize[0], 1, 1}, CV_32F); |
||||
randu(weights, -1.0f, 1.0f); |
||||
|
||||
LayerParams convParam; |
||||
convParam.set("kernel_w", 1); |
||||
convParam.set("kernel_h", 1); |
||||
convParam.set("num_output", inSize[0]); |
||||
convParam.set("bias_term", false); |
||||
convParam.type = "Convolution"; |
||||
std::ostringstream ss; |
||||
ss << "convLayer" << i; |
||||
convParam.name = ss.str(); |
||||
convParam.blobs.push_back(weights); |
||||
|
||||
int convId = net.addLayer(convParam.name, convParam.type, convParam); |
||||
net.connect(0, 0, convId, 0); |
||||
net.connect(convId, 0, eltwiseId, i + 1); |
||||
} |
||||
|
||||
Mat input({1, inSize[0], inSize[1], inSize[2]}, CV_32F); |
||||
randu(input, -1.0f, 1.0f); |
||||
|
||||
net.setBlob("", input); |
||||
net.forward(); |
||||
Mat outputDefault = net.getBlob(eltwiseParam.name).clone(); |
||||
|
||||
net.setPreferableBackend(DNN_BACKEND_HALIDE); |
||||
net.forward(); |
||||
Mat outputHalide = net.getBlob(eltwiseParam.name).clone(); |
||||
normAssert(outputDefault, outputHalide); |
||||
} |
||||
|
||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Eltwise, Combine( |
||||
/*input size*/ Values(Vec3i(1, 4, 5), Vec3i(2, 8, 6)), |
||||
/*operation*/ Values("prod", "sum", "max"), |
||||
/*num convs*/ Values(1, 2, 3) |
||||
)); |
||||
#endif // HAVE_HALIDE
|
||||
|
||||
} // namespace cvtest
|
@ -0,0 +1,124 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2017, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
namespace cvtest |
||||
{ |
||||
|
||||
#ifdef HAVE_HALIDE |
||||
using namespace cv; |
||||
using namespace dnn; |
||||
|
||||
static void loadNet(const std::string& weights, const std::string& proto, |
||||
const std::string& framework, Net* net) |
||||
{ |
||||
if (framework == "caffe") |
||||
{ |
||||
*net = cv::dnn::readNetFromCaffe(proto, weights); |
||||
} |
||||
else if (framework == "torch") |
||||
{ |
||||
*net = cv::dnn::readNetFromTorch(weights); |
||||
} |
||||
else if (framework == "tensorflow") |
||||
{ |
||||
*net = cv::dnn::readNetFromTensorflow(weights); |
||||
} |
||||
else |
||||
CV_Error(Error::StsNotImplemented, "Unknown framework " + framework); |
||||
} |
||||
|
||||
static void test(const std::string& weights, const std::string& proto, |
||||
const std::string& scheduler, int inWidth, int inHeight, |
||||
const std::string& outputLayer, const std::string& framework, |
||||
int targetId) |
||||
{ |
||||
Mat input(inHeight, inWidth, CV_32FC3), outputDefault, outputHalide; |
||||
randu(input, 0.0f, 1.0f); |
||||
|
||||
Net netDefault, netHalide; |
||||
loadNet(weights, proto, framework, &netDefault); |
||||
loadNet(weights, proto, framework, &netHalide); |
||||
|
||||
netDefault.setBlob("", blobFromImage(input.clone(), 1.0f, false)); |
||||
netDefault.forward(netDefault.getLayerId(outputLayer)); |
||||
outputDefault = netDefault.getBlob(outputLayer).clone(); |
||||
|
||||
netHalide.setBlob("", blobFromImage(input.clone(), 1.0f, false)); |
||||
netHalide.setPreferableBackend(DNN_BACKEND_HALIDE); |
||||
netHalide.compileHalide(scheduler); |
||||
netHalide.forward(netHalide.getLayerId(outputLayer)); |
||||
outputHalide = netHalide.getBlob(outputLayer).clone(); |
||||
|
||||
normAssert(outputDefault, outputHalide); |
||||
|
||||
// An extra test: change input.
|
||||
input *= 0.1f; |
||||
netDefault.setBlob("", blobFromImage(input.clone(), 1.0, false)); |
||||
netHalide.setBlob("", blobFromImage(input.clone(), 1.0, false)); |
||||
|
||||
normAssert(outputDefault, outputHalide); |
||||
|
||||
// Swap backends.
|
||||
netHalide.setPreferableBackend(DNN_BACKEND_DEFAULT); |
||||
netHalide.forward(netHalide.getLayerId(outputLayer)); |
||||
|
||||
netDefault.setPreferableBackend(DNN_BACKEND_HALIDE); |
||||
netDefault.compileHalide(scheduler); |
||||
netDefault.forward(netDefault.getLayerId(outputLayer)); |
||||
|
||||
outputDefault = netHalide.getBlob(outputLayer).clone(); |
||||
outputHalide = netDefault.getBlob(outputLayer).clone(); |
||||
normAssert(outputDefault, outputHalide); |
||||
} |
||||
|
||||
// Compares default and Halide outputs of the Caffe GoogLeNet model at layer
// "prob" for a 227x227 input, passing an empty scheduler path.
TEST(Reproducibility_GoogLeNet_Halide, Accuracy)
{
    const std::string weights = findDataFile("dnn/bvlc_googlenet.caffemodel");
    const std::string proto = findDataFile("dnn/bvlc_googlenet.prototxt");
    test(weights, proto, "", 227, 227, "prob", "caffe", DNN_TARGET_CPU);
}
||||
|
||||
// Compares default and Halide outputs of the Caffe AlexNet model at layer
// "prob" for a 227x227 input, using the AlexNet scheduler file.
TEST(Reproducibility_AlexNet_Halide, Accuracy)
{
    const std::string dnnDir = getOpenCVExtraDir() + "/dnn/";
    test(dnnDir + "bvlc_alexnet.caffemodel",
         dnnDir + "bvlc_alexnet.prototxt",
         dnnDir + "halide_scheduler_alexnet.yml",
         227, 227, "prob", "caffe", DNN_TARGET_CPU);
}
||||
|
||||
// TEST(Reproducibility_ResNet_50_Halide, Accuracy)
|
||||
// {
|
||||
// test(getOpenCVExtraDir() + "/dnn/ResNet-50-model.caffemodel",
|
||||
// getOpenCVExtraDir() + "/dnn/ResNet-50-deploy.prototxt",
|
||||
// getOpenCVExtraDir() + "/dnn/halide_scheduler_resnet_50.yml",
|
||||
// 224, 224, "prob", "caffe", DNN_TARGET_CPU);
|
||||
// };
|
||||
|
||||
// TEST(Reproducibility_SqueezeNet_v1_1_Halide, Accuracy)
|
||||
// {
|
||||
// test(getOpenCVExtraDir() + "/dnn/squeezenet_v1_1.caffemodel",
|
||||
// getOpenCVExtraDir() + "/dnn/squeezenet_v1_1.prototxt",
|
||||
// getOpenCVExtraDir() + "/dnn/halide_scheduler_squeezenet_v1_1.yml",
|
||||
// 227, 227, "prob", "caffe", DNN_TARGET_CPU);
|
||||
// };
|
||||
|
||||
// Compares default and Halide outputs of the TensorFlow Inception-5h graph at
// layer "softmax2" for a 224x224 input, using the Inception-5h scheduler file.
TEST(Reproducibility_Inception_5h_Halide, Accuracy)
{
    const std::string dnnDir = getOpenCVExtraDir() + "/dnn/";
    test(dnnDir + "tensorflow_inception_graph.pb", "",
         dnnDir + "halide_scheduler_inception_5h.yml",
         224, 224, "softmax2", "tensorflow", DNN_TARGET_CPU);
}
||||
|
||||
// Compares default and Halide outputs of the Torch ENet model at layer
// "l367_Deconvolution" for a 512x512 input, using the ENet scheduler file.
TEST(Reproducibility_ENet_Halide, Accuracy)
{
    const std::string dnnDir = getOpenCVExtraDir() + "/dnn/";
    test(dnnDir + "Enet-model-best.net", "",
         dnnDir + "halide_scheduler_enet.yml",
         512, 512, "l367_Deconvolution", "torch", DNN_TARGET_CPU);
}
||||
#endif // HAVE_HALIDE
|
||||
|
||||
} // namespace cvtest
|
@ -0,0 +1,135 @@ |
||||
# How to enable Halide backend to improve efficiency {#tutorial_dnn_halide}
||||
|
||||
## Introduction |
||||
This tutorial explains how to run your models in the OpenCV deep learning module
using the Halide language backend. Halide is an open-source project that lets us
write image processing algorithms in a readable form, schedule computations
for a specific device, and evaluate them with good efficiency.
||||
|
||||
An official website of the Halide project: http://halide-lang.org/. |
||||
|
||||
## Efficiency comparison |
||||
Measured on Intel® Core™ i7-6700K CPU @ 4.00GHz x 8. |
||||
|
||||
Single image forward pass (in milliseconds): |
||||
|
||||
| Architecture | MKL backend | Halide backend | Speed Up ratio | |
||||
|-----------------:|------------:|---------------:|---------------:| |
||||
| AlexNet | 16.55 | 22.38 | x0.73 | |
||||
| ResNet-50 | 63.69 | 73.91 | x0.86 | |
||||
| SqueezeNet v1.1 | 10.11 | 8.21 | x1.23 | |
||||
| Inception-5h | 35.38 | 37.06 | x0.95 | |
||||
| ENet @ 3x512x256 | 82.26 | 41.21 | x1.99 | |
||||
|
||||
Scheduling directives can be found at [opencv_extra/testdata/dnn](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn).
||||
|
||||
## Requirements |
||||
### LLVM compiler |
||||
|
||||
@note LLVM compilation might take a long time. |
||||
|
||||
- Download LLVM source code from http://releases.llvm.org/4.0.0/llvm-4.0.0.src.tar.xz. |
||||
Unpack it. Let **llvm_root** be the root directory of the source code.
||||
|
||||
- Create directory **llvm_root**/tools/clang |
||||
|
||||
- Download Clang with the same version as LLVM. In our case it will be from |
||||
http://releases.llvm.org/4.0.0/cfe-4.0.0.src.tar.xz. Unpack it into |
||||
**llvm_root**/tools/clang. Note that this directory must be the root of the Clang source code.
||||
|
||||
- Build LLVM on Linux |
||||
@code |
||||
cd llvm_root |
||||
mkdir build && cd build |
||||
cmake -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_TARGETS_TO_BUILD="X86" -DLLVM_ENABLE_ASSERTIONS=ON -DCMAKE_BUILD_TYPE=Release .. |
||||
make -j4 |
||||
@endcode |
||||
|
||||
- Build LLVM on Windows (Developer Command Prompt) |
||||
@code |
||||
mkdir \\path-to-llvm-build\\ && cd \\path-to-llvm-build\\ |
||||
cmake.exe -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_ASSERTIONS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=\\path-to-llvm-install\\ -G "Visual Studio 14 Win64" \\path-to-llvm-src\\ |
||||
MSBuild.exe /m:4 /t:Build /p:Configuration=Release .\\INSTALL.vcxproj |
||||
@endcode |
||||
|
||||
@note `\\path-to-llvm-build\\` and `\\path-to-llvm-install\\` are different directories. |
||||
|
||||
### Halide language. |
||||
|
||||
- Download the source code from the GitHub repository, https://github.com/halide/Halide,
or clone it using git. The root directory will be referred to as **halide_root**.
||||
@code |
||||
git clone https://github.com/halide/Halide.git |
||||
@endcode |
||||
|
||||
- Build Halide on Linux |
||||
@code |
||||
cd halide_root |
||||
mkdir build && cd build |
||||
cmake -DLLVM_DIR=llvm_root/build/lib/cmake/llvm -DCMAKE_BUILD_TYPE=Release -DLLVM_VERSION=40 -DWITH_TESTS=OFF -DWITH_APPS=OFF -DWITH_TUTORIALS=OFF .. |
||||
make -j4 |
||||
@endcode |
||||
|
||||
- Build Halide on Windows (Developer Command Prompt) |
||||
@code |
||||
cd halide_root |
||||
mkdir build && cd build |
||||
cmake.exe -DLLVM_DIR=\\path-to-llvm-install\\lib\\cmake\\llvm -DLLVM_VERSION=40 -DWITH_TESTS=OFF -DWITH_APPS=OFF -DWITH_TUTORIALS=OFF -DCMAKE_BUILD_TYPE=Release -G "Visual Studio 14 Win64" .. |
||||
MSBuild.exe /m:4 /t:Build /p:Configuration=Release .\\ALL_BUILD.vcxproj |
||||
@endcode |
||||
|
||||
## Build OpenCV with Halide backend |
||||
When you build OpenCV add the following configuration flags: |
||||
|
||||
- `WITH_HALIDE` - enable Halide linkage |
||||
|
||||
- `HALIDE_ROOT_DIR` - path to Halide build directory |
||||
|
||||
For instructions on building OpenCV with the DNN module, see @ref tutorial_dnn_build.
||||
|
||||
## Sample |
||||
|
||||
@include dnn/samples/squeezenet_halide.cpp |
||||
|
||||
## Explanation |
||||
Download Caffe model from SqueezeNet repository: [train_val.prototxt](https://github.com/DeepScale/SqueezeNet/blob/master/SqueezeNet_v1.1/train_val.prototxt) and [squeezenet_v1.1.caffemodel](https://github.com/DeepScale/SqueezeNet/blob/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel). |
||||
|
||||
You also need a file with the names of the [ILSVRC2012](http://image-net.org/challenges/LSVRC/2012/browse-synsets) classes:
||||
[synset_words.txt](https://raw.githubusercontent.com/ludv1x/opencv_contrib/master/modules/dnn/samples/synset_words.txt). |
||||
|
||||
Put these files into the working directory of this sample program.
||||
|
||||
-# Read and initialize network using path to .prototxt and .caffemodel files |
||||
@snippet dnn/samples/squeezenet_halide.cpp Read and initialize network |
||||
|
||||
-# Check that network was read successfully |
||||
@snippet dnn/samples/squeezenet_halide.cpp Check that network was read successfully |
||||
|
||||
-# Read input image and convert to the 4-dimensional blob, acceptable by SqueezeNet v1.1 |
||||
@snippet dnn/samples/squeezenet_halide.cpp Prepare blob |
||||
|
||||
-# Pass the blob to the network |
||||
@snippet dnn/samples/squeezenet_halide.cpp Set input blob |
||||
|
||||
-# Enable using Halide backend for layers where it is implemented |
||||
@snippet dnn/samples/squeezenet_halide.cpp Enable Halide backend |
||||
|
||||
-# Compile Halide functions to execute on CPU |
||||
@snippet dnn/samples/squeezenet_halide.cpp Compile Halide pipeline |
||||
|
||||
-# Make forward pass |
||||
@snippet dnn/samples/squeezenet_halide.cpp Make forward pass |
||||
Remember that the first forward pass after initialization requires considerably more
time than the subsequent ones. This is because Halide pipelines are compiled at runtime
on the first invocation.
||||
|
||||
-# Determine the best class |
||||
@snippet dnn/samples/squeezenet_halide.cpp Gather output |
||||
|
||||
-# Print results |
||||
@snippet dnn/samples/squeezenet_halide.cpp Print results |
||||
For our image we get: |
||||
|
||||
> Best class: #812 'space shuttle' |
||||
> |
||||
> Probability: 97.9812% |
@ -0,0 +1,83 @@ |
||||
# How to schedule your network for Halide backend {#tutorial_dnn_halide_scheduling} |
||||
|
||||
## Introduction |
||||
Halide code is the same for every device we use. But to achieve satisfactory
efficiency we should schedule computations properly. In this tutorial we describe
the ways to schedule your networks using the Halide backend in the OpenCV deep learning module.
||||
|
||||
For better understanding of Halide scheduling you might want to read tutorials @ http://halide-lang.org/tutorials. |
||||
|
||||
If this is your first encounter with Halide in OpenCV, we recommend starting with @ref tutorial_dnn_halide.
||||
|
||||
## Configuration files |
||||
When you call ```cv::dnn::Net::compileHalide```, you can pass a path to a text file
that contains scheduling directives for a specific device.
||||
|
||||
Scheduling configuration files are YAML files in which each node is a
scheduled function or a scheduling directive.
||||
@code |
||||
relu1: |
||||
reorder: [x, c, y] |
||||
split: { y: 2, c: 8 } |
||||
parallel: [yo, co] |
||||
unroll: yi |
||||
vectorize: { x: 4 } |
||||
conv1_constant_exterior: |
||||
compute_at: { relu1: yi } |
||||
@endcode |
||||
|
||||
The following variable names are used: `n` for the batch dimension, `c` for channels,
`y` for rows and `x` for columns. Variables produced by a split keep the original
name as a prefix and get the suffixes `o` and `i` for the outer and inner variables
respectively. For example, for variable `x` in range `[0, 10)` the directive
`split: { x: 2 }` gives new variables `xo` in range `[0, 5)` and `xi` in range `[0, 2)`.
The variable name `x` is no longer available in the same scheduling node.
||||
|
||||
You can find scheduling examples at [opencv_extra/testdata/dnn](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn)
and use them to schedule your networks.
||||
|
||||
## Layers fusing |
||||
Thanks to layer fusing we only need to schedule the top layers of fused sets,
because every output value is computed using the fused formula.
For example, if you have three consecutive layers Convolution + Scale + ReLU,
||||
@code |
||||
conv(x, y, c, n) = sum(...) + bias(c); |
||||
scale(x, y, c, n) = conv(x, y, c, n) * weights(c); |
||||
relu(x, y, c, n) = max(scale(x, y, c, n), 0); |
||||
@endcode |
||||
|
||||
fused function is something like |
||||
@code |
||||
relu(x, y, c, n) = max((sum(...) + bias(c)) * weights(c), 0); |
||||
@endcode |
||||
|
||||
So only the function called `relu` requires scheduling.
||||
|
||||
## Scheduling patterns |
||||
Sometimes networks are built from repeated blocks, which means that some layers are
identical or quite similar. If you want to apply the same scheduling to
different layers, up to tiling or vectorization factors, define scheduling
patterns in the `patterns` section at the beginning of the scheduling file.
Your patterns may also use parametric variables.
||||
@code |
||||
# At the beginning of the file |
||||
patterns: |
||||
fully_connected: |
||||
split: { c: c_split } |
||||
fuse: { src: [x, y, co], dst: block } |
||||
parallel: block |
||||
vectorize: { ci: c_split } |
||||
# Somewhere below |
||||
fc8: |
||||
pattern: fully_connected |
||||
params: { c_split: 8 } |
||||
@endcode |
||||
|
||||
## Automatic scheduling |
||||
Based on manual scheduling experience, there is also a way to schedule layers
automatically. Just omit the scheduling file path argument of ```cv::dnn::Net::compileHalide```
to let DNN schedule your network. Sometimes it might be even better
than manual scheduling.
||||
|
||||
You can mix manual and automatic scheduling: write a scheduling file
and leave out the layers that you want to be scheduled automatically.
Loading…
Reference in new issue