Updated Halide tests. Simplified batch norm using Halide. Bug fix in convolutional layer.

pull/1249/head
dkurt 8 years ago
parent e551d15c2b
commit 37cf4974a4
  1. 4
      modules/dnn/perf/perf_halide_net.cpp
  2. 29
      modules/dnn/src/layers/batch_norm_layer.cpp
  3. 2
      modules/dnn/src/layers/convolution_layer.cpp
  4. 20
      modules/dnn/test/test_halide_nets.cpp

@ -39,7 +39,7 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
else else
CV_Error(Error::StsNotImplemented, "Unknown framework " + framework); CV_Error(Error::StsNotImplemented, "Unknown framework " + framework);
net->setInput(blobFromImage(input, 1.0, false)); net->setInput(blobFromImage(input, 1.0, Size(), Scalar(), false));
net->setPreferableBackend(DNN_BACKEND_HALIDE); net->setPreferableBackend(DNN_BACKEND_HALIDE);
net->setPreferableTarget(targetId); net->setPreferableTarget(targetId);
net->setHalideScheduler(scheduler); net->setHalideScheduler(scheduler);
@ -52,7 +52,7 @@ static void loadNet(std::string weights, std::string proto, std::string schedule
PERF_TEST(GoogLeNet, HalidePerfTest) PERF_TEST(GoogLeNet, HalidePerfTest)
{ {
Net net; Net net;
loadNet("dnn/bvlc_googlenet2.caffemodel", "dnn/bvlc_googlenet.prototxt", loadNet("dnn/bvlc_googlenet.caffemodel", "dnn/bvlc_googlenet.prototxt",
"", 227, 227, "prob", "caffe", DNN_TARGET_CPU, &net); "", 227, 227, "prob", "caffe", DNN_TARGET_CPU, &net);
TEST_CYCLE() net.forward(); TEST_CYCLE() net.forward();
SANITY_CHECK_NOTHING(); SANITY_CHECK_NOTHING();

@ -167,32 +167,9 @@ public:
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name)); Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
Halide::Var x("x"), y("y"), c("c"), n("n"); Halide::Var x("x"), y("y"), c("c"), n("n");
const int weightsBlobIndex = 2; const int numChannels = weights_.total();
const int biasBlobIndex = weightsBlobIndex + hasWeights; auto weights = wrapToHalideBuffer(weights_, {numChannels});
const int numChannels = blobs[0].total(); auto bias = wrapToHalideBuffer(bias_, {numChannels});
float* meanData = (float*)blobs[0].data;
float* stdData = (float*)blobs[1].data;
float* weightsData = (hasWeights ? (float*)blobs[weightsBlobIndex].data : NULL);
float* biasData = (hasBias ? (float*)blobs[biasBlobIndex].data : NULL);
float varMeanScale = 1.f;
if (!hasWeights && !hasBias) {
varMeanScale = *blobs[2].ptr<float>();
if (varMeanScale != 0)
varMeanScale = 1/varMeanScale;
}
Halide::Buffer<float> weights(numChannels);
Halide::Buffer<float> bias(numChannels);
for (int i = 0; i < numChannels; ++i)
{
weights(i) = (hasWeights ? weightsData[i] : 1.0f) /
sqrt(stdData[i] * varMeanScale + epsilon);
bias(i) = (hasBias ? biasData[i] : 0.0f) -
weights(i) * meanData[i] * varMeanScale;
}
weights.set_host_dirty();
bias.set_host_dirty();
top(x, y, c, n) = input * weights(c) + bias(c); top(x, y, c, n) = input * weights(c) + bias(c);
return top; return top;
} }

@ -625,7 +625,7 @@ public:
{ {
// prepare weightsMat where each row is aligned and has enough zero padding on the right to // prepare weightsMat where each row is aligned and has enough zero padding on the right to
// use vectorized (i.e. with intrinsics) loops without tail processing // use vectorized (i.e. with intrinsics) loops without tail processing
Mat wm = blobs[0].reshape(1, outCn); Mat wm = blobs[0].reshape(1, outCn).clone();
if( wm.step1() % VEC_ALIGN != 0 ) if( wm.step1() % VEC_ALIGN != 0 )
{ {
int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);

@ -34,7 +34,7 @@ static void loadNet(const std::string& weights, const std::string& proto,
static void test(const std::string& weights, const std::string& proto, static void test(const std::string& weights, const std::string& proto,
const std::string& scheduler, int inWidth, int inHeight, const std::string& scheduler, int inWidth, int inHeight,
const std::string& outputLayer, const std::string& framework, const std::string& outputLayer, const std::string& framework,
int targetId) int targetId, double l1 = 1e-5, double lInf = 1e-4)
{ {
Mat input(inHeight, inWidth, CV_32FC3), outputDefault, outputHalide; Mat input(inHeight, inWidth, CV_32FC3), outputDefault, outputHalide;
randu(input, 0.0f, 1.0f); randu(input, 0.0f, 1.0f);
@ -43,23 +43,23 @@ static void test(const std::string& weights, const std::string& proto,
loadNet(weights, proto, framework, &netDefault); loadNet(weights, proto, framework, &netDefault);
loadNet(weights, proto, framework, &netHalide); loadNet(weights, proto, framework, &netHalide);
netDefault.setInput(blobFromImage(input.clone(), 1.0f, false)); netDefault.setInput(blobFromImage(input.clone(), 1.0f, Size(), Scalar(), false));
outputDefault = netDefault.forward(outputLayer).clone(); outputDefault = netDefault.forward(outputLayer).clone();
netHalide.setInput(blobFromImage(input.clone(), 1.0f, false)); netHalide.setInput(blobFromImage(input.clone(), 1.0f, Size(), Scalar(), false));
netHalide.setPreferableBackend(DNN_BACKEND_HALIDE); netHalide.setPreferableBackend(DNN_BACKEND_HALIDE);
netHalide.setPreferableTarget(targetId); netHalide.setPreferableTarget(targetId);
netHalide.setHalideScheduler(scheduler); netHalide.setHalideScheduler(scheduler);
outputHalide = netHalide.forward(outputLayer).clone(); outputHalide = netHalide.forward(outputLayer).clone();
normAssert(outputDefault, outputHalide); normAssert(outputDefault, outputHalide, "First run", l1, lInf);
// An extra test: change input. // An extra test: change input.
input *= 0.1f; input *= 0.1f;
netDefault.setInput(blobFromImage(input.clone(), 1.0, false)); netDefault.setInput(blobFromImage(input.clone(), 1.0, Size(), Scalar(), false));
netHalide.setInput(blobFromImage(input.clone(), 1.0, false)); netHalide.setInput(blobFromImage(input.clone(), 1.0, Size(), Scalar(), false));
normAssert(outputDefault, outputHalide); normAssert(outputDefault, outputHalide, "Second run", l1, lInf);
// Swap backends. // Swap backends.
netHalide.setPreferableBackend(DNN_BACKEND_DEFAULT); netHalide.setPreferableBackend(DNN_BACKEND_DEFAULT);
@ -71,7 +71,7 @@ static void test(const std::string& weights, const std::string& proto,
netDefault.setHalideScheduler(scheduler); netDefault.setHalideScheduler(scheduler);
outputHalide = netDefault.forward(outputLayer).clone(); outputHalide = netDefault.forward(outputLayer).clone();
normAssert(outputDefault, outputHalide); normAssert(outputDefault, outputHalide, "Swap backends", l1, lInf);
} }
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
@ -119,7 +119,7 @@ TEST(Reproducibility_ENet_Halide, Accuracy)
{ {
test(findDataFile("dnn/Enet-model-best.net", false), "", test(findDataFile("dnn/Enet-model-best.net", false), "",
findDataFile("dnn/halide_scheduler_enet.yml", false), findDataFile("dnn/halide_scheduler_enet.yml", false),
512, 512, "l367_Deconvolution", "torch", DNN_TARGET_CPU); 512, 512, "l367_Deconvolution", "torch", DNN_TARGET_CPU, 2e-5, 0.15);
}; };
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// OpenCL target // OpenCL target
@ -166,7 +166,7 @@ TEST(Reproducibility_ENet_Halide_opencl, Accuracy)
{ {
test(findDataFile("dnn/Enet-model-best.net", false), "", test(findDataFile("dnn/Enet-model-best.net", false), "",
findDataFile("dnn/halide_scheduler_opencl_enet.yml", false), findDataFile("dnn/halide_scheduler_opencl_enet.yml", false),
512, 512, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL); 512, 512, "l367_Deconvolution", "torch", DNN_TARGET_OPENCL, 2e-5, 0.14);
}; };
#endif // HAVE_HALIDE #endif // HAVE_HALIDE

Loading…
Cancel
Save