Fix deconvolution layer. Add batch norm layer with mean-variance normalization from TensorFlow.

pull/10850/head
Dmitry Kurtaev 7 years ago
parent ab0f0f26a1
commit a6baedd02c
  1. 37
      modules/dnn/src/layers/convolution_layer.cpp
  2. 8
      modules/dnn/src/layers/mvn_layer.cpp
  3. 103
      modules/dnn/src/tensorflow/tf_importer.cpp
  4. 6
      modules/dnn/test/test_tf_importer.cpp

@ -1127,8 +1127,25 @@ public:
int inpH = inputs[0][2];
int inpW = inputs[0][3];
int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
int outH = -1, outW = -1;
if (padMode.empty())
{
outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
}
else if (padMode == "VALID")
{
outH = stride.height * (inpH - 1) + kernel.height + adjustPad.height;
outW = stride.width * (inpW - 1) + kernel.width + adjustPad.width;
}
else if (padMode == "SAME")
{
outH = stride.height * (inpH - 1) + 1 + adjustPad.height;
outW = stride.width * (inpW - 1) + 1 + adjustPad.width;
}
else
CV_Error(Error::StsError, "Unsupported padding mode " + padMode);
int outCn = numOutput;
CV_Assert(outCn % blobs[0].size[1] == 0);
@ -1150,6 +1167,14 @@ public:
return false;
}
// Runs the base-class finalize step, then recomputes `pad` from the sizes of
// the first output and first input blobs via getConvPoolPaddings.
void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
{
    BaseConvolutionLayerImpl::finalize(inputs, outputs);

    // Each Size is built from axis 3 and axis 2 of the blob, in that order.
    const Mat& firstOutput = outputs[0];
    const Mat& firstInput = *inputs[0];
    const Size outputSize(firstOutput.size[3], firstOutput.size[2]);
    const Size inputSize(firstInput.size[3], firstInput.size[2]);

    // The output size is passed first and the input size second.
    // NOTE(review): presumably because a deconvolution's output corresponds to
    // the input of the equivalent convolution -- confirm against the
    // getConvPoolPaddings signature.
    getConvPoolPaddings(outputSize, inputSize, kernel, stride, padMode, dilation, pad);
}
class MatMulInvoker : public ParallelLoopBody
{
public:
@ -1316,6 +1341,7 @@ public:
int kernel_h, int kernel_w,
int pad_h, int pad_w,
int stride_h, int stride_w,
int height_col, int width_col,
float* data_im,
const float* biasvec,
bool is1x1)
@ -1329,8 +1355,8 @@ public:
t.kernel_h = kernel_h; t.kernel_w = kernel_w;
t.pad_h = pad_h; t.pad_w = pad_w;
t.stride_h = stride_h; t.stride_w = stride_w;
t.height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
t.width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
t.height_col = height_col;
t.width_col = width_col;
t.nstripes = nstripes;
t.is1x1 = is1x1;
t.biasvec = biasvec;
@ -1520,6 +1546,7 @@ public:
const Mat& inp = *inputs[ii];
Mat& out = outputs[ii];
int numImg = inp.size[0];
int inpH = inp.size[2], inpW = inp.size[3];
int outH = out.size[2], outW = out.size[3];
Mat convBlob = inputs[ii]->reshape(1, numImg*inpCn);
@ -1542,7 +1569,7 @@ public:
Col2ImInvoker::run(colMat.ptr<float>(), outGroupCn, outH, outW,
kernel.height, kernel.width, pad.height, pad.width,
stride.height, stride.width, dstMat.ptr<float>(),
stride.height, stride.width, inpH, inpW, dstMat.ptr<float>(),
curBiasMat.ptr<float>(), is1x1flag);
}
}

@ -273,6 +273,14 @@ public:
int i, newRows = 1;
for( i = 0; i < splitDim; i++ )
newRows *= inpBlob.size[i];
if (inpBlob.total() == newRows)
{
// MVN is applied to a single value in every row, so the output is set to zero.
outBlob.setTo(0);
return;
}
Mat inpMat = inpBlob.reshape(1, newRows);
Mat outMat = outBlob.reshape(1, newRows);

@ -1160,8 +1160,35 @@ void TFImporter::populateNet(Net dstNet)
int id;
if (scaleMat.total() == 1) // is a scalar.
{
layerParams.set("scale", scaleMat.at<float>(0));
id = dstNet.addLayer(name, "Power", layerParams);
// Try to match with a LeakyRelu:
// node {
// name: "LeakyRelu/mul"
// op: "Mul"
// input: "LeakyRelu/alpha"
// input: "input"
// }
// node {
// name: "LeakyRelu/Maximum"
// op: "Maximum"
// input: "LeakyRelu/mul"
// input: "input"
// }
StrIntVector next_layers = getNextLayers(net, name, "Maximum");
if (!next_layers.empty())
{
int maximumLayerIdx = next_layers[0].second;
ExcludeLayer(net, maximumLayerIdx, 0, false);
layers_to_ignore.insert(next_layers[0].first);
layerParams.set("negative_slope", scaleMat.at<float>(0));
id = dstNet.addLayer(name, "ReLU", layerParams);
}
else
{
// Just a multiplication.
layerParams.set("scale", scaleMat.at<float>(0));
id = dstNet.addLayer(name, "Power", layerParams);
}
}
else // is a vector
{
@ -1241,16 +1268,37 @@ void TFImporter::populateNet(Net dstNet)
if (layer.input_size() != 5)
CV_Error(Error::StsNotImplemented,
"Expected gamma, beta, mean and std");
Pin inpId = parsePin(layer.input(0));
bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b();
layerParams.blobs.resize(4);
// gamma
blobFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[2]);
// beta
blobFromTensor(getConstBlob(layer, value_id, 2), layerParams.blobs[3]);
// mean
blobFromTensor(getConstBlob(layer, value_id, 3), layerParams.blobs[0]);
// std
blobFromTensor(getConstBlob(layer, value_id, 4), layerParams.blobs[1]);
Mat gamma, beta, mean, std;
blobFromTensor(getConstBlob(layer, value_id, 1), gamma);
blobFromTensor(getConstBlob(layer, value_id, 2), beta);
if (isTraining)
{
mean = Mat::zeros(1, beta.total(), CV_32F);
std = Mat::ones(1, beta.total(), CV_32F);
// Add an extra layer: Mean-Variance normalization
LayerParams mvnParams;
std::string mvnName = name + "/MVN";
CV_Assert(layer_id.find(mvnName) == layer_id.end());
int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams);
layer_id[mvnName] = mvnId;
connect(layer_id, dstNet, inpId, mvnId, 0);
inpId = Pin(mvnName);
}
else
{
blobFromTensor(getConstBlob(layer, value_id, 3), mean);
blobFromTensor(getConstBlob(layer, value_id, 4), std);
}
layerParams.blobs[0] = mean;
layerParams.blobs[1] = std;
layerParams.blobs[2] = gamma;
layerParams.blobs[3] = beta;
if (hasLayerAttr(layer, "epsilon"))
layerParams.set("eps", getLayerAttr(layer, "epsilon").f());
@ -1262,7 +1310,7 @@ void TFImporter::populateNet(Net dstNet)
layer_id[name] = id;
// one input only
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
connect(layer_id, dstNet, inpId, id, 0);
}
else if (type == "Conv2DBackpropInput")
{
@ -1293,13 +1341,42 @@ void TFImporter::populateNet(Net dstNet)
kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
const int* kshape = layerParams.blobs[0].size.p;
layerParams.set("kernel_h", kshape[2]);
layerParams.set("kernel_w", kshape[3]);
const int kernelH = kshape[2];
const int kernelW = kshape[3];
layerParams.set("kernel_h", kernelH);
layerParams.set("kernel_w", kernelW);
layerParams.set("num_output", kshape[1]);
setStrides(layerParams, layer);
setPadding(layerParams, layer);
// For convolution layer, output shape computes as
// o = 1 + (i - k + 2*p) / s
// i - input size, o - output size, k - kernel size, p - pad, s - stride
// In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2
// considering that k is odd.
// SAME: o = 1 + (i - 1) / s
// VALID: o = 1 + i / s
// Deconvolution's layer output shape computes as
// SAME: o = 1 + (i - 1)*s
// VALID: o = (i - 1)*s
// If output_shape differs from formulas above then adjust padding is applied.
const int strideY = layerParams.get<int>("stride_h");
const int strideX = layerParams.get<int>("stride_w");
Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0));
const int outH = outShape.at<int>(2);
const int outW = outShape.at<int>(1);
if (layerParams.get<String>("pad_mode") == "SAME")
{
layerParams.set("adj_w", (outW - 1) % strideX);
layerParams.set("adj_h", (outH - 1) % strideY);
}
else if (layerParams.get<String>("pad_mode") == "VALID")
{
layerParams.set("adj_w", (outW - kernelW) % strideX);
layerParams.set("adj_h", (outH - kernelH) % strideY);
}
int id = dstNet.addLayer(name, "Deconvolution", layerParams);
layer_id[name] = id;

@ -150,6 +150,8 @@ TEST(Test_TensorFlow, batch_norm)
runTensorFlowNet("batch_norm");
runTensorFlowNet("fused_batch_norm");
runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true);
runTensorFlowNet("mvn_batch_norm");
runTensorFlowNet("mvn_batch_norm_1x1");
}
OCL_TEST(Test_TensorFlow, batch_norm)
@ -170,6 +172,10 @@ TEST(Test_TensorFlow, pooling)
// Runs every deconvolution test model through runTensorFlowNet, in order.
TEST(Test_TensorFlow, deconvolution)
{
    static const char* const kDeconvModels[] = {
        "deconvolution",
        "deconvolution_same",
        "deconvolution_stride_2_same",
        "deconvolution_adj_pad_valid",
        "deconvolution_adj_pad_same"
    };
    const int numModels = sizeof(kDeconvModels) / sizeof(kDeconvModels[0]);
    for (int modelIdx = 0; modelIdx < numModels; ++modelIdx)
    {
        runTensorFlowNet(kDeconvModels[modelIdx]);
    }
}
OCL_TEST(Test_TensorFlow, deconvolution)

Loading…
Cancel
Save