Support global pooling by axis

pull/16223/head
Liubov Batanina 5 years ago
parent ffa72fc979
commit 543e0302d3
  1. 1
      modules/dnn/include/opencv2/dnn/all_layers.hpp
  2. 11
      modules/dnn/src/layers/pooling_layer.cpp
  3. 54
      modules/dnn/src/tensorflow/tf_importer.cpp

@ -251,6 +251,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
CV_DEPRECATED_EXTERNAL Size kernel, stride, pad;
CV_DEPRECATED_EXTERNAL int pad_l, pad_t, pad_r, pad_b;
// When true, the pooling kernel is replaced by the full extent of the input.
bool globalPooling;
// Index of the single axis that is pooled over its full input extent.
// -1 means "not set"; it is the value assigned when the "global_axis" layer
// parameter is absent.
// NOTE(review): the index appears to be relative to the spatial dimensions
// (the output shape is written at position 2 + global_axis) - confirm.
int global_axis;
bool computeMaxIdx;
String padMode;
bool ceilMode;

@ -95,6 +95,8 @@ public:
else
CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
global_axis = params.has("global_axis") ? params.get<int>("global_axis") : -1;
getPoolingKernelParams(params, kernel_size, globalPooling, pads_begin, pads_end, strides, padMode);
if (kernel_size.size() == 2) {
kernel = Size(kernel_size[1], kernel_size[0]);
@ -149,6 +151,9 @@ public:
// Settle the effective kernel size before the paddings are computed.
if (globalPooling) {
// Full global pooling: every kernel extent is taken from the input extents.
kernel = Size(inp[1], inp[0]);
kernel_size = std::vector<size_t>(inp.begin(), inp.end());
} else if (global_axis != -1) {
// Pooling over one axis only: just that axis' kernel extent is replaced by
// the input extent; the remaining extents keep their configured values.
// NOTE(review): global_axis is used as an index without a range check here,
// and kernel_size is assumed to hold at least two entries - confirm that
// both are guaranteed before this point.
kernel_size[global_axis] = inp[global_axis];
kernel = Size(kernel_size[1], kernel_size[0]);
}
// Called after the kernel update above so that it receives the final kernel_size.
getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end);
@ -1037,6 +1042,12 @@ virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> >& inp
outShape[0] = inputs[1][0]; // Number of proposals;
outShape[1] = psRoiOutChannels;
}
else if (global_axis != -1)
{
CV_Assert(global_axis >= 0 && global_axis < inpShape.size());
outShape[2 + global_axis] = 1;
}
int numOutputs = requiredOutputs ? requiredOutputs : (type == MAX ? 2 : 1);
CV_Assert(numOutputs == 1 || (numOutputs == 2 && type == MAX));

@ -1944,14 +1944,13 @@ void TFImporter::populateNet(Net dstNet)
layer_id[flattenName] = flattenId;
connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0);
LayerParams reshapeLp;
std::string reshapeName = name + "/reshape";
CV_Assert(layer_id.find(reshapeName) == layer_id.end());
reshapeLp.set("axis", 0);
reshapeLp.set("axis", indices.at<int>(0));
reshapeLp.set("num_axes", 1);
std::vector<int> newShape = {1, 1, -1};
reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size()));
int newShape[] = {1, 1, -1};
reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3));
int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp);
layer_id[reshapeName] = reshapeId;
@ -1961,23 +1960,38 @@ void TFImporter::populateNet(Net dstNet)
std::string avgName = name + "/avg";
CV_Assert(layer_id.find(avgName) == layer_id.end());
avgLp.set("pool", "ave");
avgLp.set("kernel_h", 3); // TODO: node.shape[0]
avgLp.set("kernel_w", 1);
// pooling kernel H x 1
avgLp.set("global_axis", 0);
avgLp.set("kernel_size", 1);
int avgId = dstNet.addLayer(avgName, "Pooling", avgLp);
layer_id[avgName] = avgId;
// one input only
connect(layer_id, dstNet, Pin(reshapeName), avgId, 0);
LayerParams reshapeLp2;
std::string reshapeName2 = name;
CV_Assert(layer_id.find(reshapeName2) == layer_id.end());
newShape = {2, 20, 314, 253}; // TODO: remove out shapes
reshapeLp2.set("dim", DictValue::arrayInt<int*>(&newShape[0], newShape.size()));
LayerParams sliceLp;
std::string sliceName = name + "/slice";
CV_Assert(layer_id.find(sliceName) == layer_id.end());
sliceLp.set("axis", indices.at<int>(0));
int begin[] = {0};
int size[] = {1};
sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1));
sliceLp.set("size", DictValue::arrayInt(&size[0], 1));
int sliceId = dstNet.addLayer(sliceName, "Slice", sliceLp);
layer_id[sliceName] = sliceId;
connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0);
LayerParams squeezeLp;
std::string squeezeName = name + "/squeeze";
CV_Assert(layer_id.find(squeezeName) == layer_id.end());
squeezeLp.set("axis", indices.at<int>(0));
squeezeLp.set("end_axis", indices.at<int>(0) + 1);
int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp);
layer_id[squeezeName] = squeezeId;
connect(layer_id, dstNet, Pin(sliceName), squeezeId, 0);
int reshapeId2 = dstNet.addLayer(reshapeName2, "Reshape", reshapeLp2);
layer_id[reshapeName2] = reshapeId2;
connect(layer_id, dstNet, Pin(avgName), reshapeId2, 0);
int id = dstNet.addLayer(name, "Reshape", layerParams);
layer_id[name] = id;
connect(layer_id, dstNet, Pin(avgName), id, 0);
connect(layer_id, dstNet, Pin(squeezeName), id, 1);
} else {
if (indices.total() != 2 || indices.at<int>(0) != 1 || indices.at<int>(1) != 2)
CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean operation.");
@ -2021,13 +2035,15 @@ void TFImporter::populateNet(Net dstNet)
std::string base_name = name + "/reshape_";
std::vector<std::string> reshape_names;
for (int i = 0; i < num; i++) {
std::string reshape_name = base_name + std::to_string(i);
std::ostringstream ss;
ss << i;
std::string reshape_name = base_name + ss.str();
reshape_names.push_back(reshape_name);
LayerParams reshapeLP;
reshapeLP.set("axis", dim);
reshapeLP.set("num_axes", 1);
std::vector<int> outShape = {1, -1};
reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
int outShape[] = {1, -1};
reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
layer_id[reshape_name] = id;
connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);

Loading…
Cancel
Save