Custom deep learning layers in Python

pull/11394/head
Dmitry Kurtaev 7 years ago
parent ca1975cada
commit d5b9563263
  1. 34
      doc/tutorials/dnn/dnn_custom_layers/dnn_custom_layers.md
  2. BIN
      doc/tutorials/dnn/images/lena_hed.jpg
  3. 180
      modules/dnn/misc/python/pyopencv_dnn.hpp
  4. 4
      modules/python/src2/cv2.cpp
  5. 69
      samples/dnn/edge_detection.py

@ -190,3 +190,37 @@ In our case resize's output shape will be stored in layer's `blobs[0]`.
Next we register a layer and try to import the model.
@snippet dnn/custom_layers.cpp Register ResizeBilinearLayer
## Define a custom layer in Python
The following example shows how to customize OpenCV's layers in Python.
Let's consider the [Holistically-Nested Edge Detection](https://arxiv.org/abs/1504.06375)
deep learning model. It was trained with only one difference compared to
the current version of the [Caffe framework](http://caffe.berkeleyvision.org/): `Crop`
layers, which receive two input blobs and crop the first one to match the spatial dimensions
of the second one, used to crop from the center. Nowadays Caffe's layer does it
from the top-left corner. So if you use the latest version of Caffe or OpenCV you'll
get shifted results with filled borders.
Next we're going to replace OpenCV's `Crop` layer, which crops from the top-left corner,
with a centric one.
- Create a class with `getMemoryShapes` and `forward` methods
@snippet dnn/edge_detection.py CropLayer
@note Both methods should return lists.
- Register a new layer.
@snippet dnn/edge_detection.py Register
That's it! We've replaced an already implemented OpenCV layer with a custom one.
You may find a full script in the [source code](https://github.com/opencv/opencv/tree/master/samples/dnn/edge_detection.py).
<table border="0">
<tr>
<td>![](js_tutorials/js_assets/lena.jpg)</td>
<td>![](images/lena_hed.jpg)</td>
</tr>
</table>

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

@ -40,4 +40,182 @@ bool pyopencv_to(PyObject *o, std::vector<Mat> &blobs, const char *name) //requi
return pyopencvVecConverter<Mat>::to(o, blobs, ArgInfo(name, false));
}
#endif
template<typename T>
PyObject* pyopencv_from(const dnn::DictValue &dv)
{
if (dv.size() > 1)
{
std::vector<T> vec(dv.size());
for (int i = 0; i < dv.size(); ++i)
vec[i] = dv.get<T>(i);
return pyopencv_from_generic_vec(vec);
}
else
return pyopencv_from(dv.get<T>());
}
template<>
PyObject* pyopencv_from(const dnn::DictValue &dv)
{
if (dv.isInt()) return pyopencv_from<int>(dv);
if (dv.isReal()) return pyopencv_from<float>(dv);
if (dv.isString()) return pyopencv_from<String>(dv);
CV_Error(Error::StsNotImplemented, "Unknown value type");
return NULL;
}
template<>
PyObject* pyopencv_from(const dnn::LayerParams& lp)
{
PyObject* dict = PyDict_New();
for (std::map<String, dnn::DictValue>::const_iterator it = lp.begin(); it != lp.end(); ++it)
{
CV_Assert(!PyDict_SetItemString(dict, it->first.c_str(), pyopencv_from(it->second)));
}
return dict;
}
class pycvLayer CV_FINAL : public dnn::Layer
{
public:
pycvLayer(const dnn::LayerParams &params, PyObject* pyLayer) : Layer(params)
{
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
PyObject* args = PyTuple_New(2);
CV_Assert(!PyTuple_SetItem(args, 0, pyopencv_from(params)));
CV_Assert(!PyTuple_SetItem(args, 1, pyopencv_from(params.blobs)));
o = PyObject_CallObject(pyLayer, args);
Py_DECREF(args);
PyGILState_Release(gstate);
if (!o)
CV_Error(Error::StsError, "Failed to create an instance of custom layer");
}
static void registerLayer(const std::string& type, PyObject* o)
{
std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
if (it != pyLayers.end())
it->second.push_back(o);
else
pyLayers[type] = std::vector<PyObject*>(1, o);
}
static void unregisterLayer(const std::string& type)
{
std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(type);
if (it != pyLayers.end())
{
if (it->second.size() > 1)
it->second.pop_back();
else
pyLayers.erase(it);
}
}
static Ptr<dnn::Layer> create(dnn::LayerParams &params)
{
std::map<std::string, std::vector<PyObject*> >::iterator it = pyLayers.find(params.type);
if (it == pyLayers.end())
CV_Error(Error::StsNotImplemented, "Layer with a type \"" + params.type +
"\" is not implemented");
CV_Assert(!it->second.empty());
return Ptr<dnn::Layer>(new pycvLayer(params, it->second.back()));
}
virtual bool getMemoryShapes(const std::vector<std::vector<int> > &inputs,
const int,
std::vector<std::vector<int> > &outputs,
std::vector<std::vector<int> > &) const CV_OVERRIDE
{
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
PyObject* args = PyList_New(inputs.size());
for(size_t i = 0; i < inputs.size(); ++i)
PyList_SET_ITEM(args, i, pyopencv_from_generic_vec(inputs[i]));
PyObject* res = PyObject_CallMethodObjArgs(o, PyString_FromString("getMemoryShapes"), args, NULL);
Py_DECREF(args);
PyGILState_Release(gstate);
if (!res)
CV_Error(Error::StsNotImplemented, "Failed to call \"getMemoryShapes\" method");
pyopencv_to_generic_vec(res, outputs, ArgInfo("", 0));
return false;
}
virtual void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &) CV_OVERRIDE
{
PyGILState_STATE gstate;
gstate = PyGILState_Ensure();
std::vector<Mat> inps(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i)
inps[i] = *inputs[i];
PyObject* args = pyopencv_from(inps);
PyObject* res = PyObject_CallMethodObjArgs(o, PyString_FromString("forward"), args, NULL);
Py_DECREF(args);
PyGILState_Release(gstate);
if (!res)
CV_Error(Error::StsNotImplemented, "Failed to call \"forward\" method");
std::vector<Mat> pyOutputs;
pyopencv_to(res, pyOutputs, ArgInfo("", 0));
CV_Assert(pyOutputs.size() == outputs.size());
for (size_t i = 0; i < outputs.size(); ++i)
{
CV_Assert(pyOutputs[i].size == outputs[i].size);
CV_Assert(pyOutputs[i].type() == outputs[i].type());
pyOutputs[i].copyTo(outputs[i]);
}
}
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE
{
CV_Error(Error::StsNotImplemented, "");
}
private:
// Map layers types to python classes.
static std::map<std::string, std::vector<PyObject*> > pyLayers;
PyObject* o; // Instance of implemented python layer.
};
std::map<std::string, std::vector<PyObject*> > pycvLayer::pyLayers;
static PyObject *pyopencv_cv_dnn_registerLayer(PyObject*, PyObject *args, PyObject *kw)
{
const char *keywords[] = { "type", "class", NULL };
char* layerType;
PyObject *classInstance;
if (!PyArg_ParseTupleAndKeywords(args, kw, "sO", (char**)keywords, &layerType, &classInstance))
return NULL;
if (!PyCallable_Check(classInstance)) {
PyErr_SetString(PyExc_TypeError, "class must be callable");
return NULL;
}
pycvLayer::registerLayer(layerType, classInstance);
dnn::LayerFactory::registerLayer(layerType, pycvLayer::create);
Py_RETURN_NONE;
}
static PyObject *pyopencv_cv_dnn_unregisterLayer(PyObject*, PyObject *args, PyObject *kw)
{
const char *keywords[] = { "type", NULL };
char* layerType;
if (!PyArg_ParseTupleAndKeywords(args, kw, "s", (char**)keywords, &layerType))
return NULL;
pycvLayer::unregisterLayer(layerType);
dnn::LayerFactory::unregisterLayer(layerType);
Py_RETURN_NONE;
}
#endif // HAVE_OPENCV_DNN

@ -1783,6 +1783,10 @@ static PyMethodDef special_methods[] = {
{"createTrackbar", pycvCreateTrackbar, METH_VARARGS, "createTrackbar(trackbarName, windowName, value, count, onChange) -> None"},
{"createButton", (PyCFunction)pycvCreateButton, METH_VARARGS | METH_KEYWORDS, "createButton(buttonName, onChange [, userData, buttonType, initialButtonState]) -> None"},
{"setMouseCallback", (PyCFunction)pycvSetMouseCallback, METH_VARARGS | METH_KEYWORDS, "setMouseCallback(windowName, onMouse [, param]) -> None"},
#endif
#ifdef HAVE_OPENCV_DNN
{"dnn_registerLayer", (PyCFunction)pyopencv_cv_dnn_registerLayer, METH_VARARGS | METH_KEYWORDS, "registerLayer(type, class) -> None"},
{"dnn_unregisterLayer", (PyCFunction)pyopencv_cv_dnn_unregisterLayer, METH_VARARGS | METH_KEYWORDS, "unregisterLayer(type) -> None"},
#endif
{NULL, NULL},
};

@ -0,0 +1,69 @@
import cv2 as cv
import argparse
parser = argparse.ArgumentParser(
description='This sample shows how to define custom OpenCV deep learning layers in Python. '
'Holistically-Nested Edge Detection (https://arxiv.org/abs/1504.06375) neural network '
'is used as an example model. Find a pre-trained model at https://github.com/s9xie/hed.')
parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
parser.add_argument('--prototxt', help='Path to deploy.prototxt', required=True)
parser.add_argument('--caffemodel', help='Path to hed_pretrained_bsds.caffemodel', required=True)
parser.add_argument('--width', help='Resize input image to a specific width', default=500, type=int)
parser.add_argument('--height', help='Resize input image to a specific height', default=500, type=int)
args = parser.parse_args()
#! [CropLayer]
class CropLayer(object):
def __init__(self, params, blobs):
self.xstart = 0
self.xend = 0
self.ystart = 0
self.yend = 0
# Our layer receives two inputs. We need to crop the first input blob
# to match a shape of the second one (keeping batch size and number of channels)
def getMemoryShapes(self, inputs):
inputShape, targetShape = inputs[0], inputs[1]
batchSize, numChannels = inputShape[0], inputShape[1]
height, width = targetShape[2], targetShape[3]
self.ystart = (inputShape[2] - targetShape[2]) / 2
self.xstart = (inputShape[3] - targetShape[3]) / 2
self.yend = self.ystart + height
self.xend = self.xstart + width
return [[batchSize, numChannels, height, width]]
def forward(self, inputs):
return [inputs[0][:,:,self.ystart:self.yend,self.xstart:self.xend]]
#! [CropLayer]
#! [Register]
cv.dnn_registerLayer('Crop', CropLayer)
#! [Register]
# Load the model.
net = cv.dnn.readNet(args.prototxt, args.caffemodel)
kWinName = 'Holistically-Nested Edge Detection'
cv.namedWindow('Input', cv.WINDOW_NORMAL)
cv.namedWindow(kWinName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
hasFrame, frame = cap.read()
if not hasFrame:
cv.waitKey()
break
cv.imshow('Input', frame)
inp = cv.dnn.blobFromImage(frame, scalefactor=1.0, size=(args.width, args.height),
mean=(104.00698793, 116.66876762, 122.67891434),
swapRB=False, crop=False)
net.setInput(inp)
out = net.forward()
out = out[0, 0]
out = cv.resize(out, (frame.shape[1], frame.shape[0]))
cv.imshow(kWinName, out)
Loading…
Cancel
Save