diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp
index 9923cf3ab8..70d439af40 100644
--- a/modules/dnn/src/caffe/caffe_importer.cpp
+++ b/modules/dnn/src/caffe/caffe_importer.cpp
@@ -293,14 +293,13 @@ public:
         addedBlobs.reserve(layersSize + 1);
 
         //setup input layer names
+        std::vector<String> netInputs(net.input_size());
         {
-            std::vector<String> netInputs(net.input_size());
             for (int inNum = 0; inNum < net.input_size(); inNum++)
             {
                 addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
                 netInputs[inNum] = net.input(inNum);
             }
-            dstNet.setInputsNames(netInputs);
         }
 
         for (int li = 0; li < layersSize; li++)
@@ -317,6 +316,13 @@ public:
             if (repetitions)
                 name += String("_") + toString(repetitions);
 
+            if (type == "Input")
+            {
+                addedBlobs.push_back(BlobNote(name, 0, netInputs.size()));
+                netInputs.push_back(name);
+                continue;
+            }
+
             int id = dstNet.addLayer(name, type, layerParams);
 
             for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
@@ -325,6 +331,7 @@ public:
             for (int outNum = 0; outNum < layer.top_size(); outNum++)
                 addOutput(layer, id, outNum);
         }
+        dstNet.setInputsNames(netInputs);
 
         addedBlobs.clear();
     }
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp
index 64e1155d5a..1ae2f85b24 100644
--- a/modules/dnn/src/init.cpp
+++ b/modules/dnn/src/init.cpp
@@ -106,6 +106,7 @@ void initializeLayerFactory()
    CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool,      MaxUnpoolLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Dropout,        BlankLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Identity,       BlankLayer);
+   CV_DNN_REGISTER_LAYER_CLASS(Silence,        BlankLayer);
 
    CV_DNN_REGISTER_LAYER_CLASS(Crop,           CropLayer);
    CV_DNN_REGISTER_LAYER_CLASS(Eltwise,        EltwiseLayer);
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 129b874ea0..d637f5497b 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -311,15 +311,15 @@ public:
                      Size kernel, Size pad, Size stride, Size dilation,
                      const ActivationLayer* activ, int ngroups, int nstripes )
     {
-        CV_Assert( input.dims == 4 && output.dims == 4 &&
-                   input.size[0] == output.size[0] &&
-                   weights.rows == output.size[1] &&
-                   weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height &&
-                   input.type() == output.type() &&
-                   input.type() == weights.type() &&
-                   input.type() == CV_32F &&
-                   input.isContinuous() &&
-                   output.isContinuous() &&
+        CV_Assert( input.dims == 4 && output.dims == 4,
+                   input.size[0] == output.size[0],
+                   weights.rows == output.size[1],
+                   weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height,
+                   input.type() == output.type(),
+                   input.type() == weights.type(),
+                   input.type() == CV_32F,
+                   input.isContinuous(),
+                   output.isContinuous(),
                    biasvec.size() == (size_t)output.size[1]+2);
 
         ParallelConv p;
@@ -1237,7 +1237,6 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer>& l, const Laye
                                l->pad.width, l->stride.height, l->stride.width, l->dilation.height,
                                l->dilation.width, l->padMode);
 
-    bool bias = params.get<bool>("bias_term", true);
     l->numOutput = params.get<int>("num_output");
    int ngroups = params.get<int>("group", 1);
 
@@ -1245,7 +1244,6 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer>& l, const Laye
     l->adjustPad.width = params.get<int>("adj_w", 0);
 
     CV_Assert(l->numOutput % ngroups == 0);
-    CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
     CV_Assert(l->adjustPad.width < l->stride.width &&
               l->adjustPad.height < l->stride.height);
 }
diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp
index 2d04c8a63c..f3c4a0c6cc 100644
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@@ -33,6 +33,7 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
+        CV_Assert(blobs.size() == 1 + hasBias);
         Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
         return true;
     }
@@ -48,8 +49,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_Assert(blobs.size() == 1 + hasBias);
-
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
             Mat &inpBlob = *inputs[ii];
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index 43ffe0dd06..cc6f675b77 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -211,4 +211,27 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
     normAssert(out, ref, "", l1, lInf);
 }
 
+// https://github.com/richzhang/colorization
+TEST(Reproducibility_Colorization, Accuracy)
+{
+    const float l1 = 1e-5;
+    const float lInf = 3e-3;
+
+    Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
+    Mat ref = blobFromNPY(_tf("colorization_out.npy"));
+    Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy"));
+
+    const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
+    const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
+    Net net = readNetFromCaffe(proto, model);
+
+    net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
+    net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
+
+    net.setInput(inp);
+    Mat out = net.forward();
+
+    normAssert(out, ref, "", l1, lInf);
+}
+
 }
diff --git a/samples/dnn/colorization.py b/samples/dnn/colorization.py
new file mode 100644
index 0000000000..3f5ff3b3aa
--- /dev/null
+++ b/samples/dnn/colorization.py
@@ -0,0 +1,67 @@
+# Script is based on https://github.com/richzhang/colorization/colorize.py
+import numpy as np
+import argparse
+import cv2 as cv
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='iColor: deep interactive colorization')
+    parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
+    parser.add_argument('--prototxt', help='Path to colorization_deploy_v2.prototxt', default='./models/colorization_release_v2.prototxt')
+    parser.add_argument('--caffemodel', help='Path to colorization_release_v2.caffemodel', default='./models/colorization_release_v2.caffemodel')
+    parser.add_argument('--kernel', help='Path to pts_in_hull.npy', default='./resources/pts_in_hull.npy')
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == '__main__':
+    W_in = 224
+    H_in = 224
+    imshowSize = (640, 480)
+
+    args = parse_args()
+
+    # Select desired model
+    net = cv.dnn.readNetFromCaffe(args.prototxt, args.caffemodel)
+
+    pts_in_hull = np.load(args.kernel) # load cluster centers
+
+    # populate cluster centers as 1x1 convolution kernel
+    pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1)
+    net.getLayer(long(net.getLayerId('class8_ab'))).blobs = [pts_in_hull.astype(np.float32)]
+    net.getLayer(long(net.getLayerId('conv8_313_rh'))).blobs = [np.full([1, 313], 2.606, np.float32)]
+
+    if args.input:
+        cap = cv.VideoCapture(args.input)
+    else:
+        cap = cv.VideoCapture(0)
+
+    while cv.waitKey(1) < 0:
+        hasFrame, frame = cap.read()
+        if not hasFrame:
+            cv.waitKey()
+            break
+
+        img_rgb = (frame[:,:,[2, 1, 0]] * 1.0 / 255).astype(np.float32)
+
+        img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
+        img_l = img_lab[:,:,0] # pull out L channel
+        (H_orig,W_orig) = img_rgb.shape[:2] # original image size
+
+        # resize image to network input size
+        img_rs = cv.resize(img_rgb, (W_in, H_in)) # resize image to network input size
+        img_lab_rs = cv.cvtColor(img_rs, cv.COLOR_RGB2Lab)
+        img_l_rs = img_lab_rs[:,:,0]
+        img_l_rs -= 50 # subtract 50 for mean-centering
+
+        net.setInput(cv.dnn.blobFromImage(img_l_rs))
+        ab_dec = net.forward('class8_ab')[0,:,:,:].transpose((1,2,0)) # this is our result
+
+        (H_out,W_out) = ab_dec.shape[:2]
+        ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig))
+        img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L
+        img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)
+
+        frame = cv.resize(frame, imshowSize)
+        cv.imshow('origin', frame)
+        cv.imshow('gray', cv.cvtColor(frame, cv.COLOR_RGB2GRAY))
+        cv.imshow('colorized', cv.resize(img_bgr_out, imshowSize))
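
For reference, the workflow the sample and the new test rely on is injecting the cluster-centre kernel into the loaded network, which the relaxed blob-count assertions above make possible. Below is a minimal single-image sketch condensed from samples/dnn/colorization.py (not part of the diff): the model and kernel paths, `input.jpg`, and `colorized.png` are placeholders, and the `long(...)` calls mirror the Python 2 era bindings used by the sample.

```python
# Minimal single-image sketch condensed from samples/dnn/colorization.py.
# All file paths below are placeholders.
import numpy as np
import cv2 as cv

net = cv.dnn.readNetFromCaffe('./models/colorization_deploy_v2.prototxt',
                              './models/colorization_release_v2.caffemodel')

# Inject the cluster centers as a 1x1 convolution kernel after loading;
# the importer no longer asserts a fixed blob count, so this is allowed.
pts_in_hull = np.load('./resources/pts_in_hull.npy')
pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1).astype(np.float32)
net.getLayer(long(net.getLayerId('class8_ab'))).blobs = [pts_in_hull]
net.getLayer(long(net.getLayerId('conv8_313_rh'))).blobs = [np.full([1, 313], 2.606, np.float32)]

frame = cv.imread('input.jpg')  # placeholder input image (BGR)
img_rgb = (frame[:, :, [2, 1, 0]] / 255.0).astype(np.float32)
img_l = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)[:, :, 0]   # original-resolution L channel
(H_orig, W_orig) = img_rgb.shape[:2]

# The network expects a mean-centered 224x224 L channel
img_rs = cv.resize(img_rgb, (224, 224))
img_l_rs = cv.cvtColor(img_rs, cv.COLOR_RGB2Lab)[:, :, 0] - 50
net.setInput(cv.dnn.blobFromImage(img_l_rs))

# Predicted ab channels, upsampled and recombined with the original L
ab_dec = net.forward('class8_ab')[0, :, :, :].transpose((1, 2, 0))
ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig))
img_lab_out = np.concatenate((img_l[:, :, np.newaxis], ab_dec_us), axis=2)
img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)
cv.imwrite('colorized.png', (img_bgr_out * 255).astype(np.uint8))
```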