From eab556e1e012ff9e7140433ccb338ee932a15e4b Mon Sep 17 00:00:00 2001
From: Dmitry Kurtaev
Date: Tue, 20 Feb 2018 18:04:03 +0300
Subject: [PATCH] OpenCV face detection network in TensorFlow

---
 modules/dnn/misc/face_detector_accuracy.py | 195 ++++++++++++
 modules/dnn/misc/quantize_face_detector.py | 348 +++++++++++++++++++++
 modules/dnn/src/tensorflow/tf_importer.cpp |  27 +-
 modules/dnn/test/test_tf_importer.cpp      |  24 ++
 4 files changed, 592 insertions(+), 2 deletions(-)
 create mode 100644 modules/dnn/misc/face_detector_accuracy.py
 create mode 100644 modules/dnn/misc/quantize_face_detector.py

diff --git a/modules/dnn/misc/face_detector_accuracy.py b/modules/dnn/misc/face_detector_accuracy.py
new file mode 100644
index 0000000000..0e9125e96d
--- /dev/null
+++ b/modules/dnn/misc/face_detector_accuracy.py
@@ -0,0 +1,195 @@
+# This script is used to estimate the accuracy of different face detection models.
+# The COCO evaluation tool is used to compute the accuracy metric (Average Precision).
+# The script works with different face detection datasets.
+import os
+import json
+from fnmatch import fnmatch
+from math import pi
+import cv2 as cv
+import argparse
+import os
+import sys
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+parser = argparse.ArgumentParser(
+        description='Evaluate OpenCV face detection algorithms '
+                    'using COCO evaluation tool, http://cocodataset.org/#detections-eval')
+parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph')
+parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow')
+parser.add_argument('--caffe', help='Indicate that the tested model is from Caffe. Otherwise a TensorFlow model is expected.', action='store_true')
+parser.add_argument('--cascade', help='Optional path to trained Haar cascade as '
+                                      'an additional model for evaluation')
+parser.add_argument('--ann', help='Path to text file with ground truth annotations')
+parser.add_argument('--pics', help='Path to images root directory')
+parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/', action='store_true')
+parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/', action='store_true')
+args = parser.parse_args()
+
+dataset = {}
+dataset['images'] = []
+dataset['categories'] = [{ 'id': 0, 'name': 'face' }]
+dataset['annotations'] = []
+
+def ellipse2Rect(params):
+    rad_x = params[0]
+    rad_y = params[1]
+    angle = params[2] * 180.0 / pi
+    center_x = params[3]
+    center_y = params[4]
+    pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)),
+                          int(angle), 0, 360, 10)
+    rect = cv.boundingRect(pts)
+    left = rect[0]
+    top = rect[1]
+    right = rect[0] + rect[2]
+    bottom = rect[1] + rect[3]
+    return left, top, right, bottom
+
+def addImage(imagePath):
+    assert('images' in dataset)
+    imageId = len(dataset['images'])
+    dataset['images'].append({
+        'id': int(imageId),
+        'file_name': imagePath
+    })
+    return imageId
+
+def addBBox(imageId, left, top, width, height):
+    assert('annotations' in dataset)
+    dataset['annotations'].append({
+        'id': len(dataset['annotations']),
+        'image_id': int(imageId),
+        'category_id': 0,  # Face
+        'bbox': [int(left), int(top), int(width), int(height)],
+        'iscrowd': 0,
+        'area': float(width * height)
+    })
+
+def addDetection(detections, imageId, left, top, width, height, score):
+    detections.append({
+        'image_id': int(imageId),
+        'category_id': 0,  # Face
+        'bbox': [int(left), int(top), int(width), int(height)],
+        'score': float(score)
+    })
+
+
+def fddb_dataset(annotations, images):
+    for d in os.listdir(annotations):
+        if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'):
+            with open(os.path.join(annotations, d), 'rt') as f:
+                lines = [line.rstrip('\n') for line in f]
+                lineId = 0
+                while lineId < len(lines):
+                    # Image
+                    imgPath = lines[lineId]
+                    lineId += 1
+                    imageId = addImage(os.path.join(images, imgPath) + '.jpg')
+
+                    img = cv.imread(os.path.join(images, imgPath) + '.jpg')
+
+                    # Faces
+                    numFaces = int(lines[lineId])
+                    lineId += 1
+                    for i in range(numFaces):
+                        params = [float(v) for v in lines[lineId].split()]
+                        lineId += 1
+                        left, top, right, bottom = ellipse2Rect(params)
+                        addBBox(imageId, left, top, width=right - left + 1,
+                                height=bottom - top + 1)
+
+
+def wider_dataset(annotations, images):
+    with open(annotations, 'rt') as f:
+        lines = [line.rstrip('\n') for line in f]
+        lineId = 0
+        while lineId < len(lines):
+            # Image
+            imgPath = lines[lineId]
+            lineId += 1
+            imageId = addImage(os.path.join(images, imgPath))
+
+            # Faces
+            numFaces = int(lines[lineId])
+            lineId += 1
+            for i in range(numFaces):
+                params = [int(v) for v in lines[lineId].split()]
+                lineId += 1
+                left, top, width, height = params[0], params[1], params[2], params[3]
+                addBBox(imageId, left, top, width, height)
+
+def evaluate():
+    cocoGt = COCO('annotations.json')
+    cocoDt = cocoGt.loadRes('detections.json')
+    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
+    cocoEval.evaluate()
+    cocoEval.accumulate()
+    cocoEval.summarize()
+
+
+### Convert to COCO annotations format #########################################
+assert(args.fddb or args.wider)
+if args.fddb:
+    fddb_dataset(args.ann, args.pics)
+elif args.wider:
+    wider_dataset(args.ann, args.pics)
+
+with open('annotations.json', 'wt') as f:
+    json.dump(dataset, f)
+
+### Obtain detections ##########################################################
+detections = []
+if args.proto and args.model:
+    if args.caffe:
+        net = cv.dnn.readNetFromCaffe(args.proto, args.model)
+    else:
+        net = cv.dnn.readNetFromTensorflow(args.model, args.proto)
+
+    def detect(img, imageId):
+        imgWidth = img.shape[1]
+        imgHeight = img.shape[0]
+        net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
+        out = net.forward()
+
+        for i in range(out.shape[2]):
+            confidence = out[0, 0, i, 2]
+            left = int(out[0, 0, i, 3] * img.shape[1])
+            top = int(out[0, 0, i, 4] * img.shape[0])
+            right = int(out[0, 0, i, 5] * img.shape[1])
+            bottom = int(out[0, 0, i, 6] * img.shape[0])
+            addDetection(detections, imageId, left, top, width=right - left + 1,
+                         height=bottom - top + 1, score=confidence)
+
+elif args.cascade:
+    cascade = cv.CascadeClassifier(args.cascade)
+
+    def detect(img, imageId):
+        srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
+        faces = cascade.detectMultiScale(srcImgGray)
+
+        for rect in faces:
+            left, top, width, height = rect[0], rect[1], rect[2], rect[3]
+            addDetection(detections, imageId, left, top, width, height, score=1.0)
+
+for i in range(len(dataset['images'])):
+    sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images'])))
+    sys.stdout.flush()
+
+    img = cv.imread(dataset['images'][i]['file_name'])
+    imageId = int(dataset['images'][i]['id'])
+
+    detect(img, imageId)
+
+with open('detections.json', 'wt') as f:
+    json.dump(detections, f)
+
+evaluate()
+
+
+def rm(f):
+    if os.path.exists(f):
+        os.remove(f)
+
+rm('annotations.json')
+rm('detections.json')
diff --git a/modules/dnn/misc/quantize_face_detector.py b/modules/dnn/misc/quantize_face_detector.py
new file mode 100644
index 0000000000..06acae976b
--- /dev/null
+++ b/modules/dnn/misc/quantize_face_detector.py
@@ -0,0 +1,348 @@
+import argparse
+import cv2 as cv
+import tensorflow as tf
+import numpy as np
+import struct
+
+from tensorflow.python.tools import optimize_for_inference_lib
+from tensorflow.tools.graph_transforms import TransformGraph
+from tensorflow.core.framework.node_def_pb2 import NodeDef
+from google.protobuf import text_format
+
+parser = argparse.ArgumentParser(description="Use this script to create a TensorFlow graph "
+                                             "with weights from OpenCV's face detection network. "
+                                             "Only the backbone part of the SSD model is converted this way. "
+                                             "Look for the .pbtxt configuration file at "
+                                             "https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt")
+parser.add_argument('--model', help='Path to .caffemodel weights', required=True)
+parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)
+parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)
+parser.add_argument('--pbtxt', help='Path to output .pbtxt TensorFlow graph', required=True)
+parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')
+parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')
+args = parser.parse_args()
+
+assert(not args.quantize or not args.fp16)
+
+dtype = tf.float16 if args.fp16 else tf.float32
+
+################################################################################
+cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)
+
+def dnnLayer(name):
+    return cvNet.getLayer(long(cvNet.getLayerId(name)))
+
+def scale(x, name):
+    with tf.variable_scope(name):
+        layer = dnnLayer(name)
+        w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
+        if len(layer.blobs) > 1:
+            b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add')
+            return tf.nn.bias_add(tf.multiply(x, w), b)
+        else:
+            return tf.multiply(x, w, name)
+
+def conv(x, name, stride=1, pad='SAME', dilation=1, activ=None):
+    with tf.variable_scope(name):
+        layer = dnnLayer(name)
+        w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')
+        if dilation == 1:
+            conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad)
+        else:
+            assert(stride == 1)
+            conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad)
+
+        if len(layer.blobs) > 1:
+            b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')
+            conv = tf.nn.bias_add(conv, b)
+        return activ(conv) if activ else conv
+
+def batch_norm(x, name):
+    with tf.variable_scope(name):
+        # Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.
+        # Here we do a cast to fp32 but remove it in the frozen graph.
+        if x.dtype != tf.float32:
+            x = tf.cast(x, tf.float32)
+
+        layer = dnnLayer(name)
+        assert(len(layer.blobs) >= 3)
+
+        mean = layer.blobs[0].flatten()
+        std = layer.blobs[1].flatten()
+        scale = layer.blobs[2].flatten()
+
+        eps = 1e-5
+        hasBias = len(layer.blobs) > 3
+        hasWeights = scale.shape != (1,)
+
+        if not hasWeights and not hasBias:
+            mean /= scale[0]
+            std /= scale[0]
+
+        mean = tf.Variable(mean, dtype=tf.float32, name='mean')
+        std = tf.Variable(std, dtype=tf.float32, name='std')
+        gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape), dtype=tf.float32, name='gamma')
+        beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape), dtype=tf.float32, name='beta')
+        bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps,
+                                    is_training=False)[0]
+        if bn.dtype != dtype:
+            bn = tf.cast(bn, dtype)
+        return bn
+
+def l2norm(x, name):
+    with tf.variable_scope(name):
+        layer = dnnLayer(name)
+        w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')
+        return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w
+
+### Graph definition ###########################################################
+inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')
+data_bn = batch_norm(inp, 'data_bn')
+data_scale = scale(data_bn, 'data_scale')
+data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])
+conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')
+conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')
+conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')
+conv1_relu = tf.nn.relu(conv1_scale_h)
+conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),
+                                     padding='SAME', name='conv1_pool')
+
+layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')
+layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')
+layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')
+layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)
+layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')
+layer_64_1_sum = layer_64_1_conv2_h + conv1_pool
+
+layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')
+layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')
+layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)
+layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')
+layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')
+layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')
+layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)
+layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')
+layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')
+layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h
+
+layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')
+layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')
+layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)
+layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 0]])
+layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')
+layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')
+layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')
+layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)
+layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')
+layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')
+layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand
+
+layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')
+layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')
+layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)
+layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')
+layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')
+layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')
+layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)
+layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')
+layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')
+layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h
+
+last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')
+last_scale_h = scale(last_bn_h, 'last_scale_h')
+fc7 = tf.nn.relu(last_scale_h, name='last_relu')
+
+conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)
+conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)
+conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)
+conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])
+conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)
+conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', activ=tf.nn.relu)
+conv8_2_h = conv(conv8_1_h, pad='SAME', name='conv8_2_h', activ=tf.nn.relu)
+conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)
+conv9_2_h = conv(conv9_1_h, pad='SAME', name='conv9_2_h', activ=tf.nn.relu)
+
+conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')
+
+### Locations and confidences ##################################################
+locations = []
+confidences = []
+flattenLayersNames = []  # Collect the names of all reshape layers that should be replaced with flattens.
+for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):
+    for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],
+                            ['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):
+        name += suffix
+        flat = tf.layers.flatten(conv(bottom, name))
+        flattenLayersNames.append(flat.name[:flat.name.find(':')])
+        top.append(flat)
+
+mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')
+mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')
+
+total = int(np.prod(mbox_conf.shape[1:]))
+mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')
+mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')
+mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')
+flattenLayersNames.append('mbox_conf_flatten')
+
+with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+
+    ### Check correctness ######################################################
+    out_nodes = ['mbox_loc', 'mbox_conf_flatten']
+    inp_nodes = [inp.name[:inp.name.find(':')]]
+
+    np.random.seed(2701)
+    inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)
+
+    cvNet.setInput(inputData)
+    outDNN = cvNet.forward(out_nodes)
+
+    outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)})
+    print 'Max diff @ locations: %e' % np.max(np.abs(outDNN[0] - outTF[0]))
+    print 'Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1]))
+
+    # Save the graph
+    graph_def = sess.graph.as_graph_def()
+
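+    # Overview of the export pipeline below: freeze variables into constants,
+    # strip training-only parts of the graph, fold constant subexpressions and,
+    # if --quantize is passed, store weights as uint8 (one byte per value instead
+    # of four). The Dequantize ops added by quantization are removed later when
+    # the text graph is written.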
+    # Freeze graph. Replaces variables with constants.
+    graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)
+    # Optimize graph. Removes training-only ops and unused nodes.
+    graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum)
+    # Fuse constant operations.
+    transforms = ["fold_constants(ignore_errors=True)"]
+    if args.quantize:
+        transforms += ["quantize_weights(minimum_size=0)"]
+    transforms += ["sort_by_execution_order"]
+    graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)
+
+    # By default, float16 weights are stored in the tensor's repeated `half_val`
+    # field. It has type int32 with leading zeros for the unused bytes. This type
+    # is encoded as a varint: only 7 bits of every byte carry the value while the
+    # last bit indicates whether the encoding continues. This way a float16 value
+    # may take 1, 2 or 3 bytes depending on its value. To improve compression, we
+    # move all `half_val` values into `tensor_content`, which uses exactly 2 bytes
+    # per value.
+    for node in graph_def.node:
+        if 'value' in node.attr:
+            halfs = node.attr["value"].tensor.half_val
+            if not node.attr["value"].tensor.tensor_content and halfs:
+                node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)
+                node.attr["value"].tensor.ClearField('half_val')
+
+    # Serialize
+    with tf.gfile.FastGFile(args.pb, 'wb') as f:
+        f.write(graph_def.SerializeToString())
+
+
+################################################################################
+# Write a text graph representation
+################################################################################
+def tensorMsg(values):
+    msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)
+    for value in values:
+        msg += 'float_val: %f ' % value
+    return msg + '}'
+
+# Remove Const nodes and unused attributes.
+for i in reversed(range(len(graph_def.node))):
+    if graph_def.node[i].op in ['Const', 'Dequantize']:
+        del graph_def.node[i]
+    for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',
+                 'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',
+                 'Tpaddings']:
+        if attr in graph_def.node[i].attr:
+            del graph_def.node[i].attr[attr]
+
+# Append prior box generators
+min_sizes = [30, 60, 111, 162, 213, 264]
+max_sizes = [60, 111, 162, 213, 264, 315]
+steps = [8, 16, 32, 64, 100, 300]
+aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
+layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]
+for i in range(6):
+    priorBox = NodeDef()
+    priorBox.name = 'PriorBox_%d' % i
+    priorBox.op = 'PriorBox'
+    priorBox.input.append(layers[i].name[:layers[i].name.find(':')])
+    priorBox.input.append(inp_nodes[0])  # data
+
+    text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])
+    text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])
+    text_format.Merge('b: true', priorBox.attr["flip"])
+    text_format.Merge('b: false', priorBox.attr["clip"])
+    text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])
+    text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])
+    text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])
+    text_format.Merge('f: 0.5', priorBox.attr["offset"])
+    graph_def.node.extend([priorBox])
+
+# Concatenate prior boxes
+concat = NodeDef()
+concat.name = 'mbox_priorbox'
+concat.op = 'ConcatV2'
+for i in range(6):
+    concat.input.append('PriorBox_%d' % i)
+concat.input.append('mbox_loc/axis')
+graph_def.node.extend([concat])
+
+# DetectionOutput layer
+detectionOut = NodeDef()
+detectionOut.name = 'detection_out'
+detectionOut.op = 'DetectionOutput'
+
+detectionOut.input.append('mbox_loc')
+detectionOut.input.append('mbox_conf_flatten')
+detectionOut.input.append('mbox_priorbox')
+
+text_format.Merge('i: 2', detectionOut.attr['num_classes'])
+text_format.Merge('b: true', detectionOut.attr['share_location'])
+text_format.Merge('i: 0', detectionOut.attr['background_label_id'])
+text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])
+text_format.Merge('i: 400', detectionOut.attr['top_k'])
+text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
+text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])
+text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])
+
+graph_def.node.extend([detectionOut])
+
+# Replace the L2Normalization subgraph with a single node.
+for i in reversed(range(len(graph_def.node))):
+    if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',
+                                  'conv4_3_norm/l2_normalize/Sum',
+                                  'conv4_3_norm/l2_normalize/Maximum',
+                                  'conv4_3_norm/l2_normalize/Rsqrt']:
+        del graph_def.node[i]
+for node in graph_def.node:
+    if node.name == 'conv4_3_norm/l2_normalize':
+        node.op = 'L2Normalize'
+        node.input.pop()
+        node.input.pop()
+        node.input.append(layer_256_1_relu1.name)
+        break
+
+softmaxShape = NodeDef()
+softmaxShape.name = 'reshape_before_softmax'
+softmaxShape.op = 'Const'
+text_format.Merge(
+'tensor {'
+' dtype: DT_INT32'
+' tensor_shape { dim { size: 3 } }'
+' int_val: 0'
+' int_val: -1'
+' int_val: 2'
+'}', softmaxShape.attr["value"])
+graph_def.node.extend([softmaxShape])
+
+for node in graph_def.node:
+    if node.name == 'mbox_conf_reshape':
+        node.input[1] = softmaxShape.name
+    elif node.name == 'mbox_conf_softmax':
+        text_format.Merge('i: 2', node.attr['axis'])
+    elif node.name in flattenLayersNames:
+        node.op = 'Flatten'
+        inpName = node.input[0]
+        node.input.pop()
+        node.input.pop()
+        node.input.append(inpName)
+
+tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 77c62e42c5..5309ec40ce 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -651,7 +651,8 @@ static void addConstNodes(tensorflow::GraphDef& net, std::map<String, int>& cons
             tensor->set_dtype(tensorflow::DT_FLOAT);
             tensor->set_tensor_content(content.data, content.total() * content.elemSize1());
 
-            ExcludeLayer(net, li, 0, false);
+            net.mutable_node(tensorId)->set_name(name);
+            CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second);
             layers_to_ignore.insert(name);
             continue;
         }
@@ -1477,6 +1478,17 @@ void TFImporter::populateNet(Net dstNet)
 
             connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
         }
+        else if (type == "L2Normalize")
+        {
+            // op: "L2Normalize"
+            // input: "input"
+            CV_Assert(layer.input_size() == 1);
+            layerParams.set("across_spatial", false);
+            layerParams.set("channel_shared", false);
+            int id = dstNet.addLayer(name, "Normalize", layerParams);
+            layer_id[name] = id;
+            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+        }
         else if (type == "PriorBox")
         {
             if (hasLayerAttr(layer, "min_size"))
@@ -1489,6 +1501,8 @@ void TFImporter::populateNet(Net dstNet)
                 layerParams.set("clip", getLayerAttr(layer, "clip").b());
             if (hasLayerAttr(layer, "offset"))
                 layerParams.set("offset", getLayerAttr(layer, "offset").f());
+            if (hasLayerAttr(layer, "step"))
+                layerParams.set("step", getLayerAttr(layer, "step").f());
 
             const std::string paramNames[] = {"variance", "aspect_ratio", "scales",
                                               "width", "height"};
@@ -1538,8 +1552,17 @@
                 connect(layer_id, dstNet, parsePin(layer.input(i)), id, i);
             data_layouts[name] = DATA_LAYOUT_UNKNOWN;
         }
+        else if (type == "Softmax")
+        {
+            if (hasLayerAttr(layer, "axis"))
+                layerParams.set("axis", getLayerAttr(layer, "axis").i());
+
+            int id = dstNet.addLayer(name, "Softmax", layerParams);
+            layer_id[name] = id;
+            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, layer.input_size());
+        }
         else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
-                 type == "Relu" || type == "Elu" || type == "Softmax" ||
+                 type == "Relu" || type == "Elu" ||
                  type == "Identity" || type == "Relu6")
         {
             std::string dnnType = type;
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index a2b6df6e93..c1f88396ea 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -353,4 +353,28 @@ TEST(Test_TensorFlow, memory_read)
     runTensorFlowNet("batch_norm_text", DNN_TARGET_CPU, true, l1, lInf, true);
 }
 
+TEST(Test_TensorFlow, opencv_face_detector_uint8)
+{
+    std::string proto = findDataFile("dnn/opencv_face_detector.pbtxt", false);
+    std::string model = findDataFile("dnn/opencv_face_detector_uint8.pb", false);
+
+    Net net = readNetFromTensorflow(model, proto);
+    Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
+    Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
+
+    net.setInput(blob);
+    // Output has shape 1x1xNx7, where N is the number of detections.
+    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom].
+    Mat out = net.forward();
+
+    // References are from the test for the Caffe model.
+    Mat ref = (Mat_<float>(6, 5) << 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631,
+                                    0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168,
+                                    0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290,
+                                    0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477,
+                                    0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494,
+                                    0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
+    normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref, "", 2.8e-4, 3.4e-3);
+}
+
 }
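
Usage note (not part of the patch): a minimal sketch of how the converted uint8 graph can be loaded with OpenCV's DNN module. It mirrors the detect() helper from face_detector_accuracy.py above; the model file names follow the test data referenced in this patch, and the input image path and confidence threshold are illustrative placeholders.

import cv2 as cv

# Load the TensorFlow face detector produced by quantize_face_detector.py.
net = cv.dnn.readNetFromTensorflow('opencv_face_detector_uint8.pb', 'opencv_face_detector.pbtxt')
img = cv.imread('example.jpg')  # placeholder input image
# Same preprocessing as in the accuracy script: 300x300 input, mean subtraction, BGR order.
net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False))
out = net.forward()  # 1x1xNx7: [id, classId, confidence, left, top, right, bottom]
for i in range(out.shape[2]):
    confidence = out[0, 0, i, 2]
    if confidence > 0.5:  # placeholder threshold
        left = int(out[0, 0, i, 3] * img.shape[1])
        top = int(out[0, 0, i, 4] * img.shape[0])
        right = int(out[0, 0, i, 5] * img.shape[1])
        bottom = int(out[0, 0, i, 6] * img.shape[0])
        cv.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
cv.imwrite('detections.png', img)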