parent
53305d4a7e
commit
eab556e1e0
4 changed files with 592 additions and 2 deletions
@ -0,0 +1,195 @@ |
|||||||
|
# This script is used to estimate an accuracy of different face detection models. |
||||||
|
# COCO evaluation tool is used to compute an accuracy metrics (Average Precision). |
||||||
|
# Script works with different face detection datasets. |
||||||
|
import os |
||||||
|
import json |
||||||
|
from fnmatch import fnmatch |
||||||
|
from math import pi |
||||||
|
import cv2 as cv |
||||||
|
import argparse |
||||||
|
import os |
||||||
|
import sys |
||||||
|
from pycocotools.coco import COCO |
||||||
|
from pycocotools.cocoeval import COCOeval |
||||||
|
|
||||||
|
parser = argparse.ArgumentParser( |
||||||
|
description='Evaluate OpenCV face detection algorithms ' |
||||||
|
'using COCO evaluation tool, http://cocodataset.org/#detections-eval') |
||||||
|
parser.add_argument('--proto', help='Path to .prototxt of Caffe model or .pbtxt of TensorFlow graph') |
||||||
|
parser.add_argument('--model', help='Path to .caffemodel trained in Caffe or .pb from TensorFlow') |
||||||
|
parser.add_argument('--caffe', help='Indicate that tested model is from Caffe. Otherwise model from TensorFlow is expected.', action='store_true') |
||||||
|
parser.add_argument('--cascade', help='Optional path to trained Haar cascade as ' |
||||||
|
'an additional model for evaluation') |
||||||
|
parser.add_argument('--ann', help='Path to text file with ground truth annotations') |
||||||
|
parser.add_argument('--pics', help='Path to images root directory') |
||||||
|
parser.add_argument('--fddb', help='Evaluate FDDB dataset, http://vis-www.cs.umass.edu/fddb/', action='store_true') |
||||||
|
parser.add_argument('--wider', help='Evaluate WIDER FACE dataset, http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/', action='store_true') |
||||||
|
args = parser.parse_args() |
||||||
|
|
||||||
|
dataset = {} |
||||||
|
dataset['images'] = [] |
||||||
|
dataset['categories'] = [{ 'id': 0, 'name': 'face' }] |
||||||
|
dataset['annotations'] = [] |
||||||
|
|
||||||
|
def ellipse2Rect(params): |
||||||
|
rad_x = params[0] |
||||||
|
rad_y = params[1] |
||||||
|
angle = params[2] * 180.0 / pi |
||||||
|
center_x = params[3] |
||||||
|
center_y = params[4] |
||||||
|
pts = cv.ellipse2Poly((int(center_x), int(center_y)), (int(rad_x), int(rad_y)), |
||||||
|
int(angle), 0, 360, 10) |
||||||
|
rect = cv.boundingRect(pts) |
||||||
|
left = rect[0] |
||||||
|
top = rect[1] |
||||||
|
right = rect[0] + rect[2] |
||||||
|
bottom = rect[1] + rect[3] |
||||||
|
return left, top, right, bottom |
||||||
|
|
||||||
|
def addImage(imagePath): |
||||||
|
assert('images' in dataset) |
||||||
|
imageId = len(dataset['images']) |
||||||
|
dataset['images'].append({ |
||||||
|
'id': int(imageId), |
||||||
|
'file_name': imagePath |
||||||
|
}) |
||||||
|
return imageId |
||||||
|
|
||||||
|
def addBBox(imageId, left, top, width, height): |
||||||
|
assert('annotations' in dataset) |
||||||
|
dataset['annotations'].append({ |
||||||
|
'id': len(dataset['annotations']), |
||||||
|
'image_id': int(imageId), |
||||||
|
'category_id': 0, # Face |
||||||
|
'bbox': [int(left), int(top), int(width), int(height)], |
||||||
|
'iscrowd': 0, |
||||||
|
'area': float(width * height) |
||||||
|
}) |
||||||
|
|
||||||
|
def addDetection(detections, imageId, left, top, width, height, score): |
||||||
|
detections.append({ |
||||||
|
'image_id': int(imageId), |
||||||
|
'category_id': 0, # Face |
||||||
|
'bbox': [int(left), int(top), int(width), int(height)], |
||||||
|
'score': float(score) |
||||||
|
}) |
||||||
|
|
||||||
|
|
||||||
|
def fddb_dataset(annotations, images): |
||||||
|
for d in os.listdir(annotations): |
||||||
|
if fnmatch(d, 'FDDB-fold-*-ellipseList.txt'): |
||||||
|
with open(os.path.join(annotations, d), 'rt') as f: |
||||||
|
lines = [line.rstrip('\n') for line in f] |
||||||
|
lineId = 0 |
||||||
|
while lineId < len(lines): |
||||||
|
# Image |
||||||
|
imgPath = lines[lineId] |
||||||
|
lineId += 1 |
||||||
|
imageId = addImage(os.path.join(images, imgPath) + '.jpg') |
||||||
|
|
||||||
|
img = cv.imread(os.path.join(images, imgPath) + '.jpg') |
||||||
|
|
||||||
|
# Faces |
||||||
|
numFaces = int(lines[lineId]) |
||||||
|
lineId += 1 |
||||||
|
for i in range(numFaces): |
||||||
|
params = [float(v) for v in lines[lineId].split()] |
||||||
|
lineId += 1 |
||||||
|
left, top, right, bottom = ellipse2Rect(params) |
||||||
|
addBBox(imageId, left, top, width=right - left + 1, |
||||||
|
height=bottom - top + 1) |
||||||
|
|
||||||
|
|
||||||
|
def wider_dataset(annotations, images): |
||||||
|
with open(annotations, 'rt') as f: |
||||||
|
lines = [line.rstrip('\n') for line in f] |
||||||
|
lineId = 0 |
||||||
|
while lineId < len(lines): |
||||||
|
# Image |
||||||
|
imgPath = lines[lineId] |
||||||
|
lineId += 1 |
||||||
|
imageId = addImage(os.path.join(images, imgPath)) |
||||||
|
|
||||||
|
# Faces |
||||||
|
numFaces = int(lines[lineId]) |
||||||
|
lineId += 1 |
||||||
|
for i in range(numFaces): |
||||||
|
params = [int(v) for v in lines[lineId].split()] |
||||||
|
lineId += 1 |
||||||
|
left, top, width, height = params[0], params[1], params[2], params[3] |
||||||
|
addBBox(imageId, left, top, width, height) |
||||||
|
|
||||||
|
def evaluate(): |
||||||
|
cocoGt = COCO('annotations.json') |
||||||
|
cocoDt = cocoGt.loadRes('detections.json') |
||||||
|
cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') |
||||||
|
cocoEval.evaluate() |
||||||
|
cocoEval.accumulate() |
||||||
|
cocoEval.summarize() |
||||||
|
|
||||||
|
|
||||||
|
### Convert to COCO annotations format ######################################### |
||||||
|
assert(args.fddb or args.wider) |
||||||
|
if args.fddb: |
||||||
|
fddb_dataset(args.ann, args.pics) |
||||||
|
elif args.wider: |
||||||
|
wider_dataset(args.ann, args.pics) |
||||||
|
|
||||||
|
with open('annotations.json', 'wt') as f: |
||||||
|
json.dump(dataset, f) |
||||||
|
|
||||||
|
### Obtain detections ########################################################## |
||||||
|
detections = [] |
||||||
|
if args.proto and args.model: |
||||||
|
if args.caffe: |
||||||
|
net = cv.dnn.readNetFromCaffe(args.proto, args.model) |
||||||
|
else: |
||||||
|
net = cv.dnn.readNetFromTensorflow(args.model, args.proto) |
||||||
|
|
||||||
|
def detect(img, imageId): |
||||||
|
imgWidth = img.shape[1] |
||||||
|
imgHeight = img.shape[0] |
||||||
|
net.setInput(cv.dnn.blobFromImage(img, 1.0, (300, 300), (104., 177., 123.), False, False)) |
||||||
|
out = net.forward() |
||||||
|
|
||||||
|
for i in range(out.shape[2]): |
||||||
|
confidence = out[0, 0, i, 2] |
||||||
|
left = int(out[0, 0, i, 3] * img.shape[1]) |
||||||
|
top = int(out[0, 0, i, 4] * img.shape[0]) |
||||||
|
right = int(out[0, 0, i, 5] * img.shape[1]) |
||||||
|
bottom = int(out[0, 0, i, 6] * img.shape[0]) |
||||||
|
addDetection(detections, imageId, left, top, width=right - left + 1, |
||||||
|
height=bottom - top + 1, score=confidence) |
||||||
|
|
||||||
|
elif args.cascade: |
||||||
|
cascade = cv.CascadeClassifier(args.cascade) |
||||||
|
|
||||||
|
def detect(img, imageId): |
||||||
|
srcImgGray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) |
||||||
|
faces = cascade.detectMultiScale(srcImgGray) |
||||||
|
|
||||||
|
for rect in faces: |
||||||
|
left, top, width, height = rect[0], rect[1], rect[2], rect[3] |
||||||
|
addDetection(detections, imageId, left, top, width, height, score=1.0) |
||||||
|
|
||||||
|
for i in range(len(dataset['images'])): |
||||||
|
sys.stdout.write('\r%d / %d' % (i + 1, len(dataset['images']))) |
||||||
|
sys.stdout.flush() |
||||||
|
|
||||||
|
img = cv.imread(dataset['images'][i]['file_name']) |
||||||
|
imageId = int(dataset['images'][i]['id']) |
||||||
|
|
||||||
|
detect(img, imageId) |
||||||
|
|
||||||
|
with open('detections.json', 'wt') as f: |
||||||
|
json.dump(detections, f) |
||||||
|
|
||||||
|
evaluate() |
||||||
|
|
||||||
|
|
||||||
|
def rm(f): |
||||||
|
if os.path.exists(f): |
||||||
|
os.remove(f) |
||||||
|
|
||||||
|
rm('annotations.json') |
||||||
|
rm('detections.json') |
@ -0,0 +1,348 @@ |
|||||||
|
import argparse |
||||||
|
import cv2 as cv |
||||||
|
import tensorflow as tf |
||||||
|
import numpy as np |
||||||
|
import struct |
||||||
|
|
||||||
|
from tensorflow.python.tools import optimize_for_inference_lib |
||||||
|
from tensorflow.tools.graph_transforms import TransformGraph |
||||||
|
from tensorflow.core.framework.node_def_pb2 import NodeDef |
||||||
|
from google.protobuf import text_format |
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description="Use this script to create TensorFlow graph " |
||||||
|
"with weights from OpenCV's face detection network. " |
||||||
|
"Only backbone part of SSD model is converted this way. " |
||||||
|
"Look for .pbtxt configuration file at " |
||||||
|
"https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt") |
||||||
|
parser.add_argument('--model', help='Path to .caffemodel weights', required=True) |
||||||
|
parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True) |
||||||
|
parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True) |
||||||
|
parser.add_argument('--pbtxt', help='Path to output .pbxt TensorFlow graph', required=True) |
||||||
|
parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true') |
||||||
|
parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true') |
||||||
|
args = parser.parse_args() |
||||||
|
|
||||||
|
assert(not args.quantize or not args.fp16) |
||||||
|
|
||||||
|
dtype = tf.float16 if args.fp16 else tf.float32 |
||||||
|
|
||||||
|
################################################################################ |
||||||
|
cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model) |
||||||
|
|
||||||
|
def dnnLayer(name): |
||||||
|
return cvNet.getLayer(long(cvNet.getLayerId(name))) |
||||||
|
|
||||||
|
def scale(x, name): |
||||||
|
with tf.variable_scope(name): |
||||||
|
layer = dnnLayer(name) |
||||||
|
w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul') |
||||||
|
if len(layer.blobs) > 1: |
||||||
|
b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add') |
||||||
|
return tf.nn.bias_add(tf.multiply(x, w), b) |
||||||
|
else: |
||||||
|
return tf.multiply(x, w, name) |
||||||
|
|
||||||
|
def conv(x, name, stride=1, pad='SAME', dilation=1, activ=None): |
||||||
|
with tf.variable_scope(name): |
||||||
|
layer = dnnLayer(name) |
||||||
|
w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights') |
||||||
|
if dilation == 1: |
||||||
|
conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad) |
||||||
|
else: |
||||||
|
assert(stride == 1) |
||||||
|
conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad) |
||||||
|
|
||||||
|
if len(layer.blobs) > 1: |
||||||
|
b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias') |
||||||
|
conv = tf.nn.bias_add(conv, b) |
||||||
|
return activ(conv) if activ else conv |
||||||
|
|
||||||
|
def batch_norm(x, name): |
||||||
|
with tf.variable_scope(name): |
||||||
|
# Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input. |
||||||
|
# Here we do a cast to fp32 but remove it in the frozen graph. |
||||||
|
if x.dtype != tf.float32: |
||||||
|
x = tf.cast(x, tf.float32) |
||||||
|
|
||||||
|
layer = dnnLayer(name) |
||||||
|
assert(len(layer.blobs) >= 3) |
||||||
|
|
||||||
|
mean = layer.blobs[0].flatten() |
||||||
|
std = layer.blobs[1].flatten() |
||||||
|
scale = layer.blobs[2].flatten() |
||||||
|
|
||||||
|
eps = 1e-5 |
||||||
|
hasBias = len(layer.blobs) > 3 |
||||||
|
hasWeights = scale.shape != (1,) |
||||||
|
|
||||||
|
if not hasWeights and not hasBias: |
||||||
|
mean /= scale[0] |
||||||
|
std /= scale[0] |
||||||
|
|
||||||
|
mean = tf.Variable(mean, dtype=tf.float32, name='mean') |
||||||
|
std = tf.Variable(std, dtype=tf.float32, name='std') |
||||||
|
gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape), dtype=tf.float32, name='gamma') |
||||||
|
beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape), dtype=tf.float32, name='beta') |
||||||
|
bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps, |
||||||
|
is_training=False)[0] |
||||||
|
if bn.dtype != dtype: |
||||||
|
bn = tf.cast(bn, dtype) |
||||||
|
return bn |
||||||
|
|
||||||
|
def l2norm(x, name): |
||||||
|
with tf.variable_scope(name): |
||||||
|
layer = dnnLayer(name) |
||||||
|
w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul') |
||||||
|
return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w |
||||||
|
|
||||||
|
### Graph definition ########################################################### |
||||||
|
inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data') |
||||||
|
data_bn = batch_norm(inp, 'data_bn') |
||||||
|
data_scale = scale(data_bn, 'data_scale') |
||||||
|
data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]]) |
||||||
|
conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h') |
||||||
|
conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h') |
||||||
|
conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h') |
||||||
|
conv1_relu = tf.nn.relu(conv1_scale_h) |
||||||
|
conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2), |
||||||
|
padding='SAME', name='conv1_pool') |
||||||
|
|
||||||
|
layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h') |
||||||
|
layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h') |
||||||
|
layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h') |
||||||
|
layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h) |
||||||
|
layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h') |
||||||
|
layer_64_1_sum = layer_64_1_conv2_h + conv1_pool |
||||||
|
|
||||||
|
layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h') |
||||||
|
layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h') |
||||||
|
layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h) |
||||||
|
layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h') |
||||||
|
layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2') |
||||||
|
layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2') |
||||||
|
layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2) |
||||||
|
layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2') |
||||||
|
layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h') |
||||||
|
layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h |
||||||
|
|
||||||
|
layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1') |
||||||
|
layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1') |
||||||
|
layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1) |
||||||
|
layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 0]]) |
||||||
|
layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1') |
||||||
|
layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2') |
||||||
|
layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2') |
||||||
|
layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2) |
||||||
|
layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2') |
||||||
|
layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand') |
||||||
|
layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand |
||||||
|
|
||||||
|
layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1') |
||||||
|
layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1') |
||||||
|
layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1) |
||||||
|
layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h') |
||||||
|
layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h') |
||||||
|
layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h') |
||||||
|
layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h) |
||||||
|
layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h') |
||||||
|
layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h') |
||||||
|
layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h |
||||||
|
|
||||||
|
last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h') |
||||||
|
last_scale_h = scale(last_bn_h, 'last_scale_h') |
||||||
|
fc7 = tf.nn.relu(last_scale_h, name='last_relu') |
||||||
|
|
||||||
|
conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu) |
||||||
|
conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu) |
||||||
|
conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu) |
||||||
|
conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]]) |
||||||
|
conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu) |
||||||
|
conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', activ=tf.nn.relu) |
||||||
|
conv8_2_h = conv(conv8_1_h, pad='SAME', name='conv8_2_h', activ=tf.nn.relu) |
||||||
|
conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu) |
||||||
|
conv9_2_h = conv(conv9_1_h, pad='SAME', name='conv9_2_h', activ=tf.nn.relu) |
||||||
|
|
||||||
|
conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm') |
||||||
|
|
||||||
|
### Locations and confidences ################################################## |
||||||
|
locations = [] |
||||||
|
confidences = [] |
||||||
|
flattenLayersNames = [] # Collect all reshape layers names that should be replaced to flattens. |
||||||
|
for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']): |
||||||
|
for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h], |
||||||
|
['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']): |
||||||
|
name += suffix |
||||||
|
flat = tf.layers.flatten(conv(bottom, name)) |
||||||
|
flattenLayersNames.append(flat.name[:flat.name.find(':')]) |
||||||
|
top.append(flat) |
||||||
|
|
||||||
|
mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc') |
||||||
|
mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf') |
||||||
|
|
||||||
|
total = int(np.prod(mbox_conf.shape[1:])) |
||||||
|
mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape') |
||||||
|
mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax') |
||||||
|
mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten') |
||||||
|
flattenLayersNames.append('mbox_conf_flatten') |
||||||
|
|
||||||
|
with tf.Session() as sess: |
||||||
|
sess.run(tf.global_variables_initializer()) |
||||||
|
|
||||||
|
### Check correctness ###################################################### |
||||||
|
out_nodes = ['mbox_loc', 'mbox_conf_flatten'] |
||||||
|
inp_nodes = [inp.name[:inp.name.find(':')]] |
||||||
|
|
||||||
|
np.random.seed(2701) |
||||||
|
inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32) |
||||||
|
|
||||||
|
cvNet.setInput(inputData) |
||||||
|
outDNN = cvNet.forward(out_nodes) |
||||||
|
|
||||||
|
outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)}) |
||||||
|
print 'Max diff @ locations: %e' % np.max(np.abs(outDNN[0] - outTF[0])) |
||||||
|
print 'Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1])) |
||||||
|
|
||||||
|
# Save a graph |
||||||
|
graph_def = sess.graph.as_graph_def() |
||||||
|
|
||||||
|
# Freeze graph. Replaces variables to constants. |
||||||
|
graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes) |
||||||
|
# Optimize graph. Removes training-only ops, unused nodes. |
||||||
|
graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum) |
||||||
|
# Fuse constant operations. |
||||||
|
transforms = ["fold_constants(ignore_errors=True)"] |
||||||
|
if args.quantize: |
||||||
|
transforms += ["quantize_weights(minimum_size=0)"] |
||||||
|
transforms += ["sort_by_execution_order"] |
||||||
|
graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms) |
||||||
|
|
||||||
|
# By default, float16 weights are stored in repeated tensor's field called |
||||||
|
# `half_val`. It has type int32 with leading zeros for unused bytes. |
||||||
|
# This type is encoded by Varint that means only 7 bits are used for value |
||||||
|
# representation but the last one is indicated the end of encoding. This way |
||||||
|
# float16 might takes 1 or 2 or 3 bytes depends on value. To impove compression, |
||||||
|
# we replace all `half_val` values to `tensor_content` using only 2 bytes for everyone. |
||||||
|
for node in graph_def.node: |
||||||
|
if 'value' in node.attr: |
||||||
|
halfs = node.attr["value"].tensor.half_val |
||||||
|
if not node.attr["value"].tensor.tensor_content and halfs: |
||||||
|
node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs) |
||||||
|
node.attr["value"].tensor.ClearField('half_val') |
||||||
|
|
||||||
|
# Serialize |
||||||
|
with tf.gfile.FastGFile(args.pb, 'wb') as f: |
||||||
|
f.write(graph_def.SerializeToString()) |
||||||
|
|
||||||
|
|
||||||
|
################################################################################ |
||||||
|
# Write a text graph representation |
||||||
|
################################################################################ |
||||||
|
def tensorMsg(values): |
||||||
|
msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values) |
||||||
|
for value in values: |
||||||
|
msg += 'float_val: %f ' % value |
||||||
|
return msg + '}' |
||||||
|
|
||||||
|
# Remove Const nodes and unused attributes. |
||||||
|
for i in reversed(range(len(graph_def.node))): |
||||||
|
if graph_def.node[i].op in ['Const', 'Dequantize']: |
||||||
|
del graph_def.node[i] |
||||||
|
for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim', |
||||||
|
'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training', |
||||||
|
'Tpaddings']: |
||||||
|
if attr in graph_def.node[i].attr: |
||||||
|
del graph_def.node[i].attr[attr] |
||||||
|
|
||||||
|
# Append prior box generators |
||||||
|
min_sizes = [30, 60, 111, 162, 213, 264] |
||||||
|
max_sizes = [60, 111, 162, 213, 264, 315] |
||||||
|
steps = [8, 16, 32, 64, 100, 300] |
||||||
|
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]] |
||||||
|
layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h] |
||||||
|
for i in range(6): |
||||||
|
priorBox = NodeDef() |
||||||
|
priorBox.name = 'PriorBox_%d' % i |
||||||
|
priorBox.op = 'PriorBox' |
||||||
|
priorBox.input.append(layers[i].name[:layers[i].name.find(':')]) |
||||||
|
priorBox.input.append(inp_nodes[0]) # data |
||||||
|
|
||||||
|
text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"]) |
||||||
|
text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"]) |
||||||
|
text_format.Merge('b: true', priorBox.attr["flip"]) |
||||||
|
text_format.Merge('b: false', priorBox.attr["clip"]) |
||||||
|
text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"]) |
||||||
|
text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"]) |
||||||
|
text_format.Merge('f: %f' % steps[i], priorBox.attr["step"]) |
||||||
|
text_format.Merge('f: 0.5', priorBox.attr["offset"]) |
||||||
|
graph_def.node.extend([priorBox]) |
||||||
|
|
||||||
|
# Concatenate prior boxes |
||||||
|
concat = NodeDef() |
||||||
|
concat.name = 'mbox_priorbox' |
||||||
|
concat.op = 'ConcatV2' |
||||||
|
for i in range(6): |
||||||
|
concat.input.append('PriorBox_%d' % i) |
||||||
|
concat.input.append('mbox_loc/axis') |
||||||
|
graph_def.node.extend([concat]) |
||||||
|
|
||||||
|
# DetectionOutput layer |
||||||
|
detectionOut = NodeDef() |
||||||
|
detectionOut.name = 'detection_out' |
||||||
|
detectionOut.op = 'DetectionOutput' |
||||||
|
|
||||||
|
detectionOut.input.append('mbox_loc') |
||||||
|
detectionOut.input.append('mbox_conf_flatten') |
||||||
|
detectionOut.input.append('mbox_priorbox') |
||||||
|
|
||||||
|
text_format.Merge('i: 2', detectionOut.attr['num_classes']) |
||||||
|
text_format.Merge('b: true', detectionOut.attr['share_location']) |
||||||
|
text_format.Merge('i: 0', detectionOut.attr['background_label_id']) |
||||||
|
text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold']) |
||||||
|
text_format.Merge('i: 400', detectionOut.attr['top_k']) |
||||||
|
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type']) |
||||||
|
text_format.Merge('i: 200', detectionOut.attr['keep_top_k']) |
||||||
|
text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold']) |
||||||
|
|
||||||
|
graph_def.node.extend([detectionOut]) |
||||||
|
|
||||||
|
# Replace L2Normalization subgraph onto a single node. |
||||||
|
for i in reversed(range(len(graph_def.node))): |
||||||
|
if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square', |
||||||
|
'conv4_3_norm/l2_normalize/Sum', |
||||||
|
'conv4_3_norm/l2_normalize/Maximum', |
||||||
|
'conv4_3_norm/l2_normalize/Rsqrt']: |
||||||
|
del graph_def.node[i] |
||||||
|
for node in graph_def.node: |
||||||
|
if node.name == 'conv4_3_norm/l2_normalize': |
||||||
|
node.op = 'L2Normalize' |
||||||
|
node.input.pop() |
||||||
|
node.input.pop() |
||||||
|
node.input.append(layer_256_1_relu1.name) |
||||||
|
break |
||||||
|
|
||||||
|
softmaxShape = NodeDef() |
||||||
|
softmaxShape.name = 'reshape_before_softmax' |
||||||
|
softmaxShape.op = 'Const' |
||||||
|
text_format.Merge( |
||||||
|
'tensor {' |
||||||
|
' dtype: DT_INT32' |
||||||
|
' tensor_shape { dim { size: 3 } }' |
||||||
|
' int_val: 0' |
||||||
|
' int_val: -1' |
||||||
|
' int_val: 2' |
||||||
|
'}', softmaxShape.attr["value"]) |
||||||
|
graph_def.node.extend([softmaxShape]) |
||||||
|
|
||||||
|
for node in graph_def.node: |
||||||
|
if node.name == 'mbox_conf_reshape': |
||||||
|
node.input[1] = softmaxShape.name |
||||||
|
elif node.name == 'mbox_conf_softmax': |
||||||
|
text_format.Merge('i: 2', node.attr['axis']) |
||||||
|
elif node.name in flattenLayersNames: |
||||||
|
node.op = 'Flatten' |
||||||
|
inpName = node.input[0] |
||||||
|
node.input.pop() |
||||||
|
node.input.pop() |
||||||
|
node.input.append(inpName) |
||||||
|
|
||||||
|
tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True) |
Loading…
Reference in new issue