@ -31,6 +31,7 @@ parser.add_argument('--height', type=int, |
parser.add_argument('--rgb', action='store_true', |
help='Indicate that model works with RGB input images instead BGR ones.') |
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') |
parser.add_argument('--nms', type=float, default=0.4, help='Non-maximum suppression threshold') |
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, |
help="Choose one of computation backends: " |
"%d: automatically (by default), " |
@ -57,6 +58,7 @@ net.setPreferableBackend(args.backend) |
net.setPreferableTarget(args.target) |
confThreshold = args.thr |
nmsThreshold = args.nms |
def getOutputsNames(net): |
layersNames = net.getLayerNames() |
@ -86,36 +88,43 @@ def postprocess(frame, outs): |
lastLayerId = net.getLayerId(layerNames[-1]) |
lastLayer = net.getLayer(lastLayerId) |
classIds = [] |
confidences = [] |
boxes = [] |
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN |
# Network produces output blob with a shape 1x1xNx7 where N is a number of |
# detections and every detection is a vector of values |
# [batchId, classId, confidence, left, top, right, bottom] |
assert(len(outs) == 1) |
out = outs[0] |
for detection in out[0, 0]: |
confidence = detection[2] |
if confidence > confThreshold: |
left = int(detection[3]) |
top = int(detection[4]) |
right = int(detection[5]) |
bottom = int(detection[6]) |
classId = int(detection[1]) - 1 # Skip background label |
drawPred(classId, confidence, left, top, right, bottom) |
for out in outs: |
for detection in out[0, 0]: |
confidence = detection[2] |
if confidence > confThreshold: |
left = int(detection[3]) |
top = int(detection[4]) |
right = int(detection[5]) |
bottom = int(detection[6]) |
width = right - left + 1 |
height = bottom - top + 1 |
classIds.append(int(detection[1]) - 1) # Skip background label |
confidences.append(float(confidence)) |
boxes.append([left, top, width, height]) |
elif lastLayer.type == 'DetectionOutput': |
# Network produces output blob with a shape 1x1xNx7 where N is a number of |
# detections and every detection is a vector of values |
# [batchId, classId, confidence, left, top, right, bottom] |
assert(len(outs) == 1) |
out = outs[0] |
for detection in out[0, 0]: |
confidence = detection[2] |
if confidence > confThreshold: |
left = int(detection[3] * frameWidth) |
top = int(detection[4] * frameHeight) |
right = int(detection[5] * frameWidth) |
bottom = int(detection[6] * frameHeight) |
classId = int(detection[1]) - 1 # Skip background label |
drawPred(classId, confidence, left, top, right, bottom) |
for out in outs: |
for detection in out[0, 0]: |
confidence = detection[2] |
if confidence > confThreshold: |
left = int(detection[3] * frameWidth) |
top = int(detection[4] * frameHeight) |
right = int(detection[5] * frameWidth) |
bottom = int(detection[6] * frameHeight) |
width = right - left + 1 |
height = bottom - top + 1 |
classIds.append(int(detection[1]) - 1) # Skip background label |
confidences.append(float(confidence)) |
boxes.append([left, top, width, height]) |
elif lastLayer.type == 'Region': |
# Network produces output blob with a shape NxC where N is a number of |
# detected objects and C is a number of classes + 4 where the first 4 |
@ -138,15 +147,19 @@ def postprocess(frame, outs): |
classIds.append(classId) |
confidences.append(float(confidence)) |
boxes.append([left, top, width, height]) |
indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, 0.4) |
for i in indices: |
i = i[0] |
box = boxes[i] |
left = box[0] |
top = box[1] |
width = box[2] |
height = box[3] |
drawPred(classIds[i], confidences[i], left, top, left + width, top + height) |
else: |
print('Unknown output layer type: ' + lastLayer.type) |
exit() |
indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold) |
for i in indices: |
i = i[0] |
box = boxes[i] |
left = box[0] |
top = box[1] |
width = box[2] |
height = box[3] |
drawPred(classIds[i], confidences[i], left, top, left + width, top + height) |
# Process inputs |
winName = 'Deep learning object detection in OpenCV' |