mirror of https://github.com/opencv/opencv.git
commit 7fb70e1701
20 changed files with 397 additions and 73 deletions
@@ -0,0 +1,146 @@
# Import required modules
import cv2 as cv
import math
import argparse

############ Add argument parser for command line arguments ############
parser = argparse.ArgumentParser(description='Use this script to run the TensorFlow implementation (https://github.com/argman/EAST) of EAST: An Efficient and Accurate Scene Text Detector (https://arxiv.org/abs/1704.03155v2)')
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.')
parser.add_argument('--model', required=True,
                    help='Path to a binary .pb file containing the trained network weights.')
parser.add_argument('--width', type=int, default=320,
                    help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
parser.add_argument('--height', type=int, default=320,
                    help='Preprocess input image by resizing to a specific height. It should be a multiple of 32.')
parser.add_argument('--thr', type=float, default=0.5,
                    help='Confidence threshold.')
parser.add_argument('--nms', type=float, default=0.4,
                    help='Non-maximum suppression threshold.')
args = parser.parse_args()
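# Example invocation (the script name and file paths here are illustrative):
#   python text_detection.py --input image.jpg --model frozen_east_text_detection.pb --width 320 --height 320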

############ Utility functions ############
def decode(scores, geometry, scoreThresh):
    detections = []
    confidences = []

    ############ CHECK DIMENSIONS AND SHAPES OF geometry AND scores ############
    assert len(scores.shape) == 4, "Incorrect dimensions of scores"
    assert len(geometry.shape) == 4, "Incorrect dimensions of geometry"
    assert scores.shape[0] == 1, "Invalid dimensions of scores"
    assert geometry.shape[0] == 1, "Invalid dimensions of geometry"
    assert scores.shape[1] == 1, "Invalid dimensions of scores"
    assert geometry.shape[1] == 5, "Invalid dimensions of geometry"
    assert scores.shape[2] == geometry.shape[2], "Invalid dimensions of scores and geometry"
    assert scores.shape[3] == geometry.shape[3], "Invalid dimensions of scores and geometry"
    height = scores.shape[2]
    width = scores.shape[3]
    for y in range(0, height):

        # Extract data from scores
        scoresData = scores[0][0][y]
        x0_data = geometry[0][0][y]
        x1_data = geometry[0][1][y]
        x2_data = geometry[0][2][y]
        x3_data = geometry[0][3][y]
        anglesData = geometry[0][4][y]
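        # The five geometry channels hold, per cell, the distances to the top,
        # right, bottom and left sides of the predicted text box plus its rotation
        # angle (so below h = top + bottom and w = right + left).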
        for x in range(0, width):
            score = scoresData[x]

            # If score is lower than threshold score, move to next x
            if score < scoreThresh:
                continue

            # Calculate the offset of this cell in the input image
            # (the output maps are at 1/4 of the input resolution)
            offsetX = x * 4.0
            offsetY = y * 4.0
            angle = anglesData[x]

            # Calculate cos and sin of angle
            cosA = math.cos(angle)
            sinA = math.sin(angle)
            h = x0_data[x] + x2_data[x]
            w = x1_data[x] + x3_data[x]

            # Calculate one corner of the rotated rectangle from the offsets
            offset = [offsetX + cosA * x1_data[x] + sinA * x2_data[x], offsetY - sinA * x1_data[x] + cosA * x2_data[x]]

            # Find points for rectangle
            p1 = (-sinA * h + offset[0], -cosA * h + offset[1])
            p3 = (-cosA * w + offset[0], sinA * w + offset[1])
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
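            # Store the box in OpenCV RotatedRect form: (center, (width, height), angle in degrees),
            # which is the format NMSBoxesRotated and boxPoints expect further down.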
            detections.append((center, (w, h), -1 * angle * 180.0 / math.pi))
            confidences.append(float(score))

    # Return detections and confidences
    return [detections, confidences]

def main():
    # Read and store arguments
    confThreshold = args.thr
    nmsThreshold = args.nms
    inpWidth = args.width
    inpHeight = args.height
    model = args.model

    # Load network
    net = cv.dnn.readNet(model)

    # Create a new named window
    kWinName = "EAST: An Efficient and Accurate Scene Text Detector"
    cv.namedWindow(kWinName, cv.WINDOW_NORMAL)
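    # The EAST graph exposes two outputs: the sigmoid score map with per-cell text
    # confidences and the concatenated geometry map used to reconstruct the boxes.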
    outNames = []
    outNames.append("feature_fusion/Conv_7/Sigmoid")
    outNames.append("feature_fusion/concat_3")

    # Open a video file or an image file or a camera stream
    cap = cv.VideoCapture(args.input if args.input else 0)

    while cv.waitKey(1) < 0:
        # Read frame
        hasFrame, frame = cap.read()
        if not hasFrame:
            cv.waitKey()
            break

        # Get frame height and width
        height_ = frame.shape[0]
        width_ = frame.shape[1]
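        # Ratios used to map box coordinates from the resized network input
        # back to the original frame size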
        rW = width_ / float(inpWidth)
        rH = height_ / float(inpHeight)

        # Create a 4D blob from frame.
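        # Resize to (inpWidth, inpHeight), subtract the per-channel means the public
        # EAST model was trained with (123.68, 116.78, 103.94), swap the R and B
        # channels (the TensorFlow model expects RGB input), and do not crop.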
        blob = cv.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)

        # Run the model
        net.setInput(blob)
        outs = net.forward(outNames)
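        # getPerfProfile reports the total inference time in ticks;
        # divide by the tick frequency to convert it to seconds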
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())

        # Get scores and geometry
        scores = outs[0]
        geometry = outs[1]
        [boxes, confidences] = decode(scores, geometry, confThreshold)

        # Apply NMS
        indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
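        # NMSBoxesRotated discards low-confidence boxes and keeps only the
        # best-scoring box among heavily overlapping rotated candidates,
        # returning the indices of the surviving detections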
        for i in indices:
            # get 4 corners of the rotated rect
            vertices = cv.boxPoints(boxes[i[0]])
            # scale the bounding box coordinates based on the respective ratios
            for j in range(4):
                vertices[j][0] *= rW
                vertices[j][1] *= rH
            for j in range(4):
                p1 = (int(vertices[j][0]), int(vertices[j][1]))
                p2 = (int(vertices[(j + 1) % 4][0]), int(vertices[(j + 1) % 4][1]))
                cv.line(frame, p1, p2, (0, 255, 0), 1)

        # Put efficiency information
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        # Display the frame
        cv.imshow(kWinName, frame)

if __name__ == "__main__":
    main()