Merge pull request #10979 from dkurt:unite_dnn_samples
commit
ab110c0ad1
45 changed files with 2301 additions and 10810 deletions
@ -1 +0,0 @@ |
||||
*.caffemodel |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,20 +1,20 @@ |
||||
Unlabeled 0 0 0 |
||||
Road 128 64 128 |
||||
Sidewalk 244 35 232 |
||||
Building 70 70 70 |
||||
Wall 102 102 156 |
||||
Fence 190 153 153 |
||||
Pole 153 153 153 |
||||
TrafficLight 250 170 30 |
||||
TrafficSign 220 220 0 |
||||
Vegetation 107 142 35 |
||||
Terrain 152 251 152 |
||||
Sky 70 130 180 |
||||
Person 220 20 60 |
||||
Rider 255 0 0 |
||||
Car 0 0 142 |
||||
Truck 0 0 70 |
||||
Bus 0 60 100 |
||||
Train 0 80 100 |
||||
Motorcycle 0 0 230 |
||||
Bicycle 119 11 32 |
||||
Unlabeled |
||||
Road |
||||
Sidewalk |
||||
Building |
||||
Wall |
||||
Fence |
||||
Pole |
||||
TrafficLight |
||||
TrafficSign |
||||
Vegetation |
||||
Terrain |
||||
Sky |
||||
Person |
||||
Rider |
||||
Car |
||||
Truck |
||||
Bus |
||||
Train |
||||
Motorcycle |
||||
Bicycle |
||||
|
@ -1,502 +0,0 @@ |
||||
# |
||||
# This prototxt is based on voc-fcn32s/val.prototxt file from |
||||
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under |
||||
# Caffe (BSD) license: |
||||
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license |
||||
# |
||||
name: "voc-fcn32s" |
||||
input: "data" |
||||
input_dim: 1 |
||||
input_dim: 3 |
||||
input_dim: 500 |
||||
input_dim: 500 |
||||
layer { |
||||
name: "conv1_1" |
||||
type: "Convolution" |
||||
bottom: "data" |
||||
top: "conv1_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 64 |
||||
pad: 100 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1_1" |
||||
type: "ReLU" |
||||
bottom: "conv1_1" |
||||
top: "conv1_1" |
||||
} |
||||
layer { |
||||
name: "conv1_2" |
||||
type: "Convolution" |
||||
bottom: "conv1_1" |
||||
top: "conv1_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 64 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1_2" |
||||
type: "ReLU" |
||||
bottom: "conv1_2" |
||||
top: "conv1_2" |
||||
} |
||||
layer { |
||||
name: "pool1" |
||||
type: "Pooling" |
||||
bottom: "conv1_2" |
||||
top: "pool1" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv2_1" |
||||
type: "Convolution" |
||||
bottom: "pool1" |
||||
top: "conv2_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 128 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu2_1" |
||||
type: "ReLU" |
||||
bottom: "conv2_1" |
||||
top: "conv2_1" |
||||
} |
||||
layer { |
||||
name: "conv2_2" |
||||
type: "Convolution" |
||||
bottom: "conv2_1" |
||||
top: "conv2_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 128 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu2_2" |
||||
type: "ReLU" |
||||
bottom: "conv2_2" |
||||
top: "conv2_2" |
||||
} |
||||
layer { |
||||
name: "pool2" |
||||
type: "Pooling" |
||||
bottom: "conv2_2" |
||||
top: "pool2" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv3_1" |
||||
type: "Convolution" |
||||
bottom: "pool2" |
||||
top: "conv3_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3_1" |
||||
type: "ReLU" |
||||
bottom: "conv3_1" |
||||
top: "conv3_1" |
||||
} |
||||
layer { |
||||
name: "conv3_2" |
||||
type: "Convolution" |
||||
bottom: "conv3_1" |
||||
top: "conv3_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3_2" |
||||
type: "ReLU" |
||||
bottom: "conv3_2" |
||||
top: "conv3_2" |
||||
} |
||||
layer { |
||||
name: "conv3_3" |
||||
type: "Convolution" |
||||
bottom: "conv3_2" |
||||
top: "conv3_3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3_3" |
||||
type: "ReLU" |
||||
bottom: "conv3_3" |
||||
top: "conv3_3" |
||||
} |
||||
layer { |
||||
name: "pool3" |
||||
type: "Pooling" |
||||
bottom: "conv3_3" |
||||
top: "pool3" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv4_1" |
||||
type: "Convolution" |
||||
bottom: "pool3" |
||||
top: "conv4_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu4_1" |
||||
type: "ReLU" |
||||
bottom: "conv4_1" |
||||
top: "conv4_1" |
||||
} |
||||
layer { |
||||
name: "conv4_2" |
||||
type: "Convolution" |
||||
bottom: "conv4_1" |
||||
top: "conv4_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu4_2" |
||||
type: "ReLU" |
||||
bottom: "conv4_2" |
||||
top: "conv4_2" |
||||
} |
||||
layer { |
||||
name: "conv4_3" |
||||
type: "Convolution" |
||||
bottom: "conv4_2" |
||||
top: "conv4_3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu4_3" |
||||
type: "ReLU" |
||||
bottom: "conv4_3" |
||||
top: "conv4_3" |
||||
} |
||||
layer { |
||||
name: "pool4" |
||||
type: "Pooling" |
||||
bottom: "conv4_3" |
||||
top: "pool4" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv5_1" |
||||
type: "Convolution" |
||||
bottom: "pool4" |
||||
top: "conv5_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu5_1" |
||||
type: "ReLU" |
||||
bottom: "conv5_1" |
||||
top: "conv5_1" |
||||
} |
||||
layer { |
||||
name: "conv5_2" |
||||
type: "Convolution" |
||||
bottom: "conv5_1" |
||||
top: "conv5_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu5_2" |
||||
type: "ReLU" |
||||
bottom: "conv5_2" |
||||
top: "conv5_2" |
||||
} |
||||
layer { |
||||
name: "conv5_3" |
||||
type: "Convolution" |
||||
bottom: "conv5_2" |
||||
top: "conv5_3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu5_3" |
||||
type: "ReLU" |
||||
bottom: "conv5_3" |
||||
top: "conv5_3" |
||||
} |
||||
layer { |
||||
name: "pool5" |
||||
type: "Pooling" |
||||
bottom: "conv5_3" |
||||
top: "pool5" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "fc6" |
||||
type: "Convolution" |
||||
bottom: "pool5" |
||||
top: "fc6" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 4096 |
||||
pad: 0 |
||||
kernel_size: 7 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu6" |
||||
type: "ReLU" |
||||
bottom: "fc6" |
||||
top: "fc6" |
||||
} |
||||
layer { |
||||
name: "fc7" |
||||
type: "Convolution" |
||||
bottom: "fc6" |
||||
top: "fc7" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 4096 |
||||
pad: 0 |
||||
kernel_size: 1 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu7" |
||||
type: "ReLU" |
||||
bottom: "fc7" |
||||
top: "fc7" |
||||
} |
||||
layer { |
||||
name: "score_fr" |
||||
type: "Convolution" |
||||
bottom: "fc7" |
||||
top: "score_fr" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
pad: 0 |
||||
kernel_size: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "upscore" |
||||
type: "Deconvolution" |
||||
bottom: "score_fr" |
||||
top: "upscore" |
||||
param { |
||||
lr_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
bias_term: false |
||||
kernel_size: 64 |
||||
stride: 32 |
||||
} |
||||
} |
||||
layer { |
||||
name: "score" |
||||
type: "Crop" |
||||
bottom: "upscore" |
||||
bottom: "data" |
||||
top: "score" |
||||
crop_param { |
||||
axis: 2 |
||||
offset: 19 |
||||
} |
||||
} |
@ -1,612 +0,0 @@ |
||||
# |
||||
# This prototxt is based on voc-fcn8s/val.prototxt file from |
||||
# https://github.com/shelhamer/fcn.berkeleyvision.org, which is distributed under |
||||
# Caffe (BSD) license: |
||||
# http://caffe.berkeleyvision.org/model_zoo.html#bvlc-model-license |
||||
# |
||||
name: "voc-fcn8s" |
||||
input: "data" |
||||
input_dim: 1 |
||||
input_dim: 3 |
||||
input_dim: 500 |
||||
input_dim: 500 |
||||
layer { |
||||
name: "conv1_1" |
||||
type: "Convolution" |
||||
bottom: "data" |
||||
top: "conv1_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 64 |
||||
pad: 100 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1_1" |
||||
type: "ReLU" |
||||
bottom: "conv1_1" |
||||
top: "conv1_1" |
||||
} |
||||
layer { |
||||
name: "conv1_2" |
||||
type: "Convolution" |
||||
bottom: "conv1_1" |
||||
top: "conv1_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 64 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu1_2" |
||||
type: "ReLU" |
||||
bottom: "conv1_2" |
||||
top: "conv1_2" |
||||
} |
||||
layer { |
||||
name: "pool1" |
||||
type: "Pooling" |
||||
bottom: "conv1_2" |
||||
top: "pool1" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv2_1" |
||||
type: "Convolution" |
||||
bottom: "pool1" |
||||
top: "conv2_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 128 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu2_1" |
||||
type: "ReLU" |
||||
bottom: "conv2_1" |
||||
top: "conv2_1" |
||||
} |
||||
layer { |
||||
name: "conv2_2" |
||||
type: "Convolution" |
||||
bottom: "conv2_1" |
||||
top: "conv2_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 128 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu2_2" |
||||
type: "ReLU" |
||||
bottom: "conv2_2" |
||||
top: "conv2_2" |
||||
} |
||||
layer { |
||||
name: "pool2" |
||||
type: "Pooling" |
||||
bottom: "conv2_2" |
||||
top: "pool2" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv3_1" |
||||
type: "Convolution" |
||||
bottom: "pool2" |
||||
top: "conv3_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3_1" |
||||
type: "ReLU" |
||||
bottom: "conv3_1" |
||||
top: "conv3_1" |
||||
} |
||||
layer { |
||||
name: "conv3_2" |
||||
type: "Convolution" |
||||
bottom: "conv3_1" |
||||
top: "conv3_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3_2" |
||||
type: "ReLU" |
||||
bottom: "conv3_2" |
||||
top: "conv3_2" |
||||
} |
||||
layer { |
||||
name: "conv3_3" |
||||
type: "Convolution" |
||||
bottom: "conv3_2" |
||||
top: "conv3_3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 256 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu3_3" |
||||
type: "ReLU" |
||||
bottom: "conv3_3" |
||||
top: "conv3_3" |
||||
} |
||||
layer { |
||||
name: "pool3" |
||||
type: "Pooling" |
||||
bottom: "conv3_3" |
||||
top: "pool3" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv4_1" |
||||
type: "Convolution" |
||||
bottom: "pool3" |
||||
top: "conv4_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu4_1" |
||||
type: "ReLU" |
||||
bottom: "conv4_1" |
||||
top: "conv4_1" |
||||
} |
||||
layer { |
||||
name: "conv4_2" |
||||
type: "Convolution" |
||||
bottom: "conv4_1" |
||||
top: "conv4_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu4_2" |
||||
type: "ReLU" |
||||
bottom: "conv4_2" |
||||
top: "conv4_2" |
||||
} |
||||
layer { |
||||
name: "conv4_3" |
||||
type: "Convolution" |
||||
bottom: "conv4_2" |
||||
top: "conv4_3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu4_3" |
||||
type: "ReLU" |
||||
bottom: "conv4_3" |
||||
top: "conv4_3" |
||||
} |
||||
layer { |
||||
name: "pool4" |
||||
type: "Pooling" |
||||
bottom: "conv4_3" |
||||
top: "pool4" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "conv5_1" |
||||
type: "Convolution" |
||||
bottom: "pool4" |
||||
top: "conv5_1" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu5_1" |
||||
type: "ReLU" |
||||
bottom: "conv5_1" |
||||
top: "conv5_1" |
||||
} |
||||
layer { |
||||
name: "conv5_2" |
||||
type: "Convolution" |
||||
bottom: "conv5_1" |
||||
top: "conv5_2" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu5_2" |
||||
type: "ReLU" |
||||
bottom: "conv5_2" |
||||
top: "conv5_2" |
||||
} |
||||
layer { |
||||
name: "conv5_3" |
||||
type: "Convolution" |
||||
bottom: "conv5_2" |
||||
top: "conv5_3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 512 |
||||
pad: 1 |
||||
kernel_size: 3 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu5_3" |
||||
type: "ReLU" |
||||
bottom: "conv5_3" |
||||
top: "conv5_3" |
||||
} |
||||
layer { |
||||
name: "pool5" |
||||
type: "Pooling" |
||||
bottom: "conv5_3" |
||||
top: "pool5" |
||||
pooling_param { |
||||
pool: MAX |
||||
kernel_size: 2 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "fc6" |
||||
type: "Convolution" |
||||
bottom: "pool5" |
||||
top: "fc6" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 4096 |
||||
pad: 0 |
||||
kernel_size: 7 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu6" |
||||
type: "ReLU" |
||||
bottom: "fc6" |
||||
top: "fc6" |
||||
} |
||||
layer { |
||||
name: "fc7" |
||||
type: "Convolution" |
||||
bottom: "fc6" |
||||
top: "fc7" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 4096 |
||||
pad: 0 |
||||
kernel_size: 1 |
||||
stride: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "relu7" |
||||
type: "ReLU" |
||||
bottom: "fc7" |
||||
top: "fc7" |
||||
} |
||||
layer { |
||||
name: "score_fr" |
||||
type: "Convolution" |
||||
bottom: "fc7" |
||||
top: "score_fr" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
pad: 0 |
||||
kernel_size: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "upscore2" |
||||
type: "Deconvolution" |
||||
bottom: "score_fr" |
||||
top: "upscore2" |
||||
param { |
||||
lr_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
bias_term: false |
||||
kernel_size: 4 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "score_pool4" |
||||
type: "Convolution" |
||||
bottom: "pool4" |
||||
top: "score_pool4" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
pad: 0 |
||||
kernel_size: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "score_pool4c" |
||||
type: "Crop" |
||||
bottom: "score_pool4" |
||||
bottom: "upscore2" |
||||
top: "score_pool4c" |
||||
crop_param { |
||||
axis: 2 |
||||
offset: 5 |
||||
} |
||||
} |
||||
layer { |
||||
name: "fuse_pool4" |
||||
type: "Eltwise" |
||||
bottom: "upscore2" |
||||
bottom: "score_pool4c" |
||||
top: "fuse_pool4" |
||||
eltwise_param { |
||||
operation: SUM |
||||
} |
||||
} |
||||
layer { |
||||
name: "upscore_pool4" |
||||
type: "Deconvolution" |
||||
bottom: "fuse_pool4" |
||||
top: "upscore_pool4" |
||||
param { |
||||
lr_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
bias_term: false |
||||
kernel_size: 4 |
||||
stride: 2 |
||||
} |
||||
} |
||||
layer { |
||||
name: "score_pool3" |
||||
type: "Convolution" |
||||
bottom: "pool3" |
||||
top: "score_pool3" |
||||
param { |
||||
lr_mult: 1 |
||||
decay_mult: 1 |
||||
} |
||||
param { |
||||
lr_mult: 2 |
||||
decay_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
pad: 0 |
||||
kernel_size: 1 |
||||
} |
||||
} |
||||
layer { |
||||
name: "score_pool3c" |
||||
type: "Crop" |
||||
bottom: "score_pool3" |
||||
bottom: "upscore_pool4" |
||||
top: "score_pool3c" |
||||
crop_param { |
||||
axis: 2 |
||||
offset: 9 |
||||
} |
||||
} |
||||
layer { |
||||
name: "fuse_pool3" |
||||
type: "Eltwise" |
||||
bottom: "upscore_pool4" |
||||
bottom: "score_pool3c" |
||||
top: "fuse_pool3" |
||||
eltwise_param { |
||||
operation: SUM |
||||
} |
||||
} |
||||
layer { |
||||
name: "upscore8" |
||||
type: "Deconvolution" |
||||
bottom: "fuse_pool3" |
||||
top: "upscore8" |
||||
param { |
||||
lr_mult: 0 |
||||
} |
||||
convolution_param { |
||||
num_output: 21 |
||||
bias_term: false |
||||
kernel_size: 16 |
||||
stride: 8 |
||||
} |
||||
} |
||||
layer { |
||||
name: "score" |
||||
type: "Crop" |
||||
bottom: "upscore8" |
||||
bottom: "data" |
||||
top: "score" |
||||
crop_param { |
||||
axis: 2 |
||||
offset: 31 |
||||
} |
||||
} |
@ -0,0 +1,90 @@ |
||||
person |
||||
bicycle |
||||
car |
||||
motorcycle |
||||
airplane |
||||
bus |
||||
train |
||||
truck |
||||
boat |
||||
traffic light |
||||
fire hydrant |
||||
|
||||
stop sign |
||||
parking meter |
||||
bench |
||||
bird |
||||
cat |
||||
dog |
||||
horse |
||||
sheep |
||||
cow |
||||
elephant |
||||
bear |
||||
zebra |
||||
giraffe |
||||
|
||||
backpack |
||||
umbrella |
||||
|
||||
|
||||
handbag |
||||
tie |
||||
suitcase |
||||
frisbee |
||||
skis |
||||
snowboard |
||||
sports ball |
||||
kite |
||||
baseball bat |
||||
baseball glove |
||||
skateboard |
||||
surfboard |
||||
tennis racket |
||||
bottle |
||||
|
||||
wine glass |
||||
cup |
||||
fork |
||||
knife |
||||
spoon |
||||
bowl |
||||
banana |
||||
apple |
||||
sandwich |
||||
orange |
||||
broccoli |
||||
carrot |
||||
hot dog |
||||
pizza |
||||
donut |
||||
cake |
||||
chair |
||||
couch |
||||
potted plant |
||||
bed |
||||
|
||||
dining table |
||||
|
||||
|
||||
toilet |
||||
|
||||
tv |
||||
laptop |
||||
mouse |
||||
remote |
||||
keyboard |
||||
cell phone |
||||
microwave |
||||
oven |
||||
toaster |
||||
sink |
||||
refrigerator |
||||
|
||||
book |
||||
clock |
||||
vase |
||||
scissors |
||||
teddy bear |
||||
hair drier |
||||
toothbrush |
@ -0,0 +1,20 @@ |
||||
aeroplane |
||||
bicycle |
||||
bird |
||||
boat |
||||
bottle |
||||
bus |
||||
car |
||||
cat |
||||
chair |
||||
cow |
||||
diningtable |
||||
dog |
||||
horse |
||||
motorbike |
||||
person |
||||
pottedplant |
||||
sheep |
||||
sofa |
||||
train |
||||
tvmonitor |
@ -1,21 +0,0 @@ |
||||
background 0 0 0 |
||||
aeroplane 128 0 0 |
||||
bicycle 0 128 0 |
||||
bird 128 128 0 |
||||
boat 0 0 128 |
||||
bottle 128 0 128 |
||||
bus 0 128 128 |
||||
car 128 128 128 |
||||
cat 64 0 0 |
||||
chair 192 0 0 |
||||
cow 64 128 0 |
||||
diningtable 192 128 0 |
||||
dog 64 0 128 |
||||
horse 192 0 128 |
||||
motorbike 64 128 128 |
||||
person 192 128 128 |
||||
pottedplant 0 64 0 |
||||
sheep 128 64 0 |
||||
sofa 0 192 0 |
||||
train 128 192 0 |
||||
tvmonitor 0 64 128 |
Before Width: | Height: | Size: 46 KiB |
Before Width: | Height: | Size: 27 KiB |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,33 @@ |
||||
# OpenCV deep learning module samples |
||||
|
||||
## Model Zoo |
||||
|
||||
### Object detection |
||||
|
||||
| Model | Scale | Size WxH| Mean subtraction | Channels order | |
||||
|---------------|-------|-----------|--------------------|-------| |
||||
| [MobileNet-SSD, Caffe](https://github.com/chuanqi305/MobileNet-SSD/) | `0.00784 (2/255)` | `300x300` | `127.5 127.5 127.5` | BGR | |
||||
| [OpenCV face detector](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector) | `1.0` | `300x300` | `104 177 123` | BGR | |
||||
| [SSDs from TensorFlow](https://github.com/tensorflow/models/tree/master/research/object_detection/) | `0.00784 (2/255)` | `300x300` | `127.5 127.5 127.5` | RGB | |
||||
| [YOLO](https://pjreddie.com/darknet/yolo/) | `0.00392 (1/255)` | `416x416` | `0 0 0` | RGB | |
||||
| [VGG16-SSD](https://github.com/weiliu89/caffe/tree/ssd) | `1.0` | `300x300` | `104 117 123` | BGR | |
||||
| [Faster-RCNN](https://github.com/rbgirshick/py-faster-rcnn) | `1.0` | `800x600` | `102.9801, 115.9465, 122.7717` | BGR | |
||||
| [R-FCN](https://github.com/YuwenXiong/py-R-FCN) | `1.0` | `800x600` | `102.9801 115.9465 122.7717` | BGR | |
||||
|
||||
### Classification |
||||
| Model | Scale | Size WxH| Mean subtraction | Channels order | |
||||
|---------------|-------|-----------|--------------------|-------| |
||||
| GoogLeNet | `1.0` | `224x224` | `104 117 123` | BGR | |
||||
| [SqueezeNet](https://github.com/DeepScale/SqueezeNet) | `1.0` | `227x227` | `0 0 0` | BGR | |
||||
|
||||
### Semantic segmentation |
||||
| Model | Scale | Size WxH| Mean subtraction | Channels order | |
||||
|---------------|-------|-----------|--------------------|-------| |
||||
| [ENet](https://github.com/e-lab/ENet-training) | `0.00392 (1/255)` | `1024x512` | `0 0 0` | RGB | |
||||
| FCN8s | `1.0` | `500x500` | `0 0 0` | BGR | |
||||
|
||||
## References |
||||
* [Models downloading script](https://github.com/opencv/opencv_extra/blob/master/testdata/dnn/download_models.py) |
||||
* [Configuration files adopted for OpenCV](https://github.com/opencv/opencv_extra/tree/master/testdata/dnn) |
||||
* [How to import models from TensorFlow Object Detection API](https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API) |
||||
* [Names of classes from different datasets](https://github.com/opencv/opencv/tree/master/samples/data/dnn) |
@ -1,181 +0,0 @@ |
||||
/**M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
#include <opencv2/core/utils/trace.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
using namespace std; |
||||
|
||||
/* Find best class for the blob (i. e. class with maximal probability) */ |
||||
static void getMaxClass(const Mat &probBlob, int *classId, double *classProb) |
||||
{ |
||||
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
|
||||
Point classNumber; |
||||
|
||||
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber); |
||||
*classId = classNumber.x; |
||||
} |
||||
|
||||
static std::vector<String> readClassNames(const char *filename ) |
||||
{ |
||||
std::vector<String> classNames; |
||||
|
||||
std::ifstream fp(filename); |
||||
if (!fp.is_open()) |
||||
{ |
||||
std::cerr << "File with classes labels not found: " << filename << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
std::string name; |
||||
while (!fp.eof()) |
||||
{ |
||||
std::getline(fp, name); |
||||
if (name.length()) |
||||
classNames.push_back( name.substr(name.find(' ')+1) ); |
||||
} |
||||
|
||||
fp.close(); |
||||
return classNames; |
||||
} |
||||
|
||||
const char* params |
||||
= "{ help | false | Sample app for loading googlenet model }" |
||||
"{ proto | bvlc_googlenet.prototxt | model configuration }" |
||||
"{ model | bvlc_googlenet.caffemodel | model weights }" |
||||
"{ label | synset_words.txt | names of ILSVRC2012 classes }" |
||||
"{ image | space_shuttle.jpg | path to image file }" |
||||
"{ opencl | false | enable OpenCL }" |
||||
; |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
CV_TRACE_FUNCTION(); |
||||
|
||||
CommandLineParser parser(argc, argv, params); |
||||
|
||||
if (parser.get<bool>("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String modelTxt = parser.get<string>("proto"); |
||||
String modelBin = parser.get<string>("model"); |
||||
String imageFile = parser.get<String>("image"); |
||||
String classNameFile = parser.get<String>("label"); |
||||
|
||||
Net net; |
||||
try { |
||||
//! [Read and initialize network]
|
||||
net = dnn::readNetFromCaffe(modelTxt, modelBin); |
||||
//! [Read and initialize network]
|
||||
} |
||||
catch (const cv::Exception& e) { |
||||
std::cerr << "Exception: " << e.what() << std::endl; |
||||
//! [Check that network was read successfully]
|
||||
if (net.empty()) |
||||
{ |
||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
||||
std::cerr << "prototxt: " << modelTxt << std::endl; |
||||
std::cerr << "caffemodel: " << modelBin << std::endl; |
||||
std::cerr << "bvlc_googlenet.caffemodel can be downloaded here:" << std::endl; |
||||
std::cerr << "http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel" << std::endl; |
||||
exit(-1); |
||||
} |
||||
//! [Check that network was read successfully]
|
||||
} |
||||
|
||||
if (parser.get<bool>("opencl")) |
||||
{ |
||||
net.setPreferableTarget(DNN_TARGET_OPENCL); |
||||
} |
||||
|
||||
//! [Prepare blob]
|
||||
Mat img = imread(imageFile); |
||||
if (img.empty()) |
||||
{ |
||||
std::cerr << "Can't read image from the file: " << imageFile << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
//GoogLeNet accepts only 224x224 BGR-images
|
||||
Mat inputBlob = blobFromImage(img, 1.0f, Size(224, 224), |
||||
Scalar(104, 117, 123), false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
net.setInput(inputBlob, "data"); //set the network input
|
||||
Mat prob = net.forward("prob"); //compute output
|
||||
|
||||
cv::TickMeter t; |
||||
for (int i = 0; i < 10; i++) |
||||
{ |
||||
CV_TRACE_REGION("forward"); |
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob, "data"); //set the network input
|
||||
//! [Set input blob]
|
||||
t.start(); |
||||
//! [Make forward pass]
|
||||
prob = net.forward("prob"); //compute output
|
||||
//! [Make forward pass]
|
||||
t.stop(); |
||||
} |
||||
|
||||
//! [Gather output]
|
||||
int classId; |
||||
double classProb; |
||||
getMaxClass(prob, &classId, &classProb);//find the best class
|
||||
//! [Gather output]
|
||||
|
||||
//! [Print results]
|
||||
std::vector<String> classNames = readClassNames(classNameFile.c_str()); |
||||
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl; |
||||
std::cout << "Probability: " << classProb * 100 << "%" << std::endl; |
||||
//! [Print results]
|
||||
std::cout << "Time: " << (double)t.getTimeMilli() / t.getCounter() << " ms (average from " << t.getCounter() << " iterations)" << std::endl; |
||||
|
||||
return 0; |
||||
} //main
|
@ -0,0 +1,136 @@ |
||||
#include <fstream> |
||||
#include <sstream> |
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
|
||||
const char* keys = |
||||
"{ help h | | Print help message. }" |
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" |
||||
"{ model m | | Path to a binary file of model contains trained weights. " |
||||
"It could be a file with extensions .caffemodel (Caffe), " |
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }" |
||||
"{ config c | | Path to a text file of model contains network configuration. " |
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }" |
||||
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" |
||||
"{ classes | | Optional path to a text file with names of classes. }" |
||||
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" |
||||
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" |
||||
"{ width | | Preprocess input image by resizing to a specific width. }" |
||||
"{ height | | Preprocess input image by resizing to a specific height. }" |
||||
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" |
||||
"{ backend | 0 | Choose one of computation backends: " |
||||
"0: default C++ backend, " |
||||
"1: Halide language (http://halide-lang.org/), " |
||||
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}" |
||||
"{ target | 0 | Choose one of target computation devices: " |
||||
"0: CPU target (by default)," |
||||
"1: OpenCL }"; |
||||
|
||||
using namespace cv; |
||||
using namespace dnn; |
||||
|
||||
std::vector<std::string> classes; |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
CommandLineParser parser(argc, argv, keys); |
||||
parser.about("Use this script to run classification deep learning networks using OpenCV."); |
||||
if (argc == 1 || parser.has("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
float scale = parser.get<float>("scale"); |
||||
Scalar mean = parser.get<Scalar>("mean"); |
||||
bool swapRB = parser.get<bool>("rgb"); |
||||
CV_Assert(parser.has("width"), parser.has("height")); |
||||
int inpWidth = parser.get<int>("width"); |
||||
int inpHeight = parser.get<int>("height"); |
||||
String model = parser.get<String>("model"); |
||||
String config = parser.get<String>("config"); |
||||
String framework = parser.get<String>("framework"); |
||||
int backendId = parser.get<int>("backend"); |
||||
int targetId = parser.get<int>("target"); |
||||
|
||||
// Open file with classes names.
|
||||
if (parser.has("classes")) |
||||
{ |
||||
std::string file = parser.get<String>("classes"); |
||||
std::ifstream ifs(file.c_str()); |
||||
if (!ifs.is_open()) |
||||
CV_Error(Error::StsError, "File " + file + " not found"); |
||||
std::string line; |
||||
while (std::getline(ifs, line)) |
||||
{ |
||||
classes.push_back(line); |
||||
} |
||||
} |
||||
|
||||
CV_Assert(parser.has("model")); |
||||
//! [Read and initialize network]
|
||||
Net net = readNet(model, config, framework); |
||||
net.setPreferableBackend(backendId); |
||||
net.setPreferableTarget(targetId); |
||||
//! [Read and initialize network]
|
||||
|
||||
// Create a window
|
||||
static const std::string kWinName = "Deep learning image classification in OpenCV"; |
||||
namedWindow(kWinName, WINDOW_NORMAL); |
||||
|
||||
//! [Open a video file or an image file or a camera stream]
|
||||
VideoCapture cap; |
||||
if (parser.has("input")) |
||||
cap.open(parser.get<String>("input")); |
||||
else |
||||
cap.open(0); |
||||
//! [Open a video file or an image file or a camera stream]
|
||||
|
||||
// Process frames.
|
||||
Mat frame, blob; |
||||
while (waitKey(1) < 0) |
||||
{ |
||||
cap >> frame; |
||||
if (frame.empty()) |
||||
{ |
||||
waitKey(); |
||||
break; |
||||
} |
||||
|
||||
//! [Create a 4D blob from a frame]
|
||||
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); |
||||
//! [Create a 4D blob from a frame]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(blob); |
||||
//! [Set input blob]
|
||||
//! [Make forward pass]
|
||||
Mat prob = net.forward(); |
||||
//! [Make forward pass]
|
||||
|
||||
//! [Get a class with a highest score]
|
||||
Point classIdPoint; |
||||
double confidence; |
||||
minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint); |
||||
int classId = classIdPoint.x; |
||||
//! [Get a class with a highest score]
|
||||
|
||||
// Put efficiency information.
|
||||
std::vector<double> layersTimes; |
||||
double freq = getTickFrequency() / 1000; |
||||
double t = net.getPerfProfile(layersTimes) / freq; |
||||
std::string label = format("Inference time: %.2f ms", t); |
||||
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||
|
||||
// Print predicted class.
|
||||
label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() : |
||||
classes[classId].c_str()), |
||||
confidence); |
||||
putText(frame, label, Point(0, 40), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||
|
||||
imshow(kWinName, frame); |
||||
} |
||||
return 0; |
||||
} |
@ -0,0 +1,86 @@ |
||||
import cv2 as cv |
||||
import argparse |
||||
import numpy as np |
||||
import sys |
||||
|
||||
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) |
||||
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL) |
||||
|
||||
parser = argparse.ArgumentParser(description='Use this script to run classification deep learning networks using OpenCV.') |
||||
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') |
||||
parser.add_argument('--model', required=True, |
||||
help='Path to a binary file of model contains trained weights. ' |
||||
'It could be a file with extensions .caffemodel (Caffe), ' |
||||
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') |
||||
parser.add_argument('--config', |
||||
help='Path to a text file of model contains network configuration. ' |
||||
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') |
||||
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], |
||||
help='Optional name of an origin framework of the model. ' |
||||
'Detect it automatically if it does not set.') |
||||
parser.add_argument('--classes', help='Optional path to a text file with names of classes.') |
||||
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], |
||||
help='Preprocess input image by subtracting mean values. ' |
||||
'Mean values should be in BGR order.') |
||||
parser.add_argument('--scale', type=float, default=1.0, |
||||
help='Preprocess input image by multiplying on a scale factor.') |
||||
parser.add_argument('--width', type=int, required=True, |
||||
help='Preprocess input image by resizing to a specific width.') |
||||
parser.add_argument('--height', type=int, required=True, |
||||
help='Preprocess input image by resizing to a specific height.') |
||||
parser.add_argument('--rgb', action='store_true', |
||||
help='Indicate that model works with RGB input images instead BGR ones.') |
||||
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, |
||||
help="Choose one of computation backends: " |
||||
"%d: default C++ backend, " |
||||
"%d: Halide language (http://halide-lang.org/), " |
||||
"%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends) |
||||
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, |
||||
help='Choose one of target computation devices: ' |
||||
'%d: CPU target (by default), ' |
||||
'%d: OpenCL' % targets) |
||||
args = parser.parse_args() |
||||
|
||||
# Load names of classes |
||||
classes = None |
||||
if args.classes: |
||||
with open(args.classes, 'rt') as f: |
||||
classes = f.read().rstrip('\n').split('\n') |
||||
|
||||
# Load a network |
||||
net = cv.dnn.readNet(args.model, args.config, args.framework) |
||||
net.setPreferableBackend(args.backend) |
||||
net.setPreferableTarget(args.target) |
||||
|
||||
winName = 'Deep learning image classification in OpenCV' |
||||
cv.namedWindow(winName, cv.WINDOW_NORMAL) |
||||
|
||||
cap = cv.VideoCapture(args.input if args.input else 0) |
||||
while cv.waitKey(1) < 0: |
||||
hasFrame, frame = cap.read() |
||||
if not hasFrame: |
||||
cv.waitKey() |
||||
break |
||||
|
||||
# Create a 4D blob from a frame. |
||||
blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False) |
||||
|
||||
# Run a model |
||||
net.setInput(blob) |
||||
out = net.forward() |
||||
|
||||
# Get a class with a highest score. |
||||
out = out.flatten() |
||||
classId = np.argmax(out) |
||||
confidence = out[classId] |
||||
|
||||
# Put efficiency information. |
||||
t, _ = net.getPerfProfile() |
||||
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) |
||||
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) |
||||
|
||||
# Print predicted class. |
||||
label = '%s: %.4f' % (classes[classId] if classes else 'Class #%d' % classId, confidence) |
||||
cv.putText(frame, label, (0, 40), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) |
||||
|
||||
cv.imshow(winName, frame) |
@ -1,93 +0,0 @@ |
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/dnn/all_layers.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
|
||||
using namespace cv; |
||||
using namespace dnn; |
||||
|
||||
const char* keys = |
||||
"{ help h | | print help message }" |
||||
"{ proto p | | path to .prototxt }" |
||||
"{ model m | | path to .caffemodel }" |
||||
"{ image i | | path to input image }" |
||||
"{ conf c | 0.8 | minimal confidence }"; |
||||
|
||||
const char* classNames[] = { |
||||
"__background__", |
||||
"aeroplane", "bicycle", "bird", "boat", |
||||
"bottle", "bus", "car", "cat", "chair", |
||||
"cow", "diningtable", "dog", "horse", |
||||
"motorbike", "person", "pottedplant", |
||||
"sheep", "sofa", "train", "tvmonitor" |
||||
}; |
||||
|
||||
static const int kInpWidth = 800; |
||||
static const int kInpHeight = 600; |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
// Parse command line arguments.
|
||||
CommandLineParser parser(argc, argv, keys); |
||||
parser.about("This sample is used to run Faster-RCNN and R-FCN object detection " |
||||
"models with OpenCV. You can get required models from " |
||||
"https://github.com/rbgirshick/py-faster-rcnn (Faster-RCNN) and from " |
||||
"https://github.com/YuwenXiong/py-R-FCN (R-FCN). Corresponding .prototxt " |
||||
"files may be found at https://github.com/opencv/opencv_extra/tree/master/testdata/dnn."); |
||||
if (argc == 1 || parser.has("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String protoPath = parser.get<String>("proto"); |
||||
String modelPath = parser.get<String>("model"); |
||||
String imagePath = parser.get<String>("image"); |
||||
float confThreshold = parser.get<float>("conf"); |
||||
CV_Assert(!protoPath.empty(), !modelPath.empty(), !imagePath.empty()); |
||||
|
||||
// Load a model.
|
||||
Net net = readNetFromCaffe(protoPath, modelPath); |
||||
|
||||
Mat img = imread(imagePath); |
||||
resize(img, img, Size(kInpWidth, kInpHeight)); |
||||
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false); |
||||
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f); |
||||
|
||||
net.setInput(blob, "data"); |
||||
net.setInput(imInfo, "im_info"); |
||||
|
||||
// Draw detections.
|
||||
Mat detections = net.forward(); |
||||
const float* data = (float*)detections.data; |
||||
for (size_t i = 0; i < detections.total(); i += 7) |
||||
{ |
||||
// An every detection is a vector [id, classId, confidence, left, top, right, bottom]
|
||||
float confidence = data[i + 2]; |
||||
if (confidence > confThreshold) |
||||
{ |
||||
int classId = (int)data[i + 1]; |
||||
int left = max(0, min((int)data[i + 3], img.cols - 1)); |
||||
int top = max(0, min((int)data[i + 4], img.rows - 1)); |
||||
int right = max(0, min((int)data[i + 5], img.cols - 1)); |
||||
int bottom = max(0, min((int)data[i + 6], img.rows - 1)); |
||||
|
||||
// Draw a bounding box.
|
||||
rectangle(img, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); |
||||
|
||||
// Put a label with a class name and confidence.
|
||||
String label = cv::format("%s, %.3f", classNames[classId], confidence); |
||||
int baseLine; |
||||
Size labelSize = cv::getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
||||
|
||||
top = max(top, labelSize.height); |
||||
rectangle(img, Point(left, top - labelSize.height), |
||||
Point(left + labelSize.width, top + baseLine), |
||||
Scalar(255, 255, 255), FILLED); |
||||
putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0)); |
||||
} |
||||
} |
||||
imshow("frame", img); |
||||
waitKey(); |
||||
return 0; |
||||
} |
@ -1,138 +0,0 @@ |
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
using namespace std; |
||||
|
||||
static const string fcnType = "fcn8s"; |
||||
|
||||
static vector<cv::Vec3b> readColors(const string &filename = "pascal-classes.txt") |
||||
{ |
||||
vector<cv::Vec3b> colors; |
||||
|
||||
ifstream fp(filename.c_str()); |
||||
if (!fp.is_open()) |
||||
{ |
||||
cerr << "File with colors not found: " << filename << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
string line; |
||||
while (!fp.eof()) |
||||
{ |
||||
getline(fp, line); |
||||
if (line.length()) |
||||
{ |
||||
stringstream ss(line); |
||||
|
||||
string name; ss >> name; |
||||
int temp; |
||||
cv::Vec3b color; |
||||
ss >> temp; color[0] = (uchar)temp; |
||||
ss >> temp; color[1] = (uchar)temp; |
||||
ss >> temp; color[2] = (uchar)temp; |
||||
colors.push_back(color); |
||||
} |
||||
} |
||||
|
||||
fp.close(); |
||||
return colors; |
||||
} |
||||
|
||||
static void colorizeSegmentation(const Mat &score, const vector<cv::Vec3b> &colors, cv::Mat &segm) |
||||
{ |
||||
const int rows = score.size[2]; |
||||
const int cols = score.size[3]; |
||||
const int chns = score.size[1]; |
||||
|
||||
cv::Mat maxCl=cv::Mat::zeros(rows, cols, CV_8UC1); |
||||
cv::Mat maxVal(rows, cols, CV_32FC1, cv::Scalar(-FLT_MAX)); |
||||
for (int ch = 0; ch < chns; ch++) |
||||
{ |
||||
for (int row = 0; row < rows; row++) |
||||
{ |
||||
const float *ptrScore = score.ptr<float>(0, ch, row); |
||||
uchar *ptrMaxCl = maxCl.ptr<uchar>(row); |
||||
float *ptrMaxVal = maxVal.ptr<float>(row); |
||||
for (int col = 0; col < cols; col++) |
||||
{ |
||||
if (ptrScore[col] > ptrMaxVal[col]) |
||||
{ |
||||
ptrMaxVal[col] = ptrScore[col]; |
||||
ptrMaxCl[col] = (uchar)ch; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
segm.create(rows, cols, CV_8UC3); |
||||
for (int row = 0; row < rows; row++) |
||||
{ |
||||
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); |
||||
cv::Vec3b *ptrSegm = segm.ptr<cv::Vec3b>(row); |
||||
for (int col = 0; col < cols; col++) |
||||
{ |
||||
ptrSegm[col] = colors[ptrMaxCl[col]]; |
||||
} |
||||
} |
||||
|
||||
} |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
String modelTxt = fcnType + "-heavy-pascal.prototxt"; |
||||
String modelBin = fcnType + "-heavy-pascal.caffemodel"; |
||||
String imageFile = (argc > 1) ? argv[1] : "rgb.jpg"; |
||||
|
||||
vector<cv::Vec3b> colors = readColors(); |
||||
|
||||
//! [Initialize network]
|
||||
dnn::Net net = readNetFromCaffe(modelTxt, modelBin); |
||||
//! [Initialize network]
|
||||
|
||||
if (net.empty()) |
||||
{ |
||||
cerr << "Can't load network by using the following files: " << endl; |
||||
cerr << "prototxt: " << modelTxt << endl; |
||||
cerr << "caffemodel: " << modelBin << endl; |
||||
cerr << fcnType << "-heavy-pascal.caffemodel can be downloaded here:" << endl; |
||||
cerr << "http://dl.caffe.berkeleyvision.org/" << fcnType << "-heavy-pascal.caffemodel" << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
//! [Prepare blob]
|
||||
Mat img = imread(imageFile); |
||||
if (img.empty()) |
||||
{ |
||||
cerr << "Can't read image from the file: " << imageFile << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
resize(img, img, Size(500, 500), 0, 0, INTER_LINEAR_EXACT); //FCN accepts 500x500 BGR-images
|
||||
Mat inputBlob = blobFromImage(img, 1, Size(), Scalar(), false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob, "data"); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Make forward pass]
|
||||
double t = (double)cv::getTickCount(); |
||||
Mat score = net.forward("score"); //compute output
|
||||
t = (double)cv::getTickCount() - t; |
||||
printf("processing time: %.1fms\n", t*1000./getTickFrequency()); |
||||
//! [Make forward pass]
|
||||
|
||||
Mat colorize; |
||||
colorizeSegmentation(score, colors, colorize); |
||||
Mat show; |
||||
addWeighted(img, 0.4, colorize, 0.6, 0.0, show); |
||||
imshow("show", show); |
||||
waitKey(0); |
||||
return 0; |
||||
} //main
|
@ -1,24 +0,0 @@ |
||||
from __future__ import print_function |
||||
import numpy as np |
||||
import cv2 as cv |
||||
from cv2 import dnn |
||||
import timeit |
||||
|
||||
def timeit_forward(net): |
||||
print("Runtime:", timeit.timeit(lambda: net.forward(), number=10)) |
||||
|
||||
def get_class_list(): |
||||
with open('synset_words.txt', 'rt') as f: |
||||
return [x[x.find(" ") + 1:] for x in f] |
||||
|
||||
blob = dnn.blobFromImage(cv.imread('space_shuttle.jpg'), 1, (224, 224), (104, 117, 123), False) |
||||
print("Input:", blob.shape, blob.dtype) |
||||
|
||||
net = dnn.readNetFromCaffe('bvlc_googlenet.prototxt', 'bvlc_googlenet.caffemodel') |
||||
net.setInput(blob) |
||||
prob = net.forward() |
||||
#timeit_forward(net) #Uncomment to check performance |
||||
|
||||
print("Output:", prob.shape, prob.dtype) |
||||
classes = get_class_list() |
||||
print("Best match", classes[prob.argmax()]) |
@ -1,132 +0,0 @@ |
||||
# This script is used to demonstrate MobileNet-SSD network using OpenCV deep learning module. |
||||
# |
||||
# It works with model taken from https://github.com/chuanqi305/MobileNet-SSD/ that |
||||
# was trained in Caffe-SSD framework, https://github.com/weiliu89/caffe/tree/ssd. |
||||
# Model detects objects from 20 classes. |
||||
# |
||||
# Also TensorFlow model from TensorFlow object detection model zoo may be used to |
||||
# detect objects from 90 classes: |
||||
# http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz |
||||
# Text graph definition must be taken from opencv_extra: |
||||
# https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/ssd_mobilenet_v1_coco.pbtxt |
||||
import numpy as np |
||||
import argparse |
||||
|
||||
try: |
||||
import cv2 as cv |
||||
except ImportError: |
||||
raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, ' |
||||
'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)') |
||||
|
||||
inWidth = 300 |
||||
inHeight = 300 |
||||
WHRatio = inWidth / float(inHeight) |
||||
inScaleFactor = 0.007843 |
||||
meanVal = 127.5 |
||||
|
||||
if __name__ == "__main__": |
||||
parser = argparse.ArgumentParser( |
||||
description='Script to run MobileNet-SSD object detection network ' |
||||
'trained either in Caffe or TensorFlow frameworks.') |
||||
parser.add_argument("--video", help="path to video file. If empty, camera's stream will be used") |
||||
parser.add_argument("--prototxt", default="MobileNetSSD_deploy.prototxt", |
||||
help='Path to text network file: ' |
||||
'MobileNetSSD_deploy.prototxt for Caffe model or ' |
||||
'ssd_mobilenet_v1_coco.pbtxt from opencv_extra for TensorFlow model') |
||||
parser.add_argument("--weights", default="MobileNetSSD_deploy.caffemodel", |
||||
help='Path to weights: ' |
||||
'MobileNetSSD_deploy.caffemodel for Caffe model or ' |
||||
'frozen_inference_graph.pb from TensorFlow.') |
||||
parser.add_argument("--num_classes", default=20, type=int, |
||||
help="Number of classes. It's 20 for Caffe model from " |
||||
"https://github.com/chuanqi305/MobileNet-SSD/ and 90 for " |
||||
"TensorFlow model from http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_11_06_2017.tar.gz") |
||||
parser.add_argument("--thr", default=0.2, type=float, help="confidence threshold to filter out weak detections") |
||||
args = parser.parse_args() |
||||
|
||||
if args.num_classes == 20: |
||||
net = cv.dnn.readNetFromCaffe(args.prototxt, args.weights) |
||||
swapRB = False |
||||
classNames = { 0: 'background', |
||||
1: 'aeroplane', 2: 'bicycle', 3: 'bird', 4: 'boat', |
||||
5: 'bottle', 6: 'bus', 7: 'car', 8: 'cat', 9: 'chair', |
||||
10: 'cow', 11: 'diningtable', 12: 'dog', 13: 'horse', |
||||
14: 'motorbike', 15: 'person', 16: 'pottedplant', |
||||
17: 'sheep', 18: 'sofa', 19: 'train', 20: 'tvmonitor' } |
||||
else: |
||||
assert(args.num_classes == 90) |
||||
net = cv.dnn.readNetFromTensorflow(args.weights, args.prototxt) |
||||
swapRB = True |
||||
classNames = { 0: 'background', |
||||
1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus', |
||||
7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant', |
||||
13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat', |
||||
18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear', |
||||
24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag', |
||||
32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard', |
||||
37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove', |
||||
41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle', |
||||
46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon', |
||||
51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange', |
||||
56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut', |
||||
61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed', |
||||
67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse', |
||||
75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven', |
||||
80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock', |
||||
86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush' } |
||||
|
||||
if args.video: |
||||
cap = cv.VideoCapture(args.video) |
||||
else: |
||||
cap = cv.VideoCapture(0) |
||||
|
||||
while True: |
||||
# Capture frame-by-frame |
||||
ret, frame = cap.read() |
||||
blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight), (meanVal, meanVal, meanVal), swapRB) |
||||
net.setInput(blob) |
||||
detections = net.forward() |
||||
|
||||
cols = frame.shape[1] |
||||
rows = frame.shape[0] |
||||
|
||||
if cols / float(rows) > WHRatio: |
||||
cropSize = (int(rows * WHRatio), rows) |
||||
else: |
||||
cropSize = (cols, int(cols / WHRatio)) |
||||
|
||||
y1 = int((rows - cropSize[1]) / 2) |
||||
y2 = y1 + cropSize[1] |
||||
x1 = int((cols - cropSize[0]) / 2) |
||||
x2 = x1 + cropSize[0] |
||||
frame = frame[y1:y2, x1:x2] |
||||
|
||||
cols = frame.shape[1] |
||||
rows = frame.shape[0] |
||||
|
||||
for i in range(detections.shape[2]): |
||||
confidence = detections[0, 0, i, 2] |
||||
if confidence > args.thr: |
||||
class_id = int(detections[0, 0, i, 1]) |
||||
|
||||
xLeftBottom = int(detections[0, 0, i, 3] * cols) |
||||
yLeftBottom = int(detections[0, 0, i, 4] * rows) |
||||
xRightTop = int(detections[0, 0, i, 5] * cols) |
||||
yRightTop = int(detections[0, 0, i, 6] * rows) |
||||
|
||||
cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop), |
||||
(0, 255, 0)) |
||||
if class_id in classNames: |
||||
label = classNames[class_id] + ": " + str(confidence) |
||||
labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1) |
||||
|
||||
yLeftBottom = max(yLeftBottom, labelSize[1]) |
||||
cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]), |
||||
(xLeftBottom + labelSize[0], yLeftBottom + baseLine), |
||||
(255, 255, 255), cv.FILLED) |
||||
cv.putText(frame, label, (xLeftBottom, yLeftBottom), |
||||
cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0)) |
||||
|
||||
cv.imshow("detections", frame) |
||||
if cv.waitKey(1) >= 0: |
||||
break |
@ -0,0 +1,229 @@ |
||||
#include <fstream> |
||||
#include <sstream> |
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
|
||||
const char* keys = |
||||
"{ help h | | Print help message. }" |
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" |
||||
"{ model m | | Path to a binary file of model contains trained weights. " |
||||
"It could be a file with extensions .caffemodel (Caffe), " |
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }" |
||||
"{ config c | | Path to a text file of model contains network configuration. " |
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }" |
||||
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" |
||||
"{ classes | | Optional path to a text file with names of classes to label detected objects. }" |
||||
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" |
||||
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" |
||||
"{ width | -1 | Preprocess input image by resizing to a specific width. }" |
||||
"{ height | -1 | Preprocess input image by resizing to a specific height. }" |
||||
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" |
||||
"{ thr | .5 | Confidence threshold. }" |
||||
"{ backend | 0 | Choose one of computation backends: " |
||||
"0: default C++ backend, " |
||||
"1: Halide language (http://halide-lang.org/), " |
||||
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}" |
||||
"{ target | 0 | Choose one of target computation devices: " |
||||
"0: CPU target (by default)," |
||||
"1: OpenCL }"; |
||||
|
||||
using namespace cv; |
||||
using namespace dnn; |
||||
|
||||
float confThreshold; |
||||
std::vector<std::string> classes; |
||||
|
||||
void postprocess(Mat& frame, const Mat& out, Net& net); |
||||
|
||||
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); |
||||
|
||||
void callback(int pos, void* userdata); |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
CommandLineParser parser(argc, argv, keys); |
||||
parser.about("Use this script to run object detection deep learning networks using OpenCV."); |
||||
if (argc == 1 || parser.has("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
confThreshold = parser.get<float>("thr"); |
||||
float scale = parser.get<float>("scale"); |
||||
Scalar mean = parser.get<Scalar>("mean"); |
||||
bool swapRB = parser.get<bool>("rgb"); |
||||
int inpWidth = parser.get<int>("width"); |
||||
int inpHeight = parser.get<int>("height"); |
||||
|
||||
// Open file with classes names.
|
||||
if (parser.has("classes")) |
||||
{ |
||||
std::string file = parser.get<String>("classes"); |
||||
std::ifstream ifs(file.c_str()); |
||||
if (!ifs.is_open()) |
||||
CV_Error(Error::StsError, "File " + file + " not found"); |
||||
std::string line; |
||||
while (std::getline(ifs, line)) |
||||
{ |
||||
classes.push_back(line); |
||||
} |
||||
} |
||||
|
||||
// Load a model.
|
||||
CV_Assert(parser.has("model")); |
||||
Net net = readNet(parser.get<String>("model"), parser.get<String>("config"), parser.get<String>("framework")); |
||||
net.setPreferableBackend(parser.get<int>("backend")); |
||||
net.setPreferableTarget(parser.get<int>("target")); |
||||
|
||||
// Create a window
|
||||
static const std::string kWinName = "Deep learning object detection in OpenCV"; |
||||
namedWindow(kWinName, WINDOW_NORMAL); |
||||
int initialConf = (int)(confThreshold * 100); |
||||
createTrackbar("Confidence threshold, %", kWinName, &initialConf, 99, callback); |
||||
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap; |
||||
if (parser.has("input")) |
||||
cap.open(parser.get<String>("input")); |
||||
else |
||||
cap.open(0); |
||||
|
||||
// Process frames.
|
||||
Mat frame, blob; |
||||
while (waitKey(1) < 0) |
||||
{ |
||||
cap >> frame; |
||||
if (frame.empty()) |
||||
{ |
||||
waitKey(); |
||||
break; |
||||
} |
||||
|
||||
// Create a 4D blob from a frame.
|
||||
Size inpSize(inpWidth > 0 ? inpWidth : frame.cols, |
||||
inpHeight > 0 ? inpHeight : frame.rows); |
||||
blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false); |
||||
|
||||
// Run a model.
|
||||
net.setInput(blob); |
||||
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
|
||||
{ |
||||
resize(frame, frame, inpSize); |
||||
Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f); |
||||
net.setInput(imInfo, "im_info"); |
||||
} |
||||
Mat out = net.forward(); |
||||
|
||||
postprocess(frame, out, net); |
||||
|
||||
// Put efficiency information.
|
||||
std::vector<double> layersTimes; |
||||
double freq = getTickFrequency() / 1000; |
||||
double t = net.getPerfProfile(layersTimes) / freq; |
||||
std::string label = format("Inference time: %.2f ms", t); |
||||
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||
|
||||
imshow(kWinName, frame); |
||||
} |
||||
return 0; |
||||
} |
||||
|
||||
// Parse the raw network output `out` and draw every detection whose score
// exceeds the global confThreshold onto `frame` via drawPred().
// Three output formats are supported, selected by the network structure:
//  - Faster-RCNN / R-FCN (network has an 'im_info' input): boxes in pixels
//    of the resized input frame;
//  - SSD-style 'DetectionOutput' layer: boxes in relative [0, 1] coordinates;
//  - Darknet 'Region' layer: boxes as relative center/size pairs.
void postprocess(Mat& frame, const Mat& out, Net& net)
{
    // Computed once and cached: the unconnected output layer's type decides
    // how `out` must be decoded on every subsequent call.
    static std::vector<int> outLayers = net.getUnconnectedOutLayers();
    static std::string outLayerType = net.getLayer(outLayers[0])->type;

    float* data = (float*)out.data;
    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        for (size_t i = 0; i < out.total(); i += 7)
        {
            float confidence = data[i + 2];
            if (confidence > confThreshold)
            {
                // Coordinates are already in pixels of the (resized) frame.
                int left = (int)data[i + 3];
                int top = (int)data[i + 4];
                int right = (int)data[i + 5];
                int bottom = (int)data[i + 6];
                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
                drawPred(classId, confidence, left, top, right, bottom, frame);
            }
        }
    }
    else if (outLayerType == "DetectionOutput")
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
        for (size_t i = 0; i < out.total(); i += 7)
        {
            float confidence = data[i + 2];
            if (confidence > confThreshold)
            {
                // Coordinates are relative; scale them to the frame size.
                int left = (int)(data[i + 3] * frame.cols);
                int top = (int)(data[i + 4] * frame.rows);
                int right = (int)(data[i + 5] * frame.cols);
                int bottom = (int)(data[i + 6] * frame.rows);
                int classId = (int)(data[i + 1]) - 1;  // Skip 0th background class id.
                drawPred(classId, confidence, left, top, right, bottom, frame);
            }
        }
    }
    else if (outLayerType == "Region")
    {
        // Network produces output blob with a shape NxC where N is a number of
        // detected objects and C is a number of classes + 4 where the first 4
        // numbers are [center_x, center_y, width, height]
        for (int i = 0; i < out.rows; ++i, data += out.cols)
        {
            // Per-class scores start at column 5 (column 4 presumably holds
            // the objectness score -- confirm against the Region layer spec).
            Mat confidences = out.row(i).colRange(5, out.cols);
            Point classIdPoint;
            double confidence;
            minMaxLoc(confidences, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                int classId = classIdPoint.x;
                // Convert relative center/size into a top-left anchored box.
                int centerX = (int)(data[0] * frame.cols);
                int centerY = (int)(data[1] * frame.rows);
                int width = (int)(data[2] * frame.cols);
                int height = (int)(data[3] * frame.rows);
                int left = centerX - width / 2;
                int top = centerY - height / 2;
                drawPred(classId, (float)confidence, left, top, left + width, top + height, frame);
            }
        }
    }
    else
        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);
}
||||
|
||||
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) |
||||
{ |
||||
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); |
||||
|
||||
std::string label = format("%.2f", conf); |
||||
if (!classes.empty()) |
||||
{ |
||||
CV_Assert(classId < (int)classes.size()); |
||||
label = classes[classId] + ": " + label; |
||||
} |
||||
|
||||
int baseLine; |
||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
||||
|
||||
top = max(top, labelSize.height); |
||||
rectangle(frame, Point(left, top - labelSize.height), |
||||
Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED); |
||||
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar()); |
||||
} |
||||
|
||||
void callback(int pos, void*) |
||||
{ |
||||
confThreshold = pos * 0.01f; |
||||
} |
@ -0,0 +1,164 @@ |
||||
import cv2 as cv |
||||
import argparse |
||||
import sys |
||||
import numpy as np |
||||
|
||||
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) |
||||
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL) |
||||
|
||||
parser = argparse.ArgumentParser(description='Use this script to run object detection deep learning networks using OpenCV.') |
||||
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') |
||||
parser.add_argument('--model', required=True, |
||||
help='Path to a binary file of model contains trained weights. ' |
||||
'It could be a file with extensions .caffemodel (Caffe), ' |
||||
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') |
||||
parser.add_argument('--config', |
||||
help='Path to a text file of model contains network configuration. ' |
||||
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') |
||||
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], |
||||
help='Optional name of an origin framework of the model. ' |
||||
'Detect it automatically if it does not set.') |
||||
parser.add_argument('--classes', help='Optional path to a text file with names of classes to label detected objects.') |
||||
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], |
||||
help='Preprocess input image by subtracting mean values. ' |
||||
'Mean values should be in BGR order.') |
||||
parser.add_argument('--scale', type=float, default=1.0, |
||||
help='Preprocess input image by multiplying on a scale factor.') |
||||
parser.add_argument('--width', type=int, |
||||
help='Preprocess input image by resizing to a specific width.') |
||||
parser.add_argument('--height', type=int, |
||||
help='Preprocess input image by resizing to a specific height.') |
||||
parser.add_argument('--rgb', action='store_true', |
||||
help='Indicate that model works with RGB input images instead BGR ones.') |
||||
parser.add_argument('--thr', type=float, default=0.5, help='Confidence threshold') |
||||
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, |
||||
help="Choose one of computation backends: " |
||||
"%d: default C++ backend, " |
||||
"%d: Halide language (http://halide-lang.org/), " |
||||
"%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends) |
||||
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, |
||||
help='Choose one of target computation devices: ' |
||||
'%d: CPU target (by default), ' |
||||
'%d: OpenCL' % targets) |
||||
args = parser.parse_args() |
||||
|
||||
# Load names of classes |
||||
classes = None |
||||
if args.classes: |
||||
with open(args.classes, 'rt') as f: |
||||
classes = f.read().rstrip('\n').split('\n') |
||||
|
||||
# Load a network |
||||
net = cv.dnn.readNet(args.model, args.config, args.framework) |
||||
net.setPreferableBackend(args.backend) |
||||
net.setPreferableTarget(args.target) |
||||
|
||||
confThreshold = args.thr |
||||
|
||||
def postprocess(frame, out):
    """Decode the network output `out` and draw detections onto `frame`.

    Supports three output formats, chosen by the network structure:
    Faster-RCNN/R-FCN (an 'im_info' input, pixel boxes), SSD-style
    'DetectionOutput' layers (relative boxes) and Darknet 'Region'
    layers (relative center/size). Uses the module-level `net`,
    `classes` and `confThreshold`.
    """
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    def drawPred(classId, conf, left, top, right, bottom):
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

        # Use the `conf` parameter, not the enclosing loop's `confidence`
        # closure variable the original accidentally referenced.
        label = '%.2f' % conf

        # Print a label of class.
        if classes:
            assert(classId < len(classes))
            label = '%s: %s' % (classes[classId], label)

        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    layerNames = net.getLayerNames()
    lastLayerId = net.getLayerId(layerNames[-1])
    lastLayer = net.getLayer(lastLayerId)

    if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        # Network produces output blob with a shape 1x1xNx7 where N is a number of
        # detections and an every detection is a vector of values
        # [batchId, classId, confidence, left, top, right, bottom]
        for detection in out[0, 0]:
            confidence = detection[2]
            if confidence > confThreshold:
                left = int(detection[3])
                top = int(detection[4])
                right = int(detection[5])
                bottom = int(detection[6])
                classId = int(detection[1]) - 1  # Skip background label
                drawPred(classId, confidence, left, top, right, bottom)
    elif lastLayer.type == 'DetectionOutput':
        # Network produces output blob with a shape 1x1xNx7 where N is a number of
        # detections and an every detection is a vector of values
        # [batchId, classId, confidence, left, top, right, bottom]
        for detection in out[0, 0]:
            confidence = detection[2]
            if confidence > confThreshold:
                left = int(detection[3] * frameWidth)
                top = int(detection[4] * frameHeight)
                right = int(detection[5] * frameWidth)
                bottom = int(detection[6] * frameHeight)
                classId = int(detection[1]) - 1  # Skip background label
                drawPred(classId, confidence, left, top, right, bottom)
    elif lastLayer.type == 'Region':
        # Network produces output blob with a shape NxC where N is a number of
        # detected objects and C is a number of classes + 4 where the first 4
        # numbers are [center_x, center_y, width, height]
        for detection in out:
            confidences = detection[5:]
            classId = np.argmax(confidences)
            confidence = confidences[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                # Integer division: under Python 3, `/ 2` yields a float and
                # OpenCV drawing functions require integer coordinates.
                left = center_x - width // 2
                top = center_y - height // 2
                drawPred(classId, confidence, left, top, left + width, top + height)
||||
|
||||
# Process inputs
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)

def callback(pos):
    # Trackbar handler: slider stores the threshold as an integer percentage.
    global confThreshold
    confThreshold = pos / 100.0

cv.createTrackbar('Confidence threshold, %', winName, int(confThreshold * 100), 99, callback)

# Read from the given file when --input is set, otherwise from camera 0.
cap = cv.VideoCapture(args.input if args.input else 0)
while cv.waitKey(1) < 0:
    hasFrame, frame = cap.read()
    if not hasFrame:
        # End of stream: keep the last frame on screen until a key press.
        cv.waitKey()
        break

    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    # Create a 4D blob from a frame.
    # When --width/--height are omitted the frame's own size is used.
    inpWidth = args.width if args.width else frameWidth
    inpHeight = args.height if args.height else frameHeight
    blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False)

    # Run a model
    net.setInput(blob)
    if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
        # These networks also need an 'im_info' input with the resized shape.
        frame = cv.resize(frame, (inpWidth, inpHeight))
        net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info');
    out = net.forward()

    postprocess(frame, out)

    # Put efficiency information.
    t, _ = net.getPerfProfile()
    label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
    cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

    cv.imshow(winName, frame)
@ -1,164 +0,0 @@ |
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
#include <iostream> |
||||
|
||||
using namespace cv; |
||||
using namespace std; |
||||
using namespace cv::dnn; |
||||
|
||||
const size_t inWidth = 300; |
||||
const size_t inHeight = 300; |
||||
const double inScaleFactor = 1.0; |
||||
const Scalar meanVal(104.0, 177.0, 123.0); |
||||
|
||||
const char* about = "This sample uses Single-Shot Detector " |
||||
"(https://arxiv.org/abs/1512.02325) " |
||||
"with ResNet-10 architecture to detect faces on camera/video/image.\n" |
||||
"More information about the training is available here: " |
||||
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/how_to_train_face_detector.txt\n" |
||||
".caffemodel model's file is available here: " |
||||
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/res10_300x300_ssd_iter_140000.caffemodel\n" |
||||
".prototxt file is available here: " |
||||
"<OPENCV_SRC_DIR>/samples/dnn/face_detector/deploy.prototxt\n"; |
||||
|
||||
const char* params |
||||
= "{ help | false | print usage }" |
||||
"{ proto | | model configuration (deploy.prototxt) }" |
||||
"{ model | | model weights (res10_300x300_ssd_iter_140000.caffemodel) }" |
||||
"{ camera_device | 0 | camera device number }" |
||||
"{ video | | video or image for detection }" |
||||
"{ opencl | false | enable OpenCL }" |
||||
"{ min_confidence | 0.5 | min confidence }"; |
||||
|
||||
// Entry point of the face-detection sample: loads the Caffe SSD/ResNet-10
// face detector, then runs it on camera frames, a video file or an image,
// drawing a labeled box around every face above the confidence threshold.
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, params);

    if (parser.get<bool>("help"))
    {
        cout << about << endl;
        parser.printMessage();
        return 0;
    }

    String modelConfiguration = parser.get<string>("proto");
    String modelBinary = parser.get<string>("model");

    //! [Initialize network]
    dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary);
    //! [Initialize network]

    // readNetFromCaffe returns an empty Net when the files are missing/bad.
    if (net.empty())
    {
        cerr << "Can't load network by using the following files: " << endl;
        cerr << "prototxt: " << modelConfiguration << endl;
        cerr << "caffemodel: " << modelBinary << endl;
        cerr << "Models are available here:" << endl;
        cerr << "<OPENCV_SRC_DIR>/samples/dnn/face_detector" << endl;
        cerr << "or here:" << endl;
        cerr << "https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector" << endl;
        exit(-1);
    }

    if (parser.get<bool>("opencl"))
    {
        net.setPreferableTarget(DNN_TARGET_OPENCL);
    }

    // Input source: camera by default, or a video/image when --video is set.
    VideoCapture cap;
    if (parser.get<String>("video").empty())
    {
        int cameraDevice = parser.get<int>("camera_device");
        cap = VideoCapture(cameraDevice);
        if(!cap.isOpened())
        {
            cout << "Couldn't find camera: " << cameraDevice << endl;
            return -1;
        }
    }
    else
    {
        cap.open(parser.get<String>("video"));
        if(!cap.isOpened())
        {
            cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
            return -1;
        }
    }

    for(;;)
    {
        Mat frame;
        cap >> frame; // get a new frame from camera/video or read image

        if (frame.empty())
        {
            waitKey();
            break;
        }

        // The network expects 3-channel BGR input; drop an alpha channel.
        if (frame.channels() == 4)
            cvtColor(frame, frame, COLOR_BGRA2BGR);

        //! [Prepare blob]
        Mat inputBlob = blobFromImage(frame, inScaleFactor,
                                      Size(inWidth, inHeight), meanVal, false, false); //Convert Mat to batch of images
        //! [Prepare blob]

        //! [Set input blob]
        net.setInput(inputBlob, "data"); //set the network input
        //! [Set input blob]

        //! [Make forward pass]
        Mat detection = net.forward("detection_out"); //compute output
        //! [Make forward pass]

        vector<double> layersTimings;
        double freq = getTickFrequency() / 1000;
        double time = net.getPerfProfile(layersTimings) / freq;

        // Reshape the 1x1xNx7 output into an Nx7 matrix of detections:
        // [batchId, classId, confidence, left, top, right, bottom] per row.
        Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

        ostringstream ss;
        ss << "FPS: " << 1000/time << " ; time: " << time << " ms";
        putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));

        float confidenceThreshold = parser.get<float>("min_confidence");
        for(int i = 0; i < detectionMat.rows; i++)
        {
            float confidence = detectionMat.at<float>(i, 2);

            if(confidence > confidenceThreshold)
            {
                // Box coordinates are relative [0, 1]; scale to frame size.
                int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
                int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
                int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
                int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);

                Rect object((int)xLeftBottom, (int)yLeftBottom,
                            (int)(xRightTop - xLeftBottom),
                            (int)(yRightTop - yLeftBottom));

                rectangle(frame, object, Scalar(0, 255, 0));

                // Label: confidence rendered on a filled white strip.
                ss.str("");
                ss << confidence;
                String conf(ss.str());
                String label = "Face: " + conf;
                int baseLine = 0;
                Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
                rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
                                      Size(labelSize.width, labelSize.height + baseLine)),
                          Scalar(255, 255, 255), FILLED);
                putText(frame, label, Point(xLeftBottom, yLeftBottom),
                        FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
            }
        }

        imshow("detections", frame);
        if (waitKey(1) >= 0) break;
    }

    return 0;
} // main
|
@ -1,55 +0,0 @@ |
||||
import numpy as np
import argparse
# Import cv2 only inside try/except: the original also imported it
# unconditionally above, which made this helpful message unreachable.
try:
    import cv2 as cv
except ImportError:
    raise ImportError('Can\'t find OpenCV Python module. If you\'ve built it from sources without installation, '
                      'configure environment variable PYTHONPATH to "opencv_build_dir/lib" directory (with "python3" subdirectory if required)')

from cv2 import dnn

# Network input size and detection threshold for the ResNet-10 SSD face model.
inWidth = 300
inHeight = 300
confThreshold = 0.5

prototxt = 'face_detector/deploy.prototxt'
caffemodel = 'face_detector/res10_300x300_ssd_iter_140000.caffemodel'

if __name__ == '__main__':
    net = dnn.readNetFromCaffe(prototxt, caffemodel)
    cap = cv.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera disconnected or stream ended; `frame` would be None.
            break
        cols = frame.shape[1]
        rows = frame.shape[0]

        net.setInput(dnn.blobFromImage(frame, 1.0, (inWidth, inHeight), (104.0, 177.0, 123.0), False, False))
        detections = net.forward()

        perf_stats = net.getPerfProfile()

        print('Inference time, ms: %.2f' % (perf_stats[0] / cv.getTickFrequency() * 1000))

        # Output shape is 1x1xNx7: one row per detection,
        # [batchId, classId, confidence, left, top, right, bottom].
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > confThreshold:
                xLeftBottom = int(detections[0, 0, i, 3] * cols)
                yLeftBottom = int(detections[0, 0, i, 4] * rows)
                xRightTop = int(detections[0, 0, i, 5] * cols)
                yRightTop = int(detections[0, 0, i, 6] * rows)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom), (xRightTop, yRightTop),
                             (0, 255, 0))
                label = "face: %.4f" % confidence
                labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)

                cv.rectangle(frame, (xLeftBottom, yLeftBottom - labelSize[1]),
                                    (xLeftBottom + labelSize[0], yLeftBottom + baseLine),
                                    (255, 255, 255), cv.FILLED)
                cv.putText(frame, label, (xLeftBottom, yLeftBottom),
                           cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        cv.imshow("detections", frame)
        if cv.waitKey(1) != -1:
            break
@ -0,0 +1,237 @@ |
||||
#include <fstream> |
||||
#include <sstream> |
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
|
||||
const char* keys = |
||||
"{ help h | | Print help message. }" |
||||
"{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}" |
||||
"{ model m | | Path to a binary file of model contains trained weights. " |
||||
"It could be a file with extensions .caffemodel (Caffe), " |
||||
".pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet) }" |
||||
"{ config c | | Path to a text file of model contains network configuration. " |
||||
"It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet) }" |
||||
"{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }" |
||||
"{ classes | | Optional path to a text file with names of classes. }" |
||||
"{ colors | | Optional path to a text file with colors for an every class. " |
||||
"An every color is represented with three values from 0 to 255 in BGR channels order. }" |
||||
"{ mean | | Preprocess input image by subtracting mean values. Mean values should be in BGR order and delimited by spaces. }" |
||||
"{ scale | 1 | Preprocess input image by multiplying on a scale factor. }" |
||||
"{ width | | Preprocess input image by resizing to a specific width. }" |
||||
"{ height | | Preprocess input image by resizing to a specific height. }" |
||||
"{ rgb | | Indicate that model works with RGB input images instead BGR ones. }" |
||||
"{ backend | 0 | Choose one of computation backends: " |
||||
"0: default C++ backend, " |
||||
"1: Halide language (http://halide-lang.org/), " |
||||
"2: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)}" |
||||
"{ target | 0 | Choose one of target computation devices: " |
||||
"0: CPU target (by default)," |
||||
"1: OpenCL }"; |
||||
|
||||
using namespace cv; |
||||
using namespace dnn; |
||||
|
||||
std::vector<std::string> classes; |
||||
std::vector<Vec3b> colors; |
||||
|
||||
void showLegend(); |
||||
|
||||
void colorizeSegmentation(const Mat &score, Mat &segm); |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
CommandLineParser parser(argc, argv, keys); |
||||
parser.about("Use this script to run semantic segmentation deep learning networks using OpenCV."); |
||||
if (argc == 1 || parser.has("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
float scale = parser.get<float>("scale"); |
||||
Scalar mean = parser.get<Scalar>("mean"); |
||||
bool swapRB = parser.get<bool>("rgb"); |
||||
CV_Assert(parser.has("width"), parser.has("height")); |
||||
int inpWidth = parser.get<int>("width"); |
||||
int inpHeight = parser.get<int>("height"); |
||||
String model = parser.get<String>("model"); |
||||
String config = parser.get<String>("config"); |
||||
String framework = parser.get<String>("framework"); |
||||
int backendId = parser.get<int>("backend"); |
||||
int targetId = parser.get<int>("target"); |
||||
|
||||
// Open file with classes names.
|
||||
if (parser.has("classes")) |
||||
{ |
||||
std::string file = parser.get<String>("classes"); |
||||
std::ifstream ifs(file.c_str()); |
||||
if (!ifs.is_open()) |
||||
CV_Error(Error::StsError, "File " + file + " not found"); |
||||
std::string line; |
||||
while (std::getline(ifs, line)) |
||||
{ |
||||
classes.push_back(line); |
||||
} |
||||
} |
||||
|
||||
// Open file with colors.
|
||||
if (parser.has("colors")) |
||||
{ |
||||
std::string file = parser.get<String>("colors"); |
||||
std::ifstream ifs(file.c_str()); |
||||
if (!ifs.is_open()) |
||||
CV_Error(Error::StsError, "File " + file + " not found"); |
||||
std::string line; |
||||
while (std::getline(ifs, line)) |
||||
{ |
||||
std::istringstream colorStr(line.c_str()); |
||||
|
||||
Vec3b color; |
||||
for (int i = 0; i < 3 && !colorStr.eof(); ++i) |
||||
colorStr >> color[i]; |
||||
colors.push_back(color); |
||||
} |
||||
} |
||||
|
||||
CV_Assert(parser.has("model")); |
||||
//! [Read and initialize network]
|
||||
Net net = readNet(model, config, framework); |
||||
net.setPreferableBackend(backendId); |
||||
net.setPreferableTarget(targetId); |
||||
//! [Read and initialize network]
|
||||
|
||||
// Create a window
|
||||
static const std::string kWinName = "Deep learning semantic segmentation in OpenCV"; |
||||
namedWindow(kWinName, WINDOW_NORMAL); |
||||
|
||||
//! [Open a video file or an image file or a camera stream]
|
||||
VideoCapture cap; |
||||
if (parser.has("input")) |
||||
cap.open(parser.get<String>("input")); |
||||
else |
||||
cap.open(0); |
||||
//! [Open a video file or an image file or a camera stream]
|
||||
|
||||
// Process frames.
|
||||
Mat frame, blob; |
||||
while (waitKey(1) < 0) |
||||
{ |
||||
cap >> frame; |
||||
if (frame.empty()) |
||||
{ |
||||
waitKey(); |
||||
break; |
||||
} |
||||
|
||||
//! [Create a 4D blob from a frame]
|
||||
blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, false); |
||||
//! [Create a 4D blob from a frame]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(blob); |
||||
//! [Set input blob]
|
||||
//! [Make forward pass]
|
||||
Mat score = net.forward(); |
||||
//! [Make forward pass]
|
||||
|
||||
Mat segm; |
||||
colorizeSegmentation(score, segm); |
||||
|
||||
resize(segm, segm, frame.size(), 0, 0, INTER_NEAREST); |
||||
addWeighted(frame, 0.1, segm, 0.9, 0.0, frame); |
||||
|
||||
// Put efficiency information.
|
||||
std::vector<double> layersTimes; |
||||
double freq = getTickFrequency() / 1000; |
||||
double t = net.getPerfProfile(layersTimes) / freq; |
||||
std::string label = format("Inference time: %.2f ms", t); |
||||
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); |
||||
|
||||
imshow(kWinName, frame); |
||||
if (!classes.empty()) |
||||
showLegend(); |
||||
} |
||||
return 0; |
||||
} |
||||
|
||||
void colorizeSegmentation(const Mat &score, Mat &segm) |
||||
{ |
||||
const int rows = score.size[2]; |
||||
const int cols = score.size[3]; |
||||
const int chns = score.size[1]; |
||||
|
||||
if (colors.empty()) |
||||
{ |
||||
// Generate colors.
|
||||
colors.push_back(Vec3b()); |
||||
for (int i = 1; i < chns; ++i) |
||||
{ |
||||
Vec3b color; |
||||
for (int j = 0; j < 3; ++j) |
||||
color[j] = (colors[i - 1][j] + rand() % 256) / 2; |
||||
colors.push_back(color); |
||||
} |
||||
} |
||||
else if (chns != (int)colors.size()) |
||||
{ |
||||
CV_Error(Error::StsError, format("Number of output classes does not match " |
||||
"number of colors (%d != %d)", chns, colors.size())); |
||||
} |
||||
|
||||
Mat maxCl = Mat::zeros(rows, cols, CV_8UC1); |
||||
Mat maxVal(rows, cols, CV_32FC1, score.data); |
||||
for (int ch = 1; ch < chns; ch++) |
||||
{ |
||||
for (int row = 0; row < rows; row++) |
||||
{ |
||||
const float *ptrScore = score.ptr<float>(0, ch, row); |
||||
uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row); |
||||
float *ptrMaxVal = maxVal.ptr<float>(row); |
||||
for (int col = 0; col < cols; col++) |
||||
{ |
||||
if (ptrScore[col] > ptrMaxVal[col]) |
||||
{ |
||||
ptrMaxVal[col] = ptrScore[col]; |
||||
ptrMaxCl[col] = (uchar)ch; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
segm.create(rows, cols, CV_8UC3); |
||||
for (int row = 0; row < rows; row++) |
||||
{ |
||||
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); |
||||
Vec3b *ptrSegm = segm.ptr<Vec3b>(row); |
||||
for (int col = 0; col < cols; col++) |
||||
{ |
||||
ptrSegm[col] = colors[ptrMaxCl[col]]; |
||||
} |
||||
} |
||||
} |
||||
|
||||
void showLegend() |
||||
{ |
||||
static const int kBlockHeight = 30; |
||||
static Mat legend; |
||||
if (legend.empty()) |
||||
{ |
||||
const int numClasses = (int)classes.size(); |
||||
if ((int)colors.size() != numClasses) |
||||
{ |
||||
CV_Error(Error::StsError, format("Number of output classes does not match " |
||||
"number of labels (%d != %d)", colors.size(), classes.size())); |
||||
} |
||||
legend.create(kBlockHeight * numClasses, 200, CV_8UC3); |
||||
for (int i = 0; i < numClasses; i++) |
||||
{ |
||||
Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight); |
||||
block.setTo(colors[i]); |
||||
putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255)); |
||||
} |
||||
namedWindow("Legend", WINDOW_NORMAL); |
||||
imshow("Legend", legend); |
||||
} |
||||
} |
@ -0,0 +1,125 @@ |
||||
import cv2 as cv |
||||
import argparse |
||||
import numpy as np |
||||
import sys |
||||
|
||||
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) |
||||
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL) |
||||
|
||||
parser = argparse.ArgumentParser(description='Use this script to run semantic segmentation deep learning networks using OpenCV.') |
||||
parser.add_argument('--input', help='Path to input image or video file. Skip this argument to capture frames from a camera.') |
||||
parser.add_argument('--model', required=True, |
||||
help='Path to a binary file of model contains trained weights. ' |
||||
'It could be a file with extensions .caffemodel (Caffe), ' |
||||
'.pb (TensorFlow), .t7 or .net (Torch), .weights (Darknet)') |
||||
parser.add_argument('--config', |
||||
help='Path to a text file of model contains network configuration. ' |
||||
'It could be a file with extensions .prototxt (Caffe), .pbtxt (TensorFlow), .cfg (Darknet)') |
||||
parser.add_argument('--framework', choices=['caffe', 'tensorflow', 'torch', 'darknet'], |
||||
help='Optional name of an origin framework of the model. ' |
||||
'Detect it automatically if it does not set.') |
||||
parser.add_argument('--classes', help='Optional path to a text file with names of classes.') |
||||
parser.add_argument('--colors', help='Optional path to a text file with colors for an every class. ' |
||||
'An every color is represented with three values from 0 to 255 in BGR channels order.') |
||||
parser.add_argument('--mean', nargs='+', type=float, default=[0, 0, 0], |
||||
help='Preprocess input image by subtracting mean values. ' |
||||
'Mean values should be in BGR order.') |
||||
parser.add_argument('--scale', type=float, default=1.0, |
||||
help='Preprocess input image by multiplying on a scale factor.') |
||||
parser.add_argument('--width', type=int, required=True, |
||||
help='Preprocess input image by resizing to a specific width.') |
||||
parser.add_argument('--height', type=int, required=True, |
||||
help='Preprocess input image by resizing to a specific height.') |
||||
parser.add_argument('--rgb', action='store_true', |
||||
help='Indicate that model works with RGB input images instead BGR ones.') |
||||
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int, |
||||
help="Choose one of computation backends: " |
||||
"%d: default C++ backend, " |
||||
"%d: Halide language (http://halide-lang.org/), " |
||||
"%d: Intel's Deep Learning Inference Engine (https://software.seek.intel.com/deep-learning-deployment)" % backends) |
||||
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int, |
||||
help='Choose one of target computation devices: ' |
||||
'%d: CPU target (by default), ' |
||||
'%d: OpenCL' % targets) |
||||
args = parser.parse_args() |
||||
|
||||
np.random.seed(324) |
||||
|
||||
# Load names of classes |
||||
classes = None |
||||
if args.classes: |
||||
with open(args.classes, 'rt') as f: |
||||
classes = f.read().rstrip('\n').split('\n') |
||||
|
||||
# Load colors |
||||
colors = None |
||||
if args.colors: |
||||
with open(args.colors, 'rt') as f: |
||||
colors = [np.array(color.split(' '), np.uint8) for color in f.read().rstrip('\n').split('\n')] |
||||
|
||||
legend = None |
||||
def showLegend(classes): |
||||
global legend |
||||
if not classes is None and legend is None: |
||||
blockHeight = 30 |
||||
assert(len(classes) == len(colors)) |
||||
|
||||
legend = np.zeros((blockHeight * len(colors), 200, 3), np.uint8) |
||||
for i in range(len(classes)): |
||||
block = legend[i * blockHeight:(i + 1) * blockHeight] |
||||
block[:,:] = colors[i] |
||||
cv.putText(block, classes[i], (0, blockHeight/2), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) |
||||
|
||||
cv.namedWindow('Legend', cv.WINDOW_NORMAL) |
||||
cv.imshow('Legend', legend) |
||||
classes = None |
||||
|
||||
# Load a network |
||||
net = cv.dnn.readNet(args.model, args.config, args.framework) |
||||
net.setPreferableBackend(args.backend) |
||||
net.setPreferableTarget(args.target) |
||||
|
||||
winName = 'Deep learning image classification in OpenCV' |
||||
cv.namedWindow(winName, cv.WINDOW_NORMAL) |
||||
|
||||
cap = cv.VideoCapture(args.input if args.input else 0) |
||||
legend = None |
||||
while cv.waitKey(1) < 0: |
||||
hasFrame, frame = cap.read() |
||||
if not hasFrame: |
||||
cv.waitKey() |
||||
break |
||||
|
||||
# Create a 4D blob from a frame. |
||||
blob = cv.dnn.blobFromImage(frame, args.scale, (args.width, args.height), args.mean, args.rgb, crop=False) |
||||
|
||||
# Run a model |
||||
net.setInput(blob) |
||||
score = net.forward() |
||||
|
||||
numClasses = score.shape[1] |
||||
height = score.shape[2] |
||||
width = score.shape[3] |
||||
|
||||
# Draw segmentation |
||||
if not colors: |
||||
# Generate colors |
||||
colors = [np.array([0, 0, 0], np.uint8)] |
||||
for i in range(1, numClasses): |
||||
colors.append((colors[i - 1] + np.random.randint(0, 256, [3], np.uint8)) / 2) |
||||
|
||||
classIds = np.argmax(score[0], axis=0) |
||||
segm = np.stack([colors[idx] for idx in classIds.flatten()]) |
||||
segm = segm.reshape(height, width, 3) |
||||
|
||||
segm = cv.resize(segm, (frame.shape[1], frame.shape[0]), interpolation=cv.INTER_NEAREST) |
||||
frame = (0.1 * frame + 0.9 * segm).astype(np.uint8) |
||||
|
||||
# Put efficiency information. |
||||
t, _ = net.getPerfProfile() |
||||
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) |
||||
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) |
||||
|
||||
showLegend(classes) |
||||
|
||||
cv.imshow(winName, frame) |
@ -1,110 +0,0 @@ |
||||
// Sample of using Halide backend in OpenCV deep learning module.
|
||||
// Based on caffe_googlenet.cpp.
|
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
|
||||
/* Find best class for the blob (i. e. class with maximal probability) */ |
||||
static void getMaxClass(const Mat &probBlob, int *classId, double *classProb) |
||||
{ |
||||
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
|
||||
Point classNumber; |
||||
|
||||
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber); |
||||
*classId = classNumber.x; |
||||
} |
||||
|
||||
static std::vector<std::string> readClassNames(const char *filename = "synset_words.txt") |
||||
{ |
||||
std::vector<std::string> classNames; |
||||
|
||||
std::ifstream fp(filename); |
||||
if (!fp.is_open()) |
||||
{ |
||||
std::cerr << "File with classes labels not found: " << filename << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
std::string name; |
||||
while (!fp.eof()) |
||||
{ |
||||
std::getline(fp, name); |
||||
if (name.length()) |
||||
classNames.push_back( name.substr(name.find(' ')+1) ); |
||||
} |
||||
|
||||
fp.close(); |
||||
return classNames; |
||||
} |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
std::string modelTxt = "train_val.prototxt"; |
||||
std::string modelBin = "squeezenet_v1.1.caffemodel"; |
||||
std::string imageFile = (argc > 1) ? argv[1] : "space_shuttle.jpg"; |
||||
|
||||
//! [Read and initialize network]
|
||||
Net net = dnn::readNetFromCaffe(modelTxt, modelBin); |
||||
//! [Read and initialize network]
|
||||
|
||||
//! [Check that network was read successfully]
|
||||
if (net.empty()) |
||||
{ |
||||
std::cerr << "Can't load network by using the following files: " << std::endl; |
||||
std::cerr << "prototxt: " << modelTxt << std::endl; |
||||
std::cerr << "caffemodel: " << modelBin << std::endl; |
||||
std::cerr << "SqueezeNet v1.1 can be downloaded from:" << std::endl; |
||||
std::cerr << "https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1" << std::endl; |
||||
exit(-1); |
||||
} |
||||
//! [Check that network was read successfully]
|
||||
|
||||
//! [Prepare blob]
|
||||
Mat img = imread(imageFile); |
||||
if (img.empty()) |
||||
{ |
||||
std::cerr << "Can't read image from the file: " << imageFile << std::endl; |
||||
exit(-1); |
||||
} |
||||
if (img.channels() != 3) |
||||
{ |
||||
std::cerr << "Image " << imageFile << " isn't 3-channel" << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
Mat inputBlob = blobFromImage(img, 1.0, Size(227, 227), Scalar(), false, false); // Convert Mat to 4-dimensional batch.
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob); // Set the network input.
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Enable Halide backend]
|
||||
net.setPreferableBackend(DNN_BACKEND_HALIDE); // Tell engine to use Halide where it possible.
|
||||
//! [Enable Halide backend]
|
||||
|
||||
//! [Make forward pass]
|
||||
Mat prob = net.forward("prob"); // Compute output.
|
||||
//! [Make forward pass]
|
||||
|
||||
//! [Determine the best class]
|
||||
int classId; |
||||
double classProb; |
||||
getMaxClass(prob, &classId, &classProb); // Find the best class.
|
||||
//! [Determine the best class]
|
||||
|
||||
//! [Print results]
|
||||
std::vector<std::string> classNames = readClassNames(); |
||||
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl; |
||||
std::cout << "Probability: " << classProb * 100 << "%" << std::endl; |
||||
//! [Print results]
|
||||
|
||||
return 0; |
||||
} //main
|
@ -1,187 +0,0 @@ |
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/dnn/shape_utils.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
#include <iostream> |
||||
|
||||
using namespace cv; |
||||
using namespace std; |
||||
using namespace cv::dnn; |
||||
|
||||
const size_t inWidth = 300; |
||||
const size_t inHeight = 300; |
||||
const float inScaleFactor = 0.007843f; |
||||
const float meanVal = 127.5; |
||||
const char* classNames[] = {"background", |
||||
"aeroplane", "bicycle", "bird", "boat", |
||||
"bottle", "bus", "car", "cat", "chair", |
||||
"cow", "diningtable", "dog", "horse", |
||||
"motorbike", "person", "pottedplant", |
||||
"sheep", "sofa", "train", "tvmonitor"}; |
||||
|
||||
const String keys |
||||
= "{ help | false | print usage }" |
||||
"{ proto | MobileNetSSD_deploy.prototxt | model configuration }" |
||||
"{ model | MobileNetSSD_deploy.caffemodel | model weights }" |
||||
"{ camera_device | 0 | camera device number }" |
||||
"{ camera_width | 640 | camera device width }" |
||||
"{ camera_height | 480 | camera device height }" |
||||
"{ video | | video or image for detection}" |
||||
"{ out | | path to output video file}" |
||||
"{ min_confidence | 0.2 | min confidence }" |
||||
"{ opencl | false | enable OpenCL }" |
||||
; |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
CommandLineParser parser(argc, argv, keys); |
||||
parser.about("This sample uses MobileNet Single-Shot Detector " |
||||
"(https://arxiv.org/abs/1704.04861) " |
||||
"to detect objects on camera/video/image.\n" |
||||
".caffemodel model's file is available here: " |
||||
"https://github.com/chuanqi305/MobileNet-SSD\n" |
||||
"Default network is 300x300 and 20-classes VOC.\n"); |
||||
|
||||
if (parser.get<bool>("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String modelConfiguration = parser.get<String>("proto"); |
||||
String modelBinary = parser.get<String>("model"); |
||||
CV_Assert(!modelConfiguration.empty() && !modelBinary.empty()); |
||||
|
||||
//! [Initialize network]
|
||||
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); |
||||
//! [Initialize network]
|
||||
|
||||
if (parser.get<bool>("opencl")) |
||||
{ |
||||
net.setPreferableTarget(DNN_TARGET_OPENCL); |
||||
} |
||||
|
||||
if (net.empty()) |
||||
{ |
||||
cerr << "Can't load network by using the following files: " << endl; |
||||
cerr << "prototxt: " << modelConfiguration << endl; |
||||
cerr << "caffemodel: " << modelBinary << endl; |
||||
cerr << "Models can be downloaded here:" << endl; |
||||
cerr << "https://github.com/chuanqi305/MobileNet-SSD" << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
VideoCapture cap; |
||||
if (!parser.has("video")) |
||||
{ |
||||
int cameraDevice = parser.get<int>("camera_device"); |
||||
cap = VideoCapture(cameraDevice); |
||||
if(!cap.isOpened()) |
||||
{ |
||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
||||
return -1; |
||||
} |
||||
|
||||
cap.set(CAP_PROP_FRAME_WIDTH, parser.get<int>("camera_width")); |
||||
cap.set(CAP_PROP_FRAME_HEIGHT, parser.get<int>("camera_height")); |
||||
} |
||||
else |
||||
{ |
||||
cap.open(parser.get<String>("video")); |
||||
if(!cap.isOpened()) |
||||
{ |
||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
||||
return -1; |
||||
} |
||||
} |
||||
|
||||
//Acquire input size
|
||||
Size inVideoSize((int) cap.get(CAP_PROP_FRAME_WIDTH), |
||||
(int) cap.get(CAP_PROP_FRAME_HEIGHT)); |
||||
|
||||
double fps = cap.get(CAP_PROP_FPS); |
||||
int fourcc = static_cast<int>(cap.get(CAP_PROP_FOURCC)); |
||||
VideoWriter outputVideo; |
||||
outputVideo.open(parser.get<String>("out") , |
||||
(fourcc != 0 ? fourcc : VideoWriter::fourcc('M','J','P','G')), |
||||
(fps != 0 ? fps : 10.0), inVideoSize, true); |
||||
|
||||
for(;;) |
||||
{ |
||||
Mat frame; |
||||
cap >> frame; // get a new frame from camera/video or read image
|
||||
|
||||
if (frame.empty()) |
||||
{ |
||||
waitKey(); |
||||
break; |
||||
} |
||||
|
||||
if (frame.channels() == 4) |
||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
||||
|
||||
//! [Prepare blob]
|
||||
Mat inputBlob = blobFromImage(frame, inScaleFactor, |
||||
Size(inWidth, inHeight), |
||||
Scalar(meanVal, meanVal, meanVal), |
||||
false, false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Make forward pass]
|
||||
Mat detection = net.forward(); //compute output
|
||||
//! [Make forward pass]
|
||||
|
||||
vector<double> layersTimings; |
||||
double freq = getTickFrequency() / 1000; |
||||
double time = net.getPerfProfile(layersTimings) / freq; |
||||
|
||||
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); |
||||
|
||||
if (!outputVideo.isOpened()) |
||||
{ |
||||
putText(frame, format("FPS: %.2f ; time: %.2f ms", 1000.f/time, time), |
||||
Point(20,20), 0, 0.5, Scalar(0,0,255)); |
||||
} |
||||
else |
||||
cout << "Inference time, ms: " << time << endl; |
||||
|
||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
||||
for(int i = 0; i < detectionMat.rows; i++) |
||||
{ |
||||
float confidence = detectionMat.at<float>(i, 2); |
||||
|
||||
if(confidence > confidenceThreshold) |
||||
{ |
||||
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1)); |
||||
|
||||
int left = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols); |
||||
int top = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows); |
||||
int right = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols); |
||||
int bottom = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows); |
||||
|
||||
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0)); |
||||
String label = format("%s: %.2f", classNames[objectClass], confidence); |
||||
int baseLine = 0; |
||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
||||
top = max(top, labelSize.height); |
||||
rectangle(frame, Point(left, top - labelSize.height), |
||||
Point(left + labelSize.width, top + baseLine), |
||||
Scalar(255, 255, 255), FILLED); |
||||
putText(frame, label, Point(left, top), |
||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
||||
} |
||||
} |
||||
|
||||
if (outputVideo.isOpened()) |
||||
outputVideo << frame; |
||||
|
||||
imshow("detections", frame); |
||||
if (waitKey(1) >= 0) break; |
||||
} |
||||
|
||||
return 0; |
||||
} // main
|
@ -1,156 +0,0 @@ |
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/dnn/shape_utils.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
#include <iostream> |
||||
|
||||
using namespace cv; |
||||
using namespace std; |
||||
using namespace cv::dnn; |
||||
|
||||
const char* classNames[] = {"background", |
||||
"aeroplane", "bicycle", "bird", "boat", |
||||
"bottle", "bus", "car", "cat", "chair", |
||||
"cow", "diningtable", "dog", "horse", |
||||
"motorbike", "person", "pottedplant", |
||||
"sheep", "sofa", "train", "tvmonitor"}; |
||||
|
||||
const char* about = "This sample uses Single-Shot Detector " |
||||
"(https://arxiv.org/abs/1512.02325) " |
||||
"to detect objects on camera/video/image.\n" |
||||
".caffemodel model's file is available here: " |
||||
"https://github.com/weiliu89/caffe/tree/ssd#models\n" |
||||
"Default network is 300x300 and 20-classes VOC.\n"; |
||||
|
||||
const char* params |
||||
= "{ help | false | print usage }" |
||||
"{ proto | | model configuration }" |
||||
"{ model | | model weights }" |
||||
"{ camera_device | 0 | camera device number}" |
||||
"{ video | | video or image for detection}" |
||||
"{ min_confidence | 0.5 | min confidence }"; |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
cv::CommandLineParser parser(argc, argv, params); |
||||
|
||||
if (parser.get<bool>("help")) |
||||
{ |
||||
cout << about << endl; |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String modelConfiguration = parser.get<string>("proto"); |
||||
String modelBinary = parser.get<string>("model"); |
||||
|
||||
//! [Initialize network]
|
||||
dnn::Net net = readNetFromCaffe(modelConfiguration, modelBinary); |
||||
//! [Initialize network]
|
||||
|
||||
if (net.empty()) |
||||
{ |
||||
cerr << "Can't load network by using the following files: " << endl; |
||||
cerr << "prototxt: " << modelConfiguration << endl; |
||||
cerr << "caffemodel: " << modelBinary << endl; |
||||
cerr << "Models can be downloaded here:" << endl; |
||||
cerr << "https://github.com/weiliu89/caffe/tree/ssd#models" << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
VideoCapture cap; |
||||
if (parser.get<String>("video").empty()) |
||||
{ |
||||
int cameraDevice = parser.get<int>("camera_device"); |
||||
cap = VideoCapture(cameraDevice); |
||||
if(!cap.isOpened()) |
||||
{ |
||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
||||
return -1; |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
cap.open(parser.get<String>("video")); |
||||
if(!cap.isOpened()) |
||||
{ |
||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
||||
return -1; |
||||
} |
||||
} |
||||
|
||||
for (;;) |
||||
{ |
||||
cv::Mat frame; |
||||
cap >> frame; // get a new frame from camera/video or read image
|
||||
|
||||
if (frame.empty()) |
||||
{ |
||||
waitKey(); |
||||
break; |
||||
} |
||||
|
||||
if (frame.channels() == 4) |
||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
||||
|
||||
//! [Prepare blob]
|
||||
Mat inputBlob = blobFromImage(frame, 1.0f, Size(300, 300), Scalar(104, 117, 123), false, false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob, "data"); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Make forward pass]
|
||||
Mat detection = net.forward("detection_out"); //compute output
|
||||
//! [Make forward pass]
|
||||
|
||||
vector<double> layersTimings; |
||||
double freq = getTickFrequency() / 1000; |
||||
double time = net.getPerfProfile(layersTimings) / freq; |
||||
ostringstream ss; |
||||
ss << "FPS: " << 1000/time << " ; time: " << time << " ms"; |
||||
putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255)); |
||||
|
||||
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>()); |
||||
|
||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
||||
for(int i = 0; i < detectionMat.rows; i++) |
||||
{ |
||||
float confidence = detectionMat.at<float>(i, 2); |
||||
|
||||
if(confidence > confidenceThreshold) |
||||
{ |
||||
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1)); |
||||
|
||||
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols); |
||||
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows); |
||||
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols); |
||||
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows); |
||||
|
||||
ss.str(""); |
||||
ss << confidence; |
||||
String conf(ss.str()); |
||||
|
||||
Rect object(xLeftBottom, yLeftBottom, |
||||
xRightTop - xLeftBottom, |
||||
yRightTop - yLeftBottom); |
||||
|
||||
rectangle(frame, object, Scalar(0, 255, 0)); |
||||
String label = String(classNames[objectClass]) + ": " + conf; |
||||
int baseLine = 0; |
||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
||||
rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height), |
||||
Size(labelSize.width, labelSize.height + baseLine)), |
||||
Scalar(255, 255, 255), FILLED); |
||||
putText(frame, label, Point(xLeftBottom, yLeftBottom), |
||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
||||
} |
||||
} |
||||
|
||||
imshow("detections", frame); |
||||
if (waitKey(1) >= 0) break; |
||||
} |
||||
|
||||
return 0; |
||||
} // main
|
@ -1,154 +0,0 @@ |
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// Copyright (C) 2016, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
/*
|
||||
Sample of using OpenCV dnn module with Tensorflow Inception model. |
||||
*/ |
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
using namespace std; |
||||
|
||||
const String keys = |
||||
"{help h || Sample app for loading Inception TensorFlow model. " |
||||
"The model and class names list can be downloaded here: " |
||||
"https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip }" |
||||
"{model m |tensorflow_inception_graph.pb| path to TensorFlow .pb model file }" |
||||
"{image i || path to image file }" |
||||
"{i_blob | input | input blob name) }" |
||||
"{o_blob | softmax2 | output blob name) }" |
||||
"{c_names c | imagenet_comp_graph_label_strings.txt | path to file with classnames for class id }" |
||||
"{result r || path to save output blob (optional, binary format, NCHW order) }" |
||||
; |
||||
|
||||
void getMaxClass(const Mat &probBlob, int *classId, double *classProb); |
||||
std::vector<String> readClassNames(const char *filename); |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
cv::CommandLineParser parser(argc, argv, keys); |
||||
|
||||
if (parser.has("help")) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String modelFile = parser.get<String>("model"); |
||||
String imageFile = parser.get<String>("image"); |
||||
String inBlobName = parser.get<String>("i_blob"); |
||||
String outBlobName = parser.get<String>("o_blob"); |
||||
|
||||
if (!parser.check()) |
||||
{ |
||||
parser.printErrors(); |
||||
return 0; |
||||
} |
||||
|
||||
String classNamesFile = parser.get<String>("c_names"); |
||||
String resultFile = parser.get<String>("result"); |
||||
|
||||
//! [Initialize network]
|
||||
dnn::Net net = readNetFromTensorflow(modelFile); |
||||
//! [Initialize network]
|
||||
|
||||
if (net.empty()) |
||||
{ |
||||
std::cerr << "Can't load network by using the mode file: " << std::endl; |
||||
std::cerr << modelFile << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
//! [Prepare blob]
|
||||
Mat img = imread(imageFile); |
||||
if (img.empty()) |
||||
{ |
||||
std::cerr << "Can't read image from the file: " << imageFile << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
Mat inputBlob = blobFromImage(img, 1.0f, Size(224, 224), Scalar(), true, false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
inputBlob -= 117.0; |
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob, inBlobName); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
cv::TickMeter tm; |
||||
tm.start(); |
||||
|
||||
//! [Make forward pass]
|
||||
Mat result = net.forward(outBlobName); //compute output
|
||||
//! [Make forward pass]
|
||||
|
||||
tm.stop(); |
||||
|
||||
if (!resultFile.empty()) { |
||||
CV_Assert(result.isContinuous()); |
||||
|
||||
ofstream fout(resultFile.c_str(), ios::out | ios::binary); |
||||
fout.write((char*)result.data, result.total() * sizeof(float)); |
||||
fout.close(); |
||||
} |
||||
|
||||
std::cout << "Output blob shape " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << std::endl; |
||||
std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl; |
||||
|
||||
if (!classNamesFile.empty()) { |
||||
std::vector<String> classNames = readClassNames(classNamesFile.c_str()); |
||||
|
||||
int classId; |
||||
double classProb; |
||||
getMaxClass(result, &classId, &classProb);//find the best class
|
||||
|
||||
//! [Print results]
|
||||
std::cout << "Best class: #" << classId << " '" << classNames.at(classId) << "'" << std::endl; |
||||
std::cout << "Probability: " << classProb * 100 << "%" << std::endl; |
||||
} |
||||
return 0; |
||||
} //main
|
||||
|
||||
|
||||
/* Find best class for the blob (i. e. class with maximal probability) */ |
||||
void getMaxClass(const Mat &probBlob, int *classId, double *classProb) |
||||
{ |
||||
Mat probMat = probBlob.reshape(1, 1); //reshape the blob to 1x1000 matrix
|
||||
Point classNumber; |
||||
|
||||
minMaxLoc(probMat, NULL, classProb, NULL, &classNumber); |
||||
*classId = classNumber.x; |
||||
} |
||||
|
||||
std::vector<String> readClassNames(const char *filename) |
||||
{ |
||||
std::vector<String> classNames; |
||||
|
||||
std::ifstream fp(filename); |
||||
if (!fp.is_open()) |
||||
{ |
||||
std::cerr << "File with classes labels not found: " << filename << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
std::string name; |
||||
while (!fp.eof()) |
||||
{ |
||||
std::getline(fp, name); |
||||
if (name.length()) |
||||
classNames.push_back( name ); |
||||
} |
||||
|
||||
fp.close(); |
||||
return classNames; |
||||
} |
@ -1,175 +0,0 @@ |
||||
/*
|
||||
Sample of using OpenCV dnn module with Torch ENet model. |
||||
*/ |
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
#include <fstream> |
||||
#include <iostream> |
||||
#include <cstdlib> |
||||
#include <sstream> |
||||
using namespace std; |
||||
|
||||
const String keys = |
||||
"{help h || Sample app for loading ENet Torch model. " |
||||
"The model and class names list can be downloaded here: " |
||||
"https://www.dropbox.com/sh/dywzk3gyb12hpe5/AAD5YkUa8XgMpHs2gCRgmCVCa }" |
||||
"{model m || path to Torch .net model file (model_best.net) }" |
||||
"{image i || path to image file }" |
||||
"{result r || path to save output blob (optional, binary format, NCHW order) }" |
||||
"{show s || whether to show all output channels or not}" |
||||
"{o_blob || output blob's name. If empty, last blob's name in net is used}"; |
||||
|
||||
static const int kNumClasses = 20; |
||||
|
||||
static const String classes[] = { |
||||
"Background", "Road", "Sidewalk", "Building", "Wall", "Fence", "Pole", |
||||
"TrafficLight", "TrafficSign", "Vegetation", "Terrain", "Sky", "Person", |
||||
"Rider", "Car", "Truck", "Bus", "Train", "Motorcycle", "Bicycle" |
||||
}; |
||||
|
||||
static const Vec3b colors[] = { |
||||
Vec3b(0, 0, 0), Vec3b(244, 126, 205), Vec3b(254, 83, 132), Vec3b(192, 200, 189), |
||||
Vec3b(50, 56, 251), Vec3b(65, 199, 228), Vec3b(240, 178, 193), Vec3b(201, 67, 188), |
||||
Vec3b(85, 32, 33), Vec3b(116, 25, 18), Vec3b(162, 33, 72), Vec3b(101, 150, 210), |
||||
Vec3b(237, 19, 16), Vec3b(149, 197, 72), Vec3b(80, 182, 21), Vec3b(141, 5, 207), |
||||
Vec3b(189, 156, 39), Vec3b(235, 170, 186), Vec3b(133, 109, 144), Vec3b(231, 160, 96) |
||||
}; |
||||
|
||||
static void showLegend(); |
||||
|
||||
static void colorizeSegmentation(const Mat &score, Mat &segm); |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
CommandLineParser parser(argc, argv, keys); |
||||
|
||||
if (parser.has("help") || argc == 1) |
||||
{ |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String modelFile = parser.get<String>("model"); |
||||
String imageFile = parser.get<String>("image"); |
||||
|
||||
if (!parser.check()) |
||||
{ |
||||
parser.printErrors(); |
||||
return 0; |
||||
} |
||||
|
||||
String resultFile = parser.get<String>("result"); |
||||
|
||||
//! [Read model and initialize network]
|
||||
dnn::Net net = dnn::readNetFromTorch(modelFile); |
||||
|
||||
//! [Prepare blob]
|
||||
Mat img = imread(imageFile), input; |
||||
if (img.empty()) |
||||
{ |
||||
std::cerr << "Can't read image from the file: " << imageFile << std::endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
Mat inputBlob = blobFromImage(img, 1./255, Size(1024, 512), Scalar(), true, false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
TickMeter tm; |
||||
|
||||
String oBlob = net.getLayerNames().back(); |
||||
if (!parser.get<String>("o_blob").empty()) |
||||
{ |
||||
oBlob = parser.get<String>("o_blob"); |
||||
} |
||||
|
||||
//! [Make forward pass]
|
||||
tm.start(); |
||||
Mat result = net.forward(oBlob); |
||||
tm.stop(); |
||||
|
||||
if (!resultFile.empty()) { |
||||
CV_Assert(result.isContinuous()); |
||||
|
||||
ofstream fout(resultFile.c_str(), ios::out | ios::binary); |
||||
fout.write((char*)result.data, result.total() * sizeof(float)); |
||||
fout.close(); |
||||
} |
||||
|
||||
std::cout << "Output blob: " << result.size[0] << " x " << result.size[1] << " x " << result.size[2] << " x " << result.size[3] << "\n"; |
||||
std::cout << "Inference time, ms: " << tm.getTimeMilli() << std::endl; |
||||
|
||||
if (parser.has("show")) |
||||
{ |
||||
Mat segm, show; |
||||
colorizeSegmentation(result, segm); |
||||
showLegend(); |
||||
|
||||
cv::resize(segm, segm, img.size(), 0, 0, cv::INTER_NEAREST); |
||||
addWeighted(img, 0.1, segm, 0.9, 0.0, show); |
||||
|
||||
imshow("Result", show); |
||||
waitKey(); |
||||
} |
||||
return 0; |
||||
} //main
|
||||
|
||||
static void showLegend() |
||||
{ |
||||
static const int kBlockHeight = 30; |
||||
|
||||
cv::Mat legend(kBlockHeight * kNumClasses, 200, CV_8UC3); |
||||
for(int i = 0; i < kNumClasses; i++) |
||||
{ |
||||
cv::Mat block = legend.rowRange(i * kBlockHeight, (i + 1) * kBlockHeight); |
||||
block.setTo(colors[i]); |
||||
putText(block, classes[i], Point(0, kBlockHeight / 2), FONT_HERSHEY_SIMPLEX, 0.5, Vec3b(255, 255, 255)); |
||||
} |
||||
imshow("Legend", legend); |
||||
} |
||||
|
||||
static void colorizeSegmentation(const Mat &score, Mat &segm) |
||||
{ |
||||
const int rows = score.size[2]; |
||||
const int cols = score.size[3]; |
||||
const int chns = score.size[1]; |
||||
|
||||
Mat maxCl = Mat::zeros(rows, cols, CV_8UC1); |
||||
Mat maxVal(rows, cols, CV_32FC1, score.data); |
||||
for (int ch = 1; ch < chns; ch++) |
||||
{ |
||||
for (int row = 0; row < rows; row++) |
||||
{ |
||||
const float *ptrScore = score.ptr<float>(0, ch, row); |
||||
uint8_t *ptrMaxCl = maxCl.ptr<uint8_t>(row); |
||||
float *ptrMaxVal = maxVal.ptr<float>(row); |
||||
for (int col = 0; col < cols; col++) |
||||
{ |
||||
if (ptrScore[col] > ptrMaxVal[col]) |
||||
{ |
||||
ptrMaxVal[col] = ptrScore[col]; |
||||
ptrMaxCl[col] = (uchar)ch; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
segm.create(rows, cols, CV_8UC3); |
||||
for (int row = 0; row < rows; row++) |
||||
{ |
||||
const uchar *ptrMaxCl = maxCl.ptr<uchar>(row); |
||||
Vec3b *ptrSegm = segm.ptr<Vec3b>(row); |
||||
for (int col = 0; col < cols; col++) |
||||
{ |
||||
ptrSegm[col] = colors[ptrMaxCl[col]]; |
||||
} |
||||
} |
||||
} |
@ -1,185 +0,0 @@ |
||||
// Brief Sample of using OpenCV dnn module in real time with device capture, video and image.
|
||||
// VIDEO DEMO: https://www.youtube.com/watch?v=NHtRlndE2cg
|
||||
|
||||
#include <opencv2/dnn.hpp> |
||||
#include <opencv2/dnn/shape_utils.hpp> |
||||
#include <opencv2/imgproc.hpp> |
||||
#include <opencv2/highgui.hpp> |
||||
#include <fstream> |
||||
#include <iostream> |
||||
|
||||
using namespace std; |
||||
using namespace cv; |
||||
using namespace cv::dnn; |
||||
|
||||
static const char* about = |
||||
"This sample uses You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects on camera/video/image.\n" |
||||
"Models can be downloaded here: https://pjreddie.com/darknet/yolo/\n" |
||||
"Default network is 416x416.\n" |
||||
"Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data\n"; |
||||
|
||||
static const char* params = |
||||
"{ help | false | print usage }" |
||||
"{ cfg | | model configuration }" |
||||
"{ model | | model weights }" |
||||
"{ camera_device | 0 | camera device number}" |
||||
"{ source | | video or image for detection}" |
||||
"{ out | | path to output video file}" |
||||
"{ fps | 3 | frame per second }" |
||||
"{ style | box | box or line style draw }" |
||||
"{ min_confidence | 0.24 | min confidence }" |
||||
"{ class_names | | File with class names, [PATH-TO-DARKNET]/data/coco.names }"; |
||||
|
||||
int main(int argc, char** argv) |
||||
{ |
||||
CommandLineParser parser(argc, argv, params); |
||||
|
||||
if (parser.get<bool>("help")) |
||||
{ |
||||
cout << about << endl; |
||||
parser.printMessage(); |
||||
return 0; |
||||
} |
||||
|
||||
String modelConfiguration = parser.get<String>("cfg"); |
||||
String modelBinary = parser.get<String>("model"); |
||||
|
||||
//! [Initialize network]
|
||||
dnn::Net net = readNetFromDarknet(modelConfiguration, modelBinary); |
||||
//! [Initialize network]
|
||||
|
||||
if (net.empty()) |
||||
{ |
||||
cerr << "Can't load network by using the following files: " << endl; |
||||
cerr << "cfg-file: " << modelConfiguration << endl; |
||||
cerr << "weights-file: " << modelBinary << endl; |
||||
cerr << "Models can be downloaded here:" << endl; |
||||
cerr << "https://pjreddie.com/darknet/yolo/" << endl; |
||||
exit(-1); |
||||
} |
||||
|
||||
VideoCapture cap; |
||||
VideoWriter writer; |
||||
int codec = CV_FOURCC('M', 'J', 'P', 'G'); |
||||
double fps = parser.get<float>("fps"); |
||||
if (parser.get<String>("source").empty()) |
||||
{ |
||||
int cameraDevice = parser.get<int>("camera_device"); |
||||
cap = VideoCapture(cameraDevice); |
||||
if(!cap.isOpened()) |
||||
{ |
||||
cout << "Couldn't find camera: " << cameraDevice << endl; |
||||
return -1; |
||||
} |
||||
} |
||||
else |
||||
{ |
||||
cap.open(parser.get<String>("source")); |
||||
if(!cap.isOpened()) |
||||
{ |
||||
cout << "Couldn't open image or video: " << parser.get<String>("video") << endl; |
||||
return -1; |
||||
} |
||||
} |
||||
|
||||
if(!parser.get<String>("out").empty()) |
||||
{ |
||||
writer.open(parser.get<String>("out"), codec, fps, Size((int)cap.get(CAP_PROP_FRAME_WIDTH),(int)cap.get(CAP_PROP_FRAME_HEIGHT)), 1); |
||||
} |
||||
|
||||
vector<String> classNamesVec; |
||||
ifstream classNamesFile(parser.get<String>("class_names").c_str()); |
||||
if (classNamesFile.is_open()) |
||||
{ |
||||
string className = ""; |
||||
while (std::getline(classNamesFile, className)) |
||||
classNamesVec.push_back(className); |
||||
} |
||||
|
||||
String object_roi_style = parser.get<String>("style"); |
||||
|
||||
for(;;) |
||||
{ |
||||
Mat frame; |
||||
cap >> frame; // get a new frame from camera/video or read image
|
||||
|
||||
if (frame.empty()) |
||||
{ |
||||
waitKey(); |
||||
break; |
||||
} |
||||
|
||||
if (frame.channels() == 4) |
||||
cvtColor(frame, frame, COLOR_BGRA2BGR); |
||||
|
||||
//! [Prepare blob]
|
||||
Mat inputBlob = blobFromImage(frame, 1 / 255.F, Size(416, 416), Scalar(), true, false); //Convert Mat to batch of images
|
||||
//! [Prepare blob]
|
||||
|
||||
//! [Set input blob]
|
||||
net.setInput(inputBlob, "data"); //set the network input
|
||||
//! [Set input blob]
|
||||
|
||||
//! [Make forward pass]
|
||||
Mat detectionMat = net.forward("detection_out"); //compute output
|
||||
//! [Make forward pass]
|
||||
|
||||
vector<double> layersTimings; |
||||
double tick_freq = getTickFrequency(); |
||||
double time_ms = net.getPerfProfile(layersTimings) / tick_freq * 1000; |
||||
putText(frame, format("FPS: %.2f ; time: %.2f ms", 1000.f / time_ms, time_ms), |
||||
Point(20, 20), 0, 0.5, Scalar(0, 0, 255)); |
||||
|
||||
float confidenceThreshold = parser.get<float>("min_confidence"); |
||||
for (int i = 0; i < detectionMat.rows; i++) |
||||
{ |
||||
const int probability_index = 5; |
||||
const int probability_size = detectionMat.cols - probability_index; |
||||
float *prob_array_ptr = &detectionMat.at<float>(i, probability_index); |
||||
|
||||
size_t objectClass = max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr; |
||||
float confidence = detectionMat.at<float>(i, (int)objectClass + probability_index); |
||||
|
||||
if (confidence > confidenceThreshold) |
||||
{ |
||||
float x_center = detectionMat.at<float>(i, 0) * frame.cols; |
||||
float y_center = detectionMat.at<float>(i, 1) * frame.rows; |
||||
float width = detectionMat.at<float>(i, 2) * frame.cols; |
||||
float height = detectionMat.at<float>(i, 3) * frame.rows; |
||||
Point p1(cvRound(x_center - width / 2), cvRound(y_center - height / 2)); |
||||
Point p2(cvRound(x_center + width / 2), cvRound(y_center + height / 2)); |
||||
Rect object(p1, p2); |
||||
|
||||
Scalar object_roi_color(0, 255, 0); |
||||
|
||||
if (object_roi_style == "box") |
||||
{ |
||||
rectangle(frame, object, object_roi_color); |
||||
} |
||||
else |
||||
{ |
||||
Point p_center(cvRound(x_center), cvRound(y_center)); |
||||
line(frame, object.tl(), p_center, object_roi_color, 1); |
||||
} |
||||
|
||||
String className = objectClass < classNamesVec.size() ? classNamesVec[objectClass] : cv::format("unknown(%d)", objectClass); |
||||
String label = format("%s: %.2f", className.c_str(), confidence); |
||||
int baseLine = 0; |
||||
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); |
||||
rectangle(frame, Rect(p1, Size(labelSize.width, labelSize.height + baseLine)), |
||||
object_roi_color, FILLED); |
||||
putText(frame, label, p1 + Point(0, labelSize.height), |
||||
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); |
||||
} |
||||
} |
||||
if(writer.isOpened()) |
||||
{ |
||||
writer.write(frame); |
||||
} |
||||
|
||||
imshow("YOLO: Detections", frame); |
||||
if (waitKey(1) >= 0) break; |
||||
} |
||||
|
||||
return 0; |
||||
} // main
|
Loading…
Reference in new issue