@ -1,5 +1,6 @@
# include <fstream>
# include <fstream>
# include <sstream>
# include <sstream>
# include <iostream>
# include <opencv2/dnn.hpp>
# include <opencv2/dnn.hpp>
# include <opencv2/imgproc.hpp>
# include <opencv2/imgproc.hpp>
@ -7,50 +8,54 @@
# include "common.hpp"
# include "common.hpp"
// Command-line option specification for the sample. Each entry is written as
// "{ name [short alias] | default value | help text }"; an "@"-prefixed name
// (here "@alias") is declared the same way. The pieces below are concatenated
// into `keys`, which main() hands to CommandLineParser.
std : : string param_keys =
" { help h | | Print help message. } "
" { @alias | | An alias name of model to extract preprocessing parameters from models.yml file. } "
" { zoo | models.yml | An optional path to file with preprocessing parameters } "
" { device | 0 | camera device number. } "
" { input i | | Path to input image or video file. Skip this argument to capture frames from a camera. } "
" { framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. } "
" { classes | | Optional path to a text file with names of classes. } "
" { colors | | Optional path to a text file with colors for an every class. "
" An every color is represented with three values from 0 to 255 in BGR channels order. } " ;
// "backend" option. The help text is produced with cv::format so that the
// numbers shown to the user are the actual values of the DNN_BACKEND_*
// enumerators passed as arguments, in the same order as the %d placeholders.
std : : string backend_keys = cv : : format (
" { backend | 0 | Choose one of computation backends: "
" %d: automatically (by default), "
" %d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
" %d: OpenCV implementation, "
" %d: VKCOM, "
" %d: CUDA } " , cv : : dnn : : DNN_BACKEND_DEFAULT , cv : : dnn : : DNN_BACKEND_INFERENCE_ENGINE , cv : : dnn : : DNN_BACKEND_OPENCV , cv : : dnn : : DNN_BACKEND_VKCOM , cv : : dnn : : DNN_BACKEND_CUDA ) ;
// "target" option, built the same way from the DNN_TARGET_* enumerators
// (seven placeholders, seven arguments).
// NOTE(review): the CUDA fp16 entry reads "half-float preprocess" while the
// OpenCL fp16 entry reads "half-float precision" -- presumably "precision"
// was intended for both; confirm before changing the user-facing text.
std : : string target_keys = cv : : format (
" { target | 0 | Choose one of target computation devices: "
" %d: CPU target (by default), "
" %d: OpenCL, "
" %d: OpenCL fp16 (half-float precision), "
" %d: VPU, "
" %d: Vulkan, "
" %d: CUDA, "
" %d: CUDA fp16 (half-float preprocess) } " , cv : : dnn : : DNN_TARGET_CPU , cv : : dnn : : DNN_TARGET_OPENCL , cv : : dnn : : DNN_TARGET_OPENCL_FP16 , cv : : dnn : : DNN_TARGET_MYRIAD , cv : : dnn : : DNN_TARGET_VULKAN , cv : : dnn : : DNN_TARGET_CUDA , cv : : dnn : : DNN_TARGET_CUDA_FP16 ) ;
// Full option specification. Left mutable because main() appends the
// model-specific preprocessing options returned by genPreprocArguments().
std : : string keys = param_keys + backend_keys + target_keys ;
using namespace cv ;
using namespace cv ;
using namespace std ;
using namespace dnn ;
using namespace dnn ;
std : : vector < std : : string > classes ;
// Command-line option specification, written with unqualified names (relies
// on the using-directives for cv, std and dnn that precede it). Each entry is
// "{ name [short alias] | default value | help text }".
// NOTE(review): the declaration of `colors` on the next-but-one line sits
// between this declarator and its string-literal initializer, and `colors`
// is declared again after `keys` below -- this looks like leftover diff
// residue; confirm which declaration is meant to stay.
const string param_keys =
std : : vector < Vec3b > colors ;
" { help h | | Print help message. } "
" { @alias | | An alias name of model to extract preprocessing parameters from models.yml file. } "
" { zoo | models.yml | An optional path to file with preprocessing parameters } "
" { device | 0 | camera device number. } "
" { input i | | Path to input image or video file. Skip this argument to capture frames from a camera. } "
" { classes | | Optional path to a text file with names of classes. } "
" { colors | | Optional path to a text file with colors for an every class. "
" Every color is represented with three values from 0 to 255 in BGR channels order. } " ;
// "backend" option: format() substitutes the numeric values of the
// DNN_BACKEND_* enumerators into the help text, one per %d, in order.
const string backend_keys = format (
" { backend | 0 | Choose one of computation backends: "
" %d: automatically (by default), "
" %d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
" %d: OpenCV implementation, "
" %d: VKCOM, "
" %d: CUDA } " ,
DNN_BACKEND_DEFAULT , DNN_BACKEND_INFERENCE_ENGINE , DNN_BACKEND_OPENCV , DNN_BACKEND_VKCOM , DNN_BACKEND_CUDA ) ;
// "target" option: same construction from the DNN_TARGET_* enumerators
// (seven placeholders, seven arguments).
// NOTE(review): "half-float preprocess" on the CUDA fp16 entry presumably
// should read "half-float precision", matching the OpenCL fp16 entry; confirm.
const string target_keys = format (
" { target | 0 | Choose one of target computation devices: "
" %d: CPU target (by default), "
" %d: OpenCL, "
" %d: OpenCL fp16 (half-float precision), "
" %d: VPU, "
" %d: Vulkan, "
" %d: CUDA, "
" %d: CUDA fp16 (half-float preprocess) } " ,
DNN_TARGET_CPU , DNN_TARGET_OPENCL , DNN_TARGET_OPENCL_FP16 , DNN_TARGET_MYRIAD , DNN_TARGET_VULKAN , DNN_TARGET_CUDA , DNN_TARGET_CUDA_FP16 ) ;
// Full option specification; intentionally not const because main() appends
// the result of genPreprocArguments() to it.
string keys = param_keys + backend_keys + target_keys ;
// Class names, filled in main() with one entry per line of the file given by
// the "classes" option.
vector < string > classes ;
// Per-class colors, read in main() from the file given by the "colors"
// option (the option's help text describes them as BGR triples, 0..255).
vector < Vec3b > colors ;
// Forward declarations of helpers defined after main().
// NOTE(review): each prototype appears twice in a row here; the repetition is
// harmless to the compiler but looks like diff residue -- confirm.

// Builds a legend image (a CV_8UC3 `legend` Mat, kBlockHeight rows per class,
// 200 columns) and raises CV_Error when the number of colors differs from the
// number of classes. Only part of the definition is visible in this view, so
// anything beyond that is unconfirmed.
void showLegend ( ) ;
void showLegend ( ) ;
// Converts the network output `score` into a color image written to `segm`
// (allocated as rows x cols, CV_8UC3). Raises CV_Error in the branch where
// the channel count does not match colors.size(). Only part of the
// definition is visible in this view -- presumably a per-pixel arg-max over
// channels followed by a color lookup; confirm against the full body.
void colorizeSegmentation ( const Mat & score , Mat & segm ) ;
void colorizeSegmentation ( const Mat & score , Mat & segm ) ;
int main ( int argc , char * * argv )
int main ( int argc , char * * argv )
{
{
CommandLineParser parser ( argc , argv , keys ) ;
CommandLineParser parser ( argc , argv , keys ) ;
const std : : string modelName = parser . get < String > ( " @alias " ) ;
const string modelName = parser . get < String > ( " @alias " ) ;
const std : : string zooFile = parser . get < String > ( " zoo " ) ;
const string zooFile = parser . get < String > ( " zoo " ) ;
keys + = genPreprocArguments ( modelName , zooFile ) ;
keys + = genPreprocArguments ( modelName , zooFile ) ;
@ -68,36 +73,33 @@ int main(int argc, char** argv)
int inpWidth = parser . get < int > ( " width " ) ;
int inpWidth = parser . get < int > ( " width " ) ;
int inpHeight = parser . get < int > ( " height " ) ;
int inpHeight = parser . get < int > ( " height " ) ;
String model = findFile ( parser . get < String > ( " model " ) ) ;
String model = findFile ( parser . get < String > ( " model " ) ) ;
String config = findFile ( parser . get < String > ( " config " ) ) ;
String framework = parser . get < String > ( " framework " ) ;
int backendId = parser . get < int > ( " backend " ) ;
int backendId = parser . get < int > ( " backend " ) ;
int targetId = parser . get < int > ( " target " ) ;
int targetId = parser . get < int > ( " target " ) ;
// Open file with classes names.
// Open file with classes names.
if ( parser . has ( " classes " ) )
if ( parser . has ( " classes " ) )
{
{
std : : st ring file = parser . get < String > ( " classes " ) ;
string file = parser . get < String > ( " classes " ) ;
std : : ifstream ifs ( file . c_str ( ) ) ;
ifstream ifs ( file . c_str ( ) ) ;
if ( ! ifs . is_open ( ) )
if ( ! ifs . is_open ( ) )
CV_Error ( Error : : StsError , " File " + file + " not found " ) ;
CV_Error ( Error : : StsError , " File " + file + " not found " ) ;
std : : st ring line ;
string line ;
while ( std : : getline ( ifs , line ) )
while ( getline ( ifs , line ) )
{
{
classes . push_back ( line ) ;
classes . push_back ( line ) ;
}
}
}
}
// Open file with colors.
// Open file with colors.
if ( parser . has ( " colors " ) )
if ( parser . has ( " colors " ) )
{
{
std : : st ring file = parser . get < String > ( " colors " ) ;
string file = parser . get < String > ( " colors " ) ;
std : : ifstream ifs ( file . c_str ( ) ) ;
ifstream ifs ( file . c_str ( ) ) ;
if ( ! ifs . is_open ( ) )
if ( ! ifs . is_open ( ) )
CV_Error ( Error : : StsError , " File " + file + " not found " ) ;
CV_Error ( Error : : StsError , " File " + file + " not found " ) ;
std : : st ring line ;
string line ;
while ( std : : getline ( ifs , line ) )
while ( getline ( ifs , line ) )
{
{
std : : istringstream colorStr ( line . c_str ( ) ) ;
istringstream colorStr ( line . c_str ( ) ) ;
Vec3b color ;
Vec3b color ;
for ( int i = 0 ; i < 3 & & ! colorStr . eof ( ) ; + + i )
for ( int i = 0 ; i < 3 & & ! colorStr . eof ( ) ; + + i )
@ -114,23 +116,21 @@ int main(int argc, char** argv)
CV_Assert ( ! model . empty ( ) ) ;
CV_Assert ( ! model . empty ( ) ) ;
//! [Read and initialize network]
//! [Read and initialize network]
Net net = readNet ( model , config , framework ) ;
Net net = readNetFromONNX ( model ) ;
net . setPreferableBackend ( backendId ) ;
net . setPreferableBackend ( backendId ) ;
net . setPreferableTarget ( targetId ) ;
net . setPreferableTarget ( targetId ) ;
//! [Read and initialize network]
//! [Read and initialize network]
// Create a window
// Create a window
static const std : : st ring kWinName = " Deep learning semantic segmentation in OpenCV " ;
static const string kWinName = " Deep learning semantic segmentation in OpenCV " ;
namedWindow ( kWinName , WINDOW_NORMAL ) ;
namedWindow ( kWinName , WINDOW_NORMAL ) ;
//! [Open a video file or an image file or a camera stream]
//! [Open a video file or an image file or a camera stream]
VideoCapture cap ;
VideoCapture cap ;
if ( parser . has ( " input " ) )
if ( parser . has ( " input " ) )
cap . open ( parser . get < String > ( " input " ) ) ;
cap . open ( findFile ( parser . get < String > ( " input " ) ) ) ;
else
else
cap . open ( parser . get < int > ( " device " ) ) ;
cap . open ( parser . get < int > ( " device " ) ) ;
//! [Open a video file or an image file or a camera stream]
//! [Open a video file or an image file or a camera stream]
// Process frames.
// Process frames.
Mat frame , blob ;
Mat frame , blob ;
while ( waitKey ( 1 ) < 0 )
while ( waitKey ( 1 ) < 0 )
@ -141,29 +141,45 @@ int main(int argc, char** argv)
waitKey ( ) ;
waitKey ( ) ;
break ;
break ;
}
}
imshow ( " Original Image " , frame ) ;
//! [Create a 4D blob from a frame]
//! [Create a 4D blob from a frame]
blobFromImage ( frame , blob , scale , Size ( inpWidth , inpHeight ) , mean , swapRB , false ) ;
blobFromImage ( frame , blob , scale , Size ( inpWidth , inpHeight ) , mean , swapRB , false ) ;
//! [Create a 4D blob from a frame]
//! [Set input blob]
//! [Set input blob]
net . setInput ( blob ) ;
net . setInput ( blob ) ;
//! [Set input blob]
//! [Make forward pass]
//! [Make forward pass]
Mat score = net . forward ( ) ;
Mat score = net . forward ( ) ;
//! [Make forward pass]
if ( modelName = = " u2netp " )
{
Mat segm ;
Mat mask , thresholded_mask , foreground_overlay , background_overlay , foreground_segmented ;
colorizeSegmentation ( score , segm ) ;
mask = cv : : Mat ( score . size [ 2 ] , score . size [ 3 ] , CV_32F , score . ptr < float > ( 0 , 0 ) ) ;
mask . convertTo ( mask , CV_8U , 255 ) ;
resize ( segm , segm , frame . size ( ) , 0 , 0 , INTER_NEAREST ) ;
threshold ( mask , thresholded_mask , 0 , 255 , THRESH_BINARY + THRESH_OTSU ) ;
addWeighted ( frame , 0.1 , segm , 0.9 , 0.0 , frame ) ;
resize ( thresholded_mask , thresholded_mask , Size ( frame . cols , frame . rows ) , 0 , 0 , INTER_AREA ) ;
// Create overlays for foreground and background
foreground_overlay = Mat : : zeros ( frame . size ( ) , frame . type ( ) ) ;
background_overlay = Mat : : zeros ( frame . size ( ) , frame . type ( ) ) ;
// Set foreground (object) to red and background to blue
foreground_overlay . setTo ( Scalar ( 0 , 0 , 255 ) , thresholded_mask ) ;
Mat inverted_mask ;
bitwise_not ( thresholded_mask , inverted_mask ) ;
background_overlay . setTo ( Scalar ( 255 , 0 , 0 ) , inverted_mask ) ;
// Blend the overlays with the original frame
addWeighted ( frame , 1 , foreground_overlay , 0.5 , 0 , foreground_segmented ) ;
addWeighted ( foreground_segmented , 1 , background_overlay , 0.5 , 0 , frame ) ;
}
else
{
Mat segm ;
colorizeSegmentation ( score , segm ) ;
resize ( segm , segm , frame . size ( ) , 0 , 0 , INTER_NEAREST ) ;
addWeighted ( frame , 0.1 , segm , 0.9 , 0.0 , frame ) ;
}
// Put efficiency information.
// Put efficiency information.
std : : vector < double > layersTimes ;
vector < double > layersTimes ;
double freq = getTickFrequency ( ) / 1000 ;
double freq = getTickFrequency ( ) / 1000 ;
double t = net . getPerfProfile ( layersTimes ) / freq ;
double t = net . getPerfProfile ( layersTimes ) / freq ;
std : : string label = format ( " Inference time: %.2f ms " , t ) ;
string label = format ( " Inference time: %.2f ms " , t ) ;
putText ( frame , label , Point ( 0 , 15 ) , FONT_HERSHEY_SIMPLEX , 0.5 , Scalar ( 0 , 255 , 0 ) ) ;
putText ( frame , label , Point ( 0 , 15 ) , FONT_HERSHEY_SIMPLEX , 0.5 , Scalar ( 0 , 255 , 0 ) ) ;
imshow ( kWinName , frame ) ;
imshow ( kWinName , frame ) ;
@ -194,7 +210,8 @@ void colorizeSegmentation(const Mat &score, Mat &segm)
else if ( chns ! = ( int ) colors . size ( ) )
else if ( chns ! = ( int ) colors . size ( ) )
{
{
CV_Error ( Error : : StsError , format ( " Number of output classes does not match "
CV_Error ( Error : : StsError , format ( " Number of output classes does not match "
" number of colors (%d != %zu) " , chns , colors . size ( ) ) ) ;
" number of colors (%d != %zu) " ,
chns , colors . size ( ) ) ) ;
}
}
Mat maxCl = Mat : : zeros ( rows , cols , CV_8UC1 ) ;
Mat maxCl = Mat : : zeros ( rows , cols , CV_8UC1 ) ;
@ -216,7 +233,6 @@ void colorizeSegmentation(const Mat &score, Mat &segm)
}
}
}
}
}
}
segm . create ( rows , cols , CV_8UC3 ) ;
segm . create ( rows , cols , CV_8UC3 ) ;
for ( int row = 0 ; row < rows ; row + + )
for ( int row = 0 ; row < rows ; row + + )
{
{
@ -239,7 +255,8 @@ void showLegend()
if ( ( int ) colors . size ( ) ! = numClasses )
if ( ( int ) colors . size ( ) ! = numClasses )
{
{
CV_Error ( Error : : StsError , format ( " Number of output classes does not match "
CV_Error ( Error : : StsError , format ( " Number of output classes does not match "
" number of labels (%zu != %zu) " , colors . size ( ) , classes . size ( ) ) ) ;
" number of labels (%zu != %zu) " ,
colors . size ( ) , classes . size ( ) ) ) ;
}
}
legend . create ( kBlockHeight * numClasses , 200 , CV_8UC3 ) ;
legend . create ( kBlockHeight * numClasses , 200 , CV_8UC3 ) ;
for ( int i = 0 ; i < numClasses ; i + + )
for ( int i = 0 ; i < numClasses ; i + + )