@@ -8,62 +8,109 @@
#include "common.hpp"
std::string param_keys =
    "{ help  h     |            | Print help message. }"
    "{ @alias      |            | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo         | models.yml | An optional path to file with preprocessing parameters }"
    "{ input i     |            | Path to input image or video file. Skip this argument to capture frames from a camera.}"
    "{ initial_width  | 0 | Preprocess input image by initial resizing to a specific width.}"
    "{ initial_height | 0 | Preprocess input image by initial resizing to a specific height.}"
    "{ std         | 0.0 0.0 0.0 | Preprocess input image by dividing by a standard deviation.}"
    "{ crop        | false      | Preprocess input image by center cropping.}"
    "{ framework f |            | Optional name of an origin framework of the model. Detect it automatically if it is not set. }"
    "{ needSoftmax | false      | Use Softmax to post-process the output of the net.}"
    "{ classes     |            | Optional path to a text file with names of classes. }";
std::string backend_keys = cv::format(
    "{ backend | 0 | Choose one of computation backends: "
    "%d: automatically (by default), "
    "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
    "%d: OpenCV implementation, "
    "%d: VKCOM, "
    "%d: CUDA, "
    "%d: WebNN }",
    cv::dnn::DNN_BACKEND_DEFAULT, cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_BACKEND_OPENCV,
    cv::dnn::DNN_BACKEND_VKCOM, cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_BACKEND_WEBNN);
std::string target_keys = cv::format(
    "{ target | 0 | Choose one of target computation devices: "
    "%d: CPU target (by default), "
    "%d: OpenCL, "
    "%d: OpenCL fp16 (half-float precision), "
    "%d: VPU, "
    "%d: Vulkan, "
    "%d: CUDA, "
    "%d: CUDA fp16 (half-float preprocess) }",
    cv::dnn::DNN_TARGET_CPU, cv::dnn::DNN_TARGET_OPENCL, cv::dnn::DNN_TARGET_OPENCL_FP16,
    cv::dnn::DNN_TARGET_MYRIAD, cv::dnn::DNN_TARGET_VULKAN, cv::dnn::DNN_TARGET_CUDA,
    cv::dnn::DNN_TARGET_CUDA_FP16);
std::string keys = param_keys + backend_keys + target_keys;

using namespace cv;
using namespace std;
using namespace dnn;

std::vector<std::string> classes;
const string about =
    "Use this script to run a classification model on a camera stream, video, image or image list (i.e. .xml or .yaml containing image lists)\n\n"
    "Firstly, download required models using `download_models.py` (if not already done). Set environment variable OPENCV_DOWNLOAD_CACHE_DIR to specify where models should be downloaded. Also, point OPENCV_SAMPLES_DATA_PATH to opencv/samples/data.\n"
    "To run:\n"
    "\t ./example_dnn_classification model_name --input=path/to/your/input/image/or/video (omit the --input flag to use the device camera)\n"
    "Sample command:\n"
    "\t ./example_dnn_classification resnet --input=$OPENCV_SAMPLES_DATA_PATH/baboon.jpg\n"
    "\t ./example_dnn_classification squeezenet\n"
    "Model path can also be specified using --model argument. "
    "Use imagelist_creator to create the xml or yaml list\n";
const string param_keys =
    "{ help  h |                   | Print help message. }"
    "{ @alias  |                   | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo     | ../dnn/models.yml | An optional path to file with preprocessing parameters }"
    "{ input i |                   | Path to input image or video file. Skip this argument to capture frames from a camera.}"
    "{ imglist |                   | Pass this flag if an image list (i.e. .xml or .yaml) file is passed.}"
    "{ crop    | false             | Preprocess input image by center cropping.}"
    //"{ labels |                  | Path to the text file with labels for detected objects.}"
    "{ model   |                   | Path to the model file.}";
const string backend_keys = format(
    "{ backend | default | Choose one of computation backends: "
    "default: automatically (by default), "
    "openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
    "opencv: OpenCV implementation, "
    "vkcom: VKCOM, "
    "cuda: CUDA, "
    "webnn: WebNN }");
const string target_keys = format(
    "{ target | cpu | Choose one of target computation devices: "
    "cpu: CPU target (by default), "
    "opencl: OpenCL, "
    "opencl_fp16: OpenCL fp16 (half-float precision), "
    "vpu: VPU, "
    "vulkan: Vulkan, "
    "cuda: CUDA, "
    "cuda_fp16: CUDA fp16 (half-float preprocess) }");
string keys = param_keys + backend_keys + target_keys;

vector<string> classes;
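// Read image file names from a FileStorage (.xml/.yaml) sequence such as one produced by
// imagelist_creator. Relative entries are resolved against the list file's directory first,
// then looked up via samples::findFile().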
static bool readStringList(const string& filename, vector<string>& l)
{
    l.resize(0);
    FileStorage fs(filename, FileStorage::READ);
    if (!fs.isOpened())
        return false;
    size_t dir_pos = filename.rfind('/');
    if (dir_pos == string::npos)
        dir_pos = filename.rfind('\\');
    FileNode n = fs.getFirstTopLevelNode();
    if (n.type() != FileNode::SEQ)
        return false;
    FileNodeIterator it = n.begin(), it_end = n.end();
    for (; it != it_end; ++it)
    {
        string fname = (string)*it;
        if (dir_pos != string::npos)
        {
            string fpath = samples::findFile(filename.substr(0, dir_pos + 1) + fname, false);
            if (fpath.empty())
            {
                fpath = samples::findFile(fname);
            }
            fname = fpath;
        }
        else
        {
            fname = samples::findFile(fname);
        }
        l.push_back(fname);
    }
    return true;
}
int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, keys);

    const std::string modelName = parser.get<String>("@alias");
    const std::string zooFile = parser.get<String>("zoo");
    if (!parser.has("@alias") || parser.has("help"))
    {
        cout << about << endl;
        parser.printMessage();
        return -1;
    }
    const string modelName = parser.get<String>("@alias");
    const string zooFile = findFile(parser.get<String>("zoo"));
    keys += genPreprocArguments(modelName, zooFile);
    parser = CommandLineParser(argc, argv, keys);
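    // genPreprocArguments() (from common.hpp) appends model-specific keys read from models.yml
    // (e.g. mean, scale, width, height, rgb and, presumably here, sha1 and labels), which is why
    // the CommandLineParser is rebuilt with the extended key string.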
parser . about ( " Use this script to run classification deep learning networks using OpenCV. " ) ;
parser . about ( about ) ;
if ( argc = = 1 | | parser . has ( " help " ) )
{
parser . printMessage ( ) ;
return 0 ;
}
int rszWidth = parser . get < int > ( " initial_width " ) ;
int rszHeight = parser . get < int > ( " initial_height " ) ;
String sha1 = parser . get < String > ( " sha1 " ) ;
float scale = parser . get < float > ( " scale " ) ;
Scalar mean = parser . get < Scalar > ( " mean " ) ;
Scalar std = parser . get < Scalar > ( " std " ) ;
@@ -71,73 +118,94 @@ int main(int argc, char** argv)
    bool crop = parser.get<bool>("crop");
    int inpWidth = parser.get<int>("width");
    int inpHeight = parser.get<int>("height");
    String model = findFile(parser.get<String>("model"));
    String config = findFile(parser.get<String>("config"));
    String framework = parser.get<String>("framework");
    int backendId = parser.get<int>("backend");
    int targetId = parser.get<int>("target");
    bool needSoftmax = parser.get<bool>("needSoftmax");
    std::cout << "mean: " << mean << std::endl;
    std::cout << "std: " << std << std::endl;

    // Open file with classes names.
    if (parser.has("classes"))
    String model = findModel(parser.get<String>("model"), sha1);
    String backend = parser.get<String>("backend");
    String target = parser.get<String>("target");
    bool isImgList = parser.has("imglist");

    // Open file with labels.
    string labels_filename = parser.get<String>("labels");
    string file = findFile(labels_filename);
    ifstream ifs(file.c_str());
    if (!ifs.is_open()) {
        cout << "File " << file << " not found";
        exit(1);
    }
    string line;
    while (getline(ifs, line))
    {
        std::string file = parser.get<String>("classes");
        std::ifstream ifs(file.c_str());
        if (!ifs.is_open())
            CV_Error(Error::StsError, "File " + file + " not found");
        std::string line;
        while (std::getline(ifs, line))
        {
            classes.push_back(line);
        }
        classes.push_back(line);
    }
    if (!parser.check())
    {
        parser.printErrors();
        return 1;
    }
    CV_Assert(!model.empty());

    //! [Read and initialize network]
    Net net = readNet(model, config, framework);
    net.setPreferableBackend(backendId);
    net.setPreferableTarget(targetId);
    Net net = readNetFromONNX(model);
    net.setPreferableBackend(getBackendID(backend));
    net.setPreferableTarget(getTargetID(target));
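    // Note: getBackendID()/getTargetID() are assumed to be helpers from common.hpp that map the
    // string options ("default", "cuda", "opencl_fp16", ...) to the corresponding cv::dnn enum values.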
    //! [Read and initialize network]

    // Create a window
    static const std::string kWinName = "Deep learning image classification in OpenCV";
    namedWindow(kWinName, WINDOW_NORMAL);

    // Create FontFace for putText
    FontFace sans("sans");
    //! [Open a video file or an image file or a camera stream]
    VideoCapture cap;
    if (parser.has("input"))
        cap.open(parser.get<String>("input"));
    else
        cap.open(0);
    vector<string> imageList;
    size_t currentImageIndex = 0;
    if (parser.has("input")) {
        string input = findFile(parser.get<String>("input"));
        if (isImgList) {
            bool check = readStringList(samples::findFile(input), imageList);
            if (imageList.empty() || !check) {
                cout << "Error: No images found or the provided file is not a valid .yaml or .xml file." << endl;
                return -1;
            }
        } else {
            // Input is not an image list; try to open it as a video or an image.
            cap.open(input);
            if (!cap.isOpened()) {
                cout << "Failed to open the input." << endl;
                return -1;
            }
        }
    } else {
        cap.open(0); // Open default camera
    }
    //! [Open a video file or an image file or a camera stream]
    // Process frames.
    Mat frame, blob;
    while (waitKey(1) < 0)
    for (;;)
    {
        cap >> frame;
        if (!imageList.empty()) {
            // Handling a list of images
            if (currentImageIndex >= imageList.size()) {
                waitKey();
                break; // Exit if all images are processed
            }
            frame = imread(imageList[currentImageIndex++]);
            if (frame.empty()) {
                cout << "Cannot open file" << endl;
                continue;
            }
        } else {
            // Handling video or single image
            cap >> frame;
        }
        if (frame.empty())
        {
            waitKey();
            break;
        }

        if (rszWidth != 0 && rszHeight != 0)
        {
            resize(frame, frame, Size(rszWidth, rszHeight));
        }

        //! [Create a 4D blob from a frame]
        blobFromImage(frame, blob, scale, Size(inpWidth, inpHeight), mean, swapRB, crop);

        // Check std values.
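        // blobFromImage() applies only the scale factor and mean subtraction; the per-channel
        // division by std (when provided) is performed explicitly on the blob below.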
        if (std.val[0] != 0.0 && std.val[1] != 0.0 && std.val[2] != 0.0)
        {
@@ -145,69 +213,51 @@
            divide(blob, std, blob);
        }
        //! [Create a 4D blob from a frame]

        //! [Set input blob]
        net.setInput(blob);
        //! [Set input blob]

        //! [Make forward pass]
        // double t_sum = 0.0;
        // double t;
        int classId;
        double confidence;
        cv::TickMeter timeRecorder;
        TickMeter timeRecorder;
        timeRecorder.reset();
        Mat prob = net.forward();
        double t1;
        //! [Make forward pass]
        timeRecorder.start();
        prob = net.forward();
        timeRecorder.stop();
        t1 = timeRecorder.getTimeMilli();
        //! [Make forward pass]
        timeRecorder.reset();
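        // Benchmark: run 200 more forward passes and report the average time per inference.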
        for (int i = 0; i < 200; i++) {
            //! [Make forward pass]
            timeRecorder.start();
            prob = net.forward();
            timeRecorder.stop();

            //! [Get a class with a highest score]
            Point classIdPoint;
            minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
            classId = classIdPoint.x;
            //! [Get a class with a highest score]

            // Put efficiency information.
            // std::vector<double> layersTimes;
            // double freq = getTickFrequency() / 1000;
            // t = net.getPerfProfile(layersTimes) / freq;
            // t_sum += t;
        }
        if (needSoftmax == true)
        {
            float maxProb = 0.0;
            float sum = 0.0;
            Mat softmaxProb;
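            // Numerically stable softmax: subtract the maximum logit before exponentiation
            // to avoid overflow, then normalize by the sum of the exponentials.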
            maxProb = *std::max_element(prob.begin<float>(), prob.end<float>());
            cv::exp(prob - maxProb, softmaxProb);
            sum = (float)cv::sum(softmaxProb)[0];
            softmaxProb /= sum;
            Point classIdPoint;
            minMaxLoc(softmaxProb.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
            classId = classIdPoint.x;
        //! [Get a class with a highest score]
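        // Store (negated score, class id) pairs so that an ascending sort brings the
        // K highest-scoring classes to the front of the vector.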
        int N = (int)prob.total(), K = std::min(5, N);
        std::vector<std::pair<float, int>> prob_vec;
        for (int i = 0; i < N; i++) {
            prob_vec.push_back(std::make_pair(-prob.at<float>(i), i));
        }
        std::string label = format("Inference time of 1 round: %.2f ms", t1);
        std::string label2 = format("Average time of 200 rounds: %.2f ms", timeRecorder.getTimeMilli() / 200);
        putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
        putText(frame, label2, Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
        std::sort(prob_vec.begin(), prob_vec.end());

        // Print predicted class.
        label = format("%s: %.4f", (classes.empty() ? format("Class #%d", classId).c_str() :
                                                      classes[classId].c_str()),
                       confidence);
        putText(frame, label, Point(0, 55), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
        //! [Get a class with a highest score]
        t1 = timeRecorder.getTimeMilli();
        timeRecorder.reset();
        string label = format("Inference time: %.1f ms", t1);
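        // Darken the top-left region of the frame so the green overlay text remains readable.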
        Mat subframe = frame(Rect(0, 0, std::min(1000, frame.cols), std::min(300, frame.rows)));
        subframe *= 0.3f;
        putText(frame, label, Point(20, 50), Scalar(0, 255, 0), sans, 25, 800);

        // Print predicted class.
        for (int i = 0; i < K; i++) {
            int classId = prob_vec[i].second;
            float confidence = -prob_vec[i].first;
            label = format("%d. %s: %.2f", i + 1, (classes.empty() ? format("Class #%d", classId).c_str() :
                                                                     classes[classId].c_str()), confidence);
            putText(frame, label, Point(20, 110 + i * 35), Scalar(0, 255, 0), sans, 25, 500);
        }

        imshow(kWinName, frame);
        int key = waitKey(isImgList ? 1000 : 100);
        if (key == ' ')
            key = waitKey();
        if (key == 'q' || key == 27) // Check if 'q' or 'ESC' is pressed
            return 0;
    }
    waitKey();
    return 0;
}