@ -24,8 +24,8 @@ TrackerVit::~TrackerVit()
// Default constructor of TrackerVit::Params: assigns the default model path,
// the per-channel mean/std values and the DNN backend/target.
// NOTE(review): this region is a flattened diff. Statements that appear twice
// are unchanged context (old side, then new side); statements that appear once
// exist on only one side. Lines starting with `@` are hunk headers, so the
// text between two hunks is not visible here.
TrackerVit : : Params : : Params ( )
TrackerVit : : Params : : Params ( )
{
{
net = " vitTracker.onnx " ;
net = " vitTracker.onnx " ;
// The second copy of each of the next two assignments differs from the first
// only by its trailing comment.
meanvalue = Scalar { 0.485 , 0.456 , 0.406 } ;
meanvalue = Scalar { 0.485 , 0.456 , 0.406 } ; // normalized mean (already divided by 255)
stdvalue = Scalar { 0.229 , 0.224 , 0.225 } ;
stdvalue = Scalar { 0.229 , 0.224 , 0.225 } ; // normalized std (already divided by 255)
# ifdef HAVE_OPENCV_DNN
# ifdef HAVE_OPENCV_DNN
backend = dnn : : DNN_BACKEND_DEFAULT ;
backend = dnn : : DNN_BACKEND_DEFAULT ;
target = dnn : : DNN_TARGET_CPU ;
target = dnn : : DNN_TARGET_CPU ;
// NOTE(review): the hunk boundary below hides whatever separates the two
// backend/target assignments (presumably an `#else` - TODO confirm).
@ -33,6 +33,7 @@ TrackerVit::Params::Params()
backend = - 1 ; // invalid value
backend = - 1 ; // invalid value
target = - 1 ; // invalid value
target = - 1 ; // invalid value
# endif
# endif
// Appears once only: default for the score threshold that update() compares
// tracking_score against before accepting a new bounding box.
tracking_score_threshold = 0.20f ; // safe threshold to filter out black frames
}
}
# ifdef HAVE_OPENCV_DNN
# ifdef HAVE_OPENCV_DNN
@ -48,6 +49,9 @@ public:
// Fragment of the TrackerVitImpl class: the class header and the start of the
// enclosing function are outside the visible hunk.
// Applies the backend/target selected in params to the network.
net . setPreferableBackend ( params . backend ) ;
net . setPreferableBackend ( params . backend ) ;
net . setPreferableTarget ( params . target ) ;
net . setPreferableTarget ( params . target ) ;
// Appear once only: derive the blob-conversion settings from the params.
// mean is params.meanvalue scaled by 255; scalefactor is (1 / std) * (1 / 255).
// NOTE(review): `1.0 / params . stdvalue` relies on the scalar-over-Scalar
// division operator. Confirm that it is an element-wise reciprocal and not a
// quaternion-style inverse, otherwise the per-channel scale is wrong - TODO
// confirm against the cv::Scalar_ operators.
i2bp . mean = params . meanvalue * 255.0 ;
i2bp . scalefactor = ( 1.0 / params . stdvalue ) * ( 1 / 255.0 ) ;
}
}
// Declaration of init(); the definition appears further down in this file.
void init ( InputArray image , const Rect & boundingBox ) CV_OVERRIDE ;
void init ( InputArray image , const Rect & boundingBox ) CV_OVERRIDE ;
@ -58,6 +62,7 @@ public:
// Score of the latest update(): assigned there from the maximum of the
// confidence map.
float tracking_score ;
float tracking_score ;
TrackerVit : : Params params ;
TrackerVit : : Params params ;
// Appears once only: blob-conversion parameters, passed to
// blobFromImageWithParams() in preprocess().
dnn : : Image2BlobParams i2bp ;
protected :
protected :
@ -69,10 +74,9 @@ protected:
Mat hanningWindow ;
Mat hanningWindow ;
dnn : : Net net ;
dnn : : Net net ;
// Appears once only: an `image` member. init()/update() contain both an
// assignment to `image` and a local `Mat image` declaration, so this member
// is presumably the one being removed - TODO confirm.
Mat image ;
} ;
} ;
// Cuts a square window out of `src` for the box `box` and writes it to `dst`,
// padding with a constant border by the computed *_pad amounts.
// The window side is crop_sz = cvCeil(sqrt(w * h) * factor).
// Two signatures are interleaved: one returning void and one returning int;
// the int variant returns crop_sz (see the single `return` line below).
// NOTE(review): the hunk boundary in the middle hides the computation of
// x1/y1/x2/y2 and of the four pad values.
static void crop_image ( const Mat & src , Mat & dst , Rect box , int factor )
static int crop_image ( const Mat & src , Mat & dst , Rect box , int factor )
{
{
int x = box . x , y = box . y , w = box . width , h = box . height ;
int x = box . x , y = box . y , w = box . width , h = box . height ;
int crop_sz = cvCeil ( sqrt ( w * h ) * factor ) ;
int crop_sz = cvCeil ( sqrt ( w * h ) * factor ) ;
@ -90,21 +94,16 @@ static void crop_image(const Mat& src, Mat& dst, Rect box, int factor)
// The ROI is the window shrunk by the pad amounts on each side; the padding
// is re-added by copyMakeBorder so `dst` regains the padded extent.
Rect roi ( x1 + x1_pad , y1 + y1_pad , x2 - x2_pad - x1 - x1_pad , y2 - y2_pad - y1 - y1_pad ) ;
Rect roi ( x1 + x1_pad , y1 + y1_pad , x2 - x2_pad - x1 - x1_pad , y2 - y2_pad - y1 - y1_pad ) ;
Mat im_crop = src ( roi ) ;
Mat im_crop = src ( roi ) ;
copyMakeBorder ( im_crop , dst , y1_pad , y2_pad , x1_pad , x2_pad , BORDER_CONSTANT ) ;
copyMakeBorder ( im_crop , dst , y1_pad , y2_pad , x1_pad , x2_pad , BORDER_CONSTANT ) ;
// Appears once only: belongs to the int-returning signature, so the caller
// can reuse the exact window size that was cropped.
return crop_sz ;
}
}
// Resizes `src` to `size` and converts it into a network input blob in `dst`.
// Two normalization strategies are interleaved in this diff:
//  - single-occurrence lines using `mean`/`std` Mats: blobFromImage, divide by
//    255, then (dst - mean) / std;
//  - single-occurrence line using blobFromImageWithParams with the i2bp member.
// NOTE(review): which side is old/new is inferred from the hunk header counts
// (21 old lines vs 16 new lines), so the manual path is presumably the one
// being removed - TODO confirm.
void TrackerVitImpl : : preprocess ( const Mat & src , Mat & dst , Size size )
void TrackerVitImpl : : preprocess ( const Mat & src , Mat & dst , Size size )
{
{
// Manual path: constant images filled with the mean/std values, turned into
// blobs so they match the layout of `dst` for the arithmetic below.
Mat mean = Mat ( size , CV_32FC3 , params . meanvalue ) ;
Mat std = Mat ( size , CV_32FC3 , params . stdvalue ) ;
mean = dnn : : blobFromImage ( mean , 1.0 , Size ( ) , Scalar ( ) , false ) ;
std = dnn : : blobFromImage ( std , 1.0 , Size ( ) , Scalar ( ) , false ) ;
Mat img ;
Mat img ;
resize ( src , img , size ) ;
resize ( src , img , size ) ;
// Manual path: plain conversion, normalization happens in the lines after.
dst = dnn : : blobFromImage ( img , 1.0 , Size ( ) , Scalar ( ) , false ) ;
// Params path: the conversion is driven entirely by the i2bp member.
dst = dnn : : blobFromImageWithParams ( img , i2bp ) ;
// Manual path: scale to 0..1, then subtract mean and divide by std.
dst / = 255 ;
dst = ( dst - mean ) / std ;
}
}
static Mat hann1d ( int sz , bool centered = true ) {
static Mat hann1d ( int sz , bool centered = true ) {
@ -141,22 +140,21 @@ static Mat hann2d(Size size, bool centered = true) {
return hanningWindow ;
return hanningWindow ;
}
}
// Maps a box expressed relative to the crop window back into image
// coordinates. Two variants are interleaved line by line:
//  - returnfromcrop: takes the top-left corner (x, y), recomputes the window
//    size as 4 * cvFloor(sqrt(width * height)) of the previous rect and
//    returns a new Rect (`finalres`);
//  - updateLastRect: takes the box centre (cx, cy) and the window size as
//    `crop_size`, and overwrites the previous rect in place.
// Note that crop_image() computes its window as cvCeil(sqrt(w * h) * factor),
// which can differ from 4 * cvFloor(sqrt(w * h)); taking crop_size from the
// caller avoids that mismatch.
// NOTE(review): some identifiers in the two signature lines are split by the
// extraction (`res_L ast`, `c x`, `c y`, `rect_l ast`); they are left untouched.
static Rect returnfromcrop ( float x , float y , float w , float h , Rect res_L ast)
static void updateLastRect ( float c x, float c y, float w , float h , int crop_size , Rect & rect_l ast)
{
{
int cropwindowwh = 4 * cvFloor ( sqrt ( res_Last . width * res_Last . height ) ) ;
// x0/y0: top-left corner of the window, centred on the previous rect.
int x0 = rect_last . x + ( rect_last . width - crop_size ) / 2 ;
int x0 = res_Last . x + ( res_Last . width - cropwindowwh ) / 2 ;
int y0 = rect_last . y + ( rect_last . height - crop_size ) / 2 ;
int y0 = res_Last . y + ( res_Last . height - cropwindowwh ) / 2 ;
Rect finalres ;
// Centre -> top-left conversion, done inside the helper in this variant.
float x1 = cx - w / 2 , y1 = cy - h / 2 ;
// Scale the relative values by the window size and offset by x0/y0.
finalres . x = cvFloor ( x * cropwindowwh + x0 ) ;
rect_last . x = cvFloor ( x1 * crop_size + x0 ) ;
finalres . y = cvFloor ( y * cropwindowwh + y0 ) ;
rect_last . y = cvFloor ( y1 * crop_size + y0 ) ;
finalres . width = cvFloor ( w * cropwindowwh ) ;
rect_last . width = cvFloor ( w * crop_size ) ;
finalres . height = cvFloor ( h * cropwindowwh ) ;
rect_last . height = cvFloor ( h * crop_size ) ;
return finalres ;
}
}
// Start of TrackerVitImpl::init: crops the region around `boundingBox_` with
// factor 2 and declares the blob that will hold the network input.
// NOTE(review): the remainder of the function is hidden by the next hunk
// boundary, so only the visible statements are described here.
void TrackerVitImpl : : init ( InputArray image_ , const Rect & boundingBox_ )
void TrackerVitImpl : : init ( InputArray image_ , const Rect & boundingBox_ )
{
{
// Two variants: one assigns a clone of the input to `image`, the other
// declares a local `Mat image` from getMat() without the .clone() call.
image = image_ . getMat ( ) . clone ( ) ;
Mat image = image_ . getMat ( ) ;
Mat crop ;
Mat crop ;
crop_image ( image , crop , boundingBox_ , 2 ) ;
crop_image ( image , crop , boundingBox_ , 2 ) ;
Mat blob ;
Mat blob ;
@ -169,9 +167,9 @@ void TrackerVitImpl::init(InputArray image_, const Rect &boundingBox_)
// Tracks the target in a new frame: crops a search window around rect_last
// with factor 4, preprocesses it to searchSize, feeds it to the network as the
// " search " input, and derives a box from the location of the confidence
// maximum. tracking_score is set to that maximum.
// Two variants are interleaved:
//  - one always converts the result via returnfromcrop(), stores it in
//    rect_last and boundingBoxRes, and returns true;
//  - one only does so when tracking_score reaches
//    params.tracking_score_threshold (via updateLastRect()), and returns
//    false otherwise, leaving rect_last and boundingBoxRes untouched.
// NOTE(review): the hunk boundary in the middle hides the forward pass and the
// construction of conf_map / offset_map / size_map.
bool TrackerVitImpl : : update ( InputArray image_ , Rect & boundingBoxRes )
bool TrackerVitImpl : : update ( InputArray image_ , Rect & boundingBoxRes )
{
{
// Two variants: assignment of a clone to `image`, or a local `Mat image`
// obtained from getMat() without the .clone() call.
image = image_ . getMat ( ) . clone ( ) ;
Mat image = image_ . getMat ( ) ;
Mat crop ;
Mat crop ;
// The second form keeps the window size returned by crop_image() so that the
// same value is used when mapping the result back to image coordinates.
crop_image ( image , crop , rect_last , 4 ) ;
int crop_size = crop_image ( image , crop , rect_last , 4 ) ; // crop: [crop_size, crop_size]
Mat blob ;
Mat blob ;
preprocess ( crop , blob , searchSize ) ;
preprocess ( crop , blob , searchSize ) ;
net . setInput ( blob , " search " ) ;
net . setInput ( blob , " search " ) ;
@ -191,15 +189,18 @@ bool TrackerVitImpl::update(InputArray image_, Rect &boundingBoxRes)
minMaxLoc ( conf_map , nullptr , & maxVal , nullptr , & maxLoc ) ;
minMaxLoc ( conf_map , nullptr , & maxVal , nullptr , & maxLoc ) ;
tracking_score = static_cast < float > ( maxVal ) ;
tracking_score = static_cast < float > ( maxVal ) ;
// cx/cy: peak location plus the offset read from offset_map, divided by 16
// (presumably the side length of the score map - TODO confirm).
// w/h: read from size_map at the same location.
float cx = ( maxLoc . x + offset_map . at < float > ( 0 , maxLoc . y , maxLoc . x ) ) / 16 ;
// Gated variant: only accept the prediction when the score is high enough.
if ( tracking_score > = params . tracking_score_threshold ) {
float cy = ( maxLoc . y + offset_map . at < float > ( 1 , maxLoc . y , maxLoc . x ) ) / 16 ;
float cx = ( maxLoc . x + offset_map . at < float > ( 0 , maxLoc . y , maxLoc . x ) ) / 16 ;
float w = size_map . at < float > ( 0 , maxLoc . y , maxLoc . x ) ;
float cy = ( maxLoc . y + offset_map . at < float > ( 1 , maxLoc . y , maxLoc . x ) ) / 16 ;
float h = size_map . at < float > ( 1 , maxLoc . y , maxLoc . x ) ;
float w = size_map . at < float > ( 0 , maxLoc . y , maxLoc . x ) ;
float h = size_map . at < float > ( 1 , maxLoc . y , maxLoc . x ) ;
// Ungated variant: convert centre to top-left at the call site and replace
// rect_last with the returned rectangle.
Rect finalres = returnfromcrop ( cx - w / 2 , cy - h / 2 , w , h , rect_last ) ;
rect_last = finalres ;
// Gated variant: rect_last is updated in place using the actual crop size.
updateLastRect ( cx , cy , w , h , crop_size , rect_last ) ;
boundingBoxRes = finalres ;
boundingBoxRes = rect_last ;
return true ;
return true ;
// Gated variant: score below the threshold, report failure.
} else {
return false ;
}
}
}
float TrackerVitImpl : : getTrackingScore ( )
float TrackerVitImpl : : getTrackingScore ( )