diff --git a/samples/dnn/dasiamrpn_tracker.cpp b/samples/dnn/dasiamrpn_tracker.cpp new file mode 100644 index 0000000000..0008cee255 --- /dev/null +++ b/samples/dnn/dasiamrpn_tracker.cpp @@ -0,0 +1,518 @@ +// DaSiamRPN tracker. +// Original paper: https://arxiv.org/abs/1808.06048 +// Link to original repo: https://github.com/foolwood/DaSiamRPN +// Links to onnx models: +// - network: https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0 +// - kernel_r1: https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0 +// - kernel_cls1: https://www.dropbox.com/s/qvmtszx5h339a0w/dasiamrpn_kernel_cls1.onnx?dl=0 + +#include +#include + +#include +#include +#include + +using namespace cv; +using namespace cv::dnn; + +/* Option table handed to CommandLineParser; each entry is "{ name [alias] | default | help text }". The help text of kernel_cls1 / kernel_r1 names the option it belongs to. */ const char *keys = + "{ help h | | Print help message }" + "{ input i | | Full path to input video folder, the specific camera index. (empty for camera 0) }" + "{ net | dasiamrpn_model.onnx | Path to onnx model of net}" + "{ kernel_cls1 | dasiamrpn_kernel_cls1.onnx | Path to onnx model of kernel_cls1 }" + "{ kernel_r1 | dasiamrpn_kernel_r1.onnx | Path to onnx model of kernel_r1 }" + "{ backend | 0 | Choose one of computation backends: " + "0: automatically (by default), " + "1: Halide language (http://halide-lang.org/), " + "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " + "3: OpenCV implementation }" + "{ target | 0 | Choose one of target computation devices: " + "0: CPU target (by default), " + "1: OpenCL, " + "2: OpenCL fp16 (half-float precision), " + "3: VPU }" +; + +// Initial parameters of the model +struct trackerConfig +{ + float windowInfluence = 0.43f; + float lr = 0.4f; + int scale = 8; + bool swapRB = false; + int totalStride = 8; + float penaltyK = 0.055f; + int exemplarSize = 127; + int instanceSize = 271; + float contextAmount = 0.5f; + std::vector ratios = { 0.33f, 0.5f, 1.0f, 2.0f, 3.0f }; + int anchorNum = int(ratios.size()); + Mat anchors; + Mat windows; + Scalar avgChans; 
+ Size imgSize = { 0, 0 }; + Rect2f targetBox = { 0, 0, 0, 0 }; + int scoreSize = (instanceSize - exemplarSize) / totalStride + 1; + + /* Recomputes scoreSize from the current instanceSize, exemplarSize and totalStride, using the same formula as the member's default initializer. */ void update_scoreSize() + { + scoreSize = int((instanceSize - exemplarSize) / totalStride + 1); + } +}; + +/* Forward declarations of the helper routines; their definitions are not part of this portion of the file. */ static void softmax(const Mat& src, Mat& dst); +static void elementMax(Mat& src); +static Mat generateHanningWindow(const trackerConfig& trackState); +static Mat generateAnchors(trackerConfig& trackState); +static Mat getSubwindow(Mat& img, const Rect2f& targetBox, float originalSize, Scalar avgChans); +static float trackerEval(Mat img, trackerConfig& trackState, Net& siamRPN); +static void trackerInit(Mat img, trackerConfig& trackState, Net& siamRPN, Net& siamKernelR1, Net& siamKernelCL1); + +/* Generic form: returns sqrt((w + pad) * (h + pad)) with pad = (w + h) * 0.5. */ template static +T sizeCal(const T& w, const T& h) +{ + T pad = (w + h) * T(0.5); + T sz2 = (w + pad) * (h + pad); + return sqrt(sz2); +} + +/* Mat specialization: same formula, with the product taken through Mat::mul and the root through cv::sqrt (computed in place on sz2). */ template <> +Mat sizeCal(const Mat& w, const Mat& h) +{ + Mat pad = (w + h) * 0.5; + Mat sz2 = (w + pad).mul((h + pad)); + + cv::sqrt(sz2, sz2); + return sz2; +} + +static +int run(int argc, char** argv) +{ + // Parse command line arguments. + CommandLineParser parser(argc, argv, keys); + + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + + std::string inputName = parser.get("input"); + std::string net = parser.get("net"); + std::string kernel_cls1 = parser.get("kernel_cls1"); + std::string kernel_r1 = parser.get("kernel_r1"); + int backend = parser.get("backend"); + int target = parser.get("target"); + + // Read nets. 
+ Net siamRPN, siamKernelCL1, siamKernelR1; + try + { + siamRPN = readNet(samples::findFile(net)); + siamKernelCL1 = readNet(samples::findFile(kernel_cls1)); + siamKernelR1 = readNet(samples::findFile(kernel_r1)); + } + catch (const cv::Exception& ee) + { + std::cerr << "Exception: " << ee.what() << std::endl; + std::cout << "Can't load the network by using the following files:" << std::endl; + std::cout << "siamRPN : " << net << std::endl; + std::cout << "siamKernelCL1 : " << kernel_cls1 << std::endl; + std::cout << "siamKernelR1 : " << kernel_r1 << std::endl; + return 2; + } + + // Set model backend. + siamRPN.setPreferableBackend(backend); + siamRPN.setPreferableTarget(target); + siamKernelR1.setPreferableBackend(backend); + siamKernelR1.setPreferableTarget(target); + siamKernelCL1.setPreferableBackend(backend); + siamKernelCL1.setPreferableTarget(target); + + const std::string winName = "DaSiamRPN"; + namedWindow(winName, WINDOW_AUTOSIZE); + + // Open a video file or an image file or a camera stream. + VideoCapture cap; + + if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1)) + { + int c = inputName.empty() ? 0 : inputName[0] - '0'; + std::cout << "Trying to open camera #" << c << " ..." << std::endl; + if (!cap.open(c)) + { + std::cout << "Capture from camera #" << c << " didn't work. Specify -i=