// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright Amir Hassan (kallaballa)

#include <opencv2/v4d/v4d.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/objdetect.hpp>
#include <opencv2/imgproc.hpp>

using std::vector;
using std::string;
using std::endl;

using namespace cv::v4d;

class PedestrianDemoPlan : public Plan {
public:
    using Plan::Plan;
private:
    unsigned long diag_ = 0;
    cv::Size downSize_;
    cv::Size_<float> scale_;
    int blurKernelSize_ = 0;

    struct Cache {
        cv::UMat blur_;
        cv::UMat local_;
        uint64_t fps_ = 0;
    } cache_;

    //BGRA
    cv::UMat background_;
    //RGB
    cv::UMat videoFrame_, videoFrameDown_;
    //GREY
    cv::UMat videoFrameDownGrey_;

    struct Detection {
        //detected pedestrian location rectangles
        std::vector<cv::Rect> locations_;
        //detected pedestrian locations as boxes
        vector<vector<double>> boxes_;
        //probability of a detected object being a pedestrian - currently always set to 1.0
        vector<double> probs_;
        //KCF tracker parameters - tracking is faster than repeated detection
        cv::TrackerKCF::Params params_;
        //KCF tracker used instead of continuous detection
        cv::Ptr<cv::Tracker> tracker_;
        bool trackerInitialized_ = false;
        //If tracking fails, re-detect
        bool redetect_ = true;
        //Descriptor used for pedestrian detection
        cv::HOGDescriptor hog_;
    } detection_;

    inline static cv::Rect tracked_ = cv::Rect(0, 0, 1, 1);

    //branch predicates: always run, re-detect, or keep tracking
    constexpr static auto always_ = [](){ return true; };
    constexpr static auto doRedetect_ = [](const Detection& detection){ return !detection.trackerInitialized_ || detection.redetect_; };
    constexpr static auto dontRedetect_ = [](const Detection& detection){ return detection.trackerInitialized_ && !detection.redetect_; };

    //adapted from cv::dnn_objdetect::InferBbox
    static inline bool pair_comparator(std::pair<double, size_t> l1, std::pair<double, size_t> l2) {
        return l1.first > l2.first;
    }

    //adapted from cv::dnn_objdetect::InferBbox
    static void intersection_over_union(std::vector<std::vector<double>>* boxes, std::vector<double>* base_box, std::vector<double>* iou) {
        double g_xmin = (*base_box)[0];
        double g_ymin = (*base_box)[1];
        double g_xmax = (*base_box)[2];
        double g_ymax = (*base_box)[3];
        double base_box_w = g_xmax - g_xmin;
        double base_box_h = g_ymax - g_ymin;
        for (size_t b = 0; b < (*boxes).size(); ++b) {
            double xmin = std::max((*boxes)[b][0], g_xmin);
            double ymin = std::max((*boxes)[b][1], g_ymin);
            double xmax = std::min((*boxes)[b][2], g_xmax);
            double ymax = std::min((*boxes)[b][3], g_ymax);

            // Intersection
            double w = std::max(0.0, xmax - xmin);
            double h = std::max(0.0, ymax - ymin);
            // Union
            double test_box_w = (*boxes)[b][2] - (*boxes)[b][0];
            double test_box_h = (*boxes)[b][3] - (*boxes)[b][1];

            double inter_ = w * h;
            double union_ = test_box_h * test_box_w + base_box_h * base_box_w - inter_;
            (*iou)[b] = inter_ / (union_ + 1e-7);
        }
    }
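    /* For reference: the IoU score computed above is area(A ∩ B) / area(A ∪ B).
     * E.g. two 10x10 boxes overlapping in a 5x10 strip score
     * 50 / (100 + 100 - 50) ~= 0.33. The 1e-7 epsilon guards against a
     * division by zero for degenerate (zero-area) boxes. */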
    //adapted from cv::dnn_objdetect::InferBbox
    static std::vector<bool> non_maximal_suppression(std::vector<std::vector<double>>* boxes, std::vector<double>* probs, const double threshold = 0.1) {
        std::vector<bool> keep((*probs).size());
        std::fill(keep.begin(), keep.end(), true);
        //sort box indices by descending probability
        std::vector<size_t> prob_args_sorted((*probs).size());
        std::vector<std::pair<double, size_t>> temp_sort((*probs).size());
        for (size_t tidx = 0; tidx < (*probs).size(); ++tidx) {
            temp_sort[tidx] = std::make_pair((*probs)[tidx], tidx);
        }
        std::sort(temp_sort.begin(), temp_sort.end(), pair_comparator);
        for (size_t idx = 0; idx < temp_sort.size(); ++idx) {
            prob_args_sorted[idx] = temp_sort[idx].second;
        }
        //suppress every box that overlaps a higher-ranked box by more than the threshold
        for (std::vector<size_t>::iterator itr = prob_args_sorted.begin(); itr != prob_args_sorted.end() - 1; ++itr) {
            size_t idx = itr - prob_args_sorted.begin();
            std::vector<double> iou_(prob_args_sorted.size() - idx - 1);
            std::vector<std::vector<double>> temp_boxes(iou_.size());
            for (size_t bb = 0; bb < temp_boxes.size(); ++bb) {
                std::vector<double> temp_box(4);
                for (size_t b = 0; b < 4; ++b) {
                    temp_box[b] = (*boxes)[prob_args_sorted[idx + bb + 1]][b];
                }
                temp_boxes[bb] = temp_box;
            }
            intersection_over_union(&temp_boxes, &(*boxes)[prob_args_sorted[idx]], &iou_);
            for (std::vector<double>::iterator _itr = iou_.begin(); _itr != iou_.end(); ++_itr) {
                size_t iou_idx = _itr - iou_.begin();
                if (*_itr > threshold) {
                    keep[prob_args_sorted[idx + iou_idx + 1]] = false;
                }
            }
        }
        return keep;
    }

    //post-process and composite the layers
    static void composite_layers(const cv::UMat background, const cv::UMat foreground, cv::UMat dst, int blurKernelSize, Cache& cache) {
        cv::boxFilter(foreground, cache.blur_, -1, cv::Size(blurKernelSize, blurKernelSize), cv::Point(-1, -1), true, cv::BORDER_REPLICATE);
        cv::add(background, cache.blur_, dst);
    }
public:
    PedestrianDemoPlan(const cv::Rect& viewport) : Plan(viewport) {
        Global::registerShared(tracked_);
    }

    PedestrianDemoPlan(const cv::Size& sz) : PedestrianDemoPlan(cv::Rect(0, 0, sz.width, sz.height)) {
    }

    void setup(cv::Ptr<V4D> window) override {
        int w = size().width;
        int h = size().height;
        diag_ = hypot(w, h);
        downSize_ = { 640, 360 };
        scale_ = { float(w) / downSize_.width, float(h) / downSize_.height };
        //derive an odd blur kernel size (at least 1) from the diagonal
        blurKernelSize_ = std::max(int(diag_ / 200 % 2 == 0 ? diag_ / 200 + 1 : diag_ / 200), 1);

        window->plain([](Detection& detection){
            detection.params_.desc_pca = cv::TrackerKCF::GRAY;
            detection.params_.compress_feature = false;
            detection.params_.compressed_size = 1;
            detection.tracker_ = cv::TrackerKCF::create(detection.params_);
            detection.hog_.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
        }, detection_);
    }

    void infer(cv::Ptr<V4D> window) override {
        window->branch(always_);
        {
            window->capture();

            window->fb([](const cv::UMat& frameBuffer, const cv::Rect& viewport, cv::UMat& videoFrame){
                //copy the video frame out of the framebuffer
                cvtColor(frameBuffer(viewport), videoFrame, cv::COLOR_BGRA2RGB);
            }, viewport(), videoFrame_);

            window->plain([](const cv::Size downSize, const cv::UMat& videoFrame, cv::UMat& videoFrameDown, cv::UMat& videoFrameDownGrey, cv::UMat& background){
                //downsample the video frame for hog_ detection
                cv::resize(videoFrame, videoFrameDown, downSize);
                cv::cvtColor(videoFrameDown, videoFrameDownGrey, cv::COLOR_RGB2GRAY);
                cv::cvtColor(videoFrame, background, cv::COLOR_RGB2BGRA);
            }, downSize_, videoFrame_, videoFrameDown_, videoFrameDownGrey_, background_);
        }
        window->endbranch(always_);

        //Try to track the pedestrian (if we currently are tracking one), else re-detect using the HOG descriptor
        window->branch(doRedetect_, detection_);
        {
            window->plain([](cv::UMat& videoFrameDownGrey, Detection& detection, cv::Rect& tracked, Cache& cache){
                detection.redetect_ = false;

                //Detect pedestrians (hit threshold 0, default win-stride/padding, scale step 1.15, final threshold 2.0, mean-shift grouping)
                detection.hog_.detectMultiScale(videoFrameDownGrey, detection.locations_, 0, cv::Size(), cv::Size(), 1.15, 2.0, true);
                if (!detection.locations_.empty()) {
                    detection.boxes_.clear();
                    detection.probs_.clear();
                    //collect all found boxes
                    for (const auto& rect : detection.locations_) {
                        detection.boxes_.push_back({ double(rect.x), double(rect.y), double(rect.x + rect.width), double(rect.y + rect.height) });
                        detection.probs_.push_back(1.0);
                    }

                    //use nms to filter overlapping boxes (https://medium.com/analytics-vidhya/non-max-suppression-nms-6623e6572536)
                    vector<bool> keep = non_maximal_suppression(&detection.boxes_, &detection.probs_, 0.1);
                    for (size_t i = 0; i < keep.size(); ++i) {
                        if (keep[i]) {
                            Global::Scope scope(tracked);
                            //only track the first pedestrian found
                            tracked = detection.locations_[i];
                            break;
                        }
                    }

                    if (!detection.trackerInitialized_) {
                        Global::Scope scope(tracked);
                        //initialize the tracker once
                        detection.tracker_->init(videoFrameDownGrey, tracked);
                        detection.trackerInitialized_ = true;
                    }
                }
            }, videoFrameDownGrey_, detection_, tracked_, cache_);
        }
        window->endbranch(doRedetect_, detection_);
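        /* For reference (a minimal sketch, not part of this sample): the KCF
         * tracker used above follows the generic cv::Tracker protocol:
         *
         *   cv::Ptr<cv::Tracker> t = cv::TrackerKCF::create(params);
         *   t->init(firstFrame, boundingBox);            // seed with a detection
         *   bool ok = t->update(nextFrame, boundingBox); // refine once per frame
         *
         * update() returning false (see the tracking branch below) flips
         * redetect_ and routes control back to the HOG branch above. */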
        window->branch(dontRedetect_, detection_);
        {
            window->plain([](cv::UMat& videoFrameDownGrey, Detection& detection, const uint64_t& frameCnt, cv::Rect& tracked, Cache& cache){
                Global::Scope scope(tracked);
                cv::Rect oldTracked = tracked;
                //force a re-detection roughly once per second or whenever the tracker loses the target
                if ((cache.fps_ == 0 || frameCnt % cache.fps_ == 0) || !detection.tracker_->update(videoFrameDownGrey, tracked)) {
                    cache.fps_ = uint64_t(std::ceil(Global::fps()));
                    detection.redetect_ = true;
                }

                //smooth the tracked rectangle by averaging it with its previous position
                tracked.x = (oldTracked.x + tracked.x) / 2.0;
                tracked.y = (oldTracked.y + tracked.y) / 2.0;
                tracked.width = (oldTracked.width + tracked.width) / 2.0;
                tracked.height = (oldTracked.height + tracked.height) / 2.0;
            }, videoFrameDownGrey_, detection_, window->frameCount(), tracked_, cache_);
        }
        window->endbranch(dontRedetect_, detection_);

        window->branch(always_);
        {
            //Draw an ellipse around the tracked pedestrian
            window->nvg([](const cv::Size& sz, const cv::Size_<float> scale, cv::Rect& tracked) {
                using namespace cv::v4d::nvg;
                float width;
                float height;
                float cx;
                float cy;
                {
                    Global::Scope scope(tracked);
                    //scale the tracked rectangle back up to full resolution
                    width = tracked.width * scale.width;
                    height = tracked.height * scale.height;
                    cx = scale.width * tracked.x + (width / 2.0);
                    cy = scale.height * tracked.y + (height / 2.0);
                }
                clear();
                beginPath();
                strokeWidth(std::fmax(5, sz.width / 960.0));
                strokeColor(cv::v4d::colorConvert(cv::Scalar(0, 127, 255, 200), cv::COLOR_HLS2BGR));
                ellipse(cx, cy, width, height);
                stroke();
            }, size(), scale_, tracked_);

            //Put it all together
            window->fb([](cv::UMat& frameBuffer, const cv::Rect& viewport, cv::UMat& bg, int blurKernelSize, Cache& cache){
                composite_layers(bg, frameBuffer(viewport), frameBuffer(viewport), blurKernelSize, cache);
            }, viewport(), background_, blurKernelSize_, cache_);

            window->write();
        }
        window->endbranch(always_);
    }
};

int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << "Usage: pedestrian-demo <video-file>" << endl;
        exit(1);
    }

    cv::Ptr<PedestrianDemoPlan> plan = new PedestrianDemoPlan(cv::Size(1280, 720));
    cv::Ptr<V4D> window = V4D::make(plan->size(), "Pedestrian Demo", ALL);
    window->printSystemInfo();

    auto src = makeCaptureSource(window, argv[1]);
    auto sink = makeWriterSink(window, "pedestrian-demo.mkv", src->fps(), plan->size());
    window->setSource(src);
    window->setSink(sink);

    window->run(plan);

    return 0;
}
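/* Build/run sketch (compiler flags and package names are assumptions; adjust
 * to your local OpenCV/V4D build):
 *
 *   g++ -std=c++17 pedestrian-demo.cpp -o pedestrian-demo \
 *       $(pkg-config --cflags --libs opencv4)
 *   ./pedestrian-demo input.mkv
 *
 * The annotated result is written to pedestrian-demo.mkv in the working
 * directory. */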