Pedestrian-Demo
Prev Tutorial: Font-Demo
Next Tutorial: Optflow-Demo
Original author | Amir Hassan (kallaballa) amir@viel-zu.org |
Compatibility | OpenCV >= 4.7 |
Pedestrian detection using HOG with a linear SVM, non-maximal suppression (NMS) and tracking using KCF. Rendering is done with NanoVG (OpenGL); detection uses OpenCV's HOG descriptor with a linear SVM, overlapping detections are filtered with NMS, and the selected pedestrian is tracked with a KCF tracker.
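Before reading the V4D plan below, the same detect-and-track loop can be sketched with plain OpenCV alone. This is a minimal illustration, not part of the demo source: the input path "video.mp4" is a placeholder, NMS is omitted, and the first detection is simply handed to the tracker.

#include <opencv2/opencv.hpp>
#include <opencv2/tracking.hpp>
#include <vector>
int main() {
cv::HOGDescriptor hog;
//linear SVM trained for people detection (64x128 windows)
hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
cv::Ptr<cv::Tracker> tracker = cv::TrackerKCF::create();
bool tracking = false;
cv::Rect tracked;
cv::VideoCapture cap("video.mp4"); //placeholder input
cv::Mat frame, grey;
while (cap.read(frame)) {
cv::cvtColor(frame, grey, cv::COLOR_BGR2GRAY);
//track while possible; fall back to HOG detection otherwise
if (!tracking || !tracker->update(grey, tracked)) {
std::vector<cv::Rect> found;
hog.detectMultiScale(grey, found);
if (!found.empty()) {
tracked = found[0];
tracker->init(grey, tracked);
tracking = true;
}
}
cv::rectangle(frame, tracked, cv::Scalar(0, 255, 0), 2);
cv::imshow("pedestrian", frame);
if (cv::waitKey(1) == 27)
break;
}
return 0;
}

The demo below follows the same idea, but filters overlapping detections with NMS before handing one to the tracker, and splits capture, detection/tracking and rendering into V4D branches.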
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright Amir Hassan (kallaballa) <amir@viel-zu.org>
#include <opencv2/v4d/v4d.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/objdetect.hpp>
#include <string>
#include <vector>
#include <algorithm>
#include <utility>
#include <cmath>
using std::cerr;
using std::endl;
using std::vector;
using std::string;
/* Demo parameters */
#ifndef __EMSCRIPTEN__
constexpr long unsigned int WIDTH = 1280;
constexpr long unsigned int HEIGHT = 720;
#else
constexpr long unsigned int WIDTH = 960;
constexpr long unsigned int HEIGHT = 960;
#endif
const unsigned long DIAG = hypot(double(WIDTH), double(HEIGHT));
constexpr unsigned int DOWNSIZE_WIDTH = 640;
constexpr unsigned int DOWNSIZE_HEIGHT = 360;
constexpr double WIDTH_SCALE = double(WIDTH) / DOWNSIZE_WIDTH;
constexpr double HEIGHT_SCALE = double(HEIGHT) / DOWNSIZE_HEIGHT;
constexpr bool OFFSCREEN = false;
#ifndef __EMSCRIPTEN__
constexpr const char* OUTPUT_FILENAME = "pedestrian-demo.mkv";
#endif
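//blur kernel size scaled to the frame diagonal, forced to be odd and at least 1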
const int BLUR_KERNEL_SIZE = std::max(int(DIAG / 200 % 2 == 0 ? DIAG / 200 + 1 : DIAG / 200), 1);
//adapted from cv::dnn_objdetect::InferBbox
static inline bool pair_comparator(std::pair<double, size_t> l1, std::pair<double, size_t> l2) {
return l1.first > l2.first;
}
//adapted from cv::dnn_objdetect::InferBbox
static void intersection_over_union(std::vector<std::vector<double> > *boxes, std::vector<double> *base_box, std::vector<double> *iou) {
double g_xmin = (*base_box)[0];
double g_ymin = (*base_box)[1];
double g_xmax = (*base_box)[2];
double g_ymax = (*base_box)[3];
double base_box_w = g_xmax - g_xmin;
double base_box_h = g_ymax - g_ymin;
for (size_t b = 0; b < (*boxes).size(); ++b) {
double xmin = std::max((*boxes)[b][0], g_xmin);
double ymin = std::max((*boxes)[b][1], g_ymin);
double xmax = std::min((*boxes)[b][2], g_xmax);
double ymax = std::min((*boxes)[b][3], g_ymax);
// Intersection
double w = std::max(0.0, xmax - xmin);
double h = std::max(0.0, ymax - ymin);
// Union
double test_box_w = (*boxes)[b][2] - (*boxes)[b][0];
double test_box_h = (*boxes)[b][3] - (*boxes)[b][1];
double inter_ = w * h;
double union_ = test_box_h * test_box_w + base_box_h * base_box_w - inter_;
(*iou)[b] = inter_ / (union_ + 1e-7);
}
}
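//Example: base box (0,0,10,10) vs. candidate (5,5,15,15): intersection = 5*5 = 25, union = 100 + 100 - 25 = 175, IoU = 25/175 ≈ 0.143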
//adapted from cv::dnn_objdetect::InferBbox
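//Greedy NMS: visit boxes in descending probability and suppress every remaining box whose IoU with the current box exceeds the threshold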
static std::vector<bool> non_maximal_suppression(std::vector<std::vector<double> > *boxes, std::vector<double> *probs, const double threshold = 0.1) {
std::vector<bool> keep(((*probs).size()));
std::fill(keep.begin(), keep.end(), true);
std::vector<size_t> prob_args_sorted((*probs).size());
std::vector<std::pair<double, size_t> > temp_sort((*probs).size());
for (size_t tidx = 0; tidx < (*probs).size(); ++tidx) {
temp_sort[tidx] = std::make_pair((*probs)[tidx], static_cast<size_t>(tidx));
}
std::sort(temp_sort.begin(), temp_sort.end(), pair_comparator);
for (size_t idx = 0; idx < temp_sort.size(); ++idx) {
prob_args_sorted[idx] = temp_sort[idx].second;
}
for (std::vector<size_t>::iterator itr = prob_args_sorted.begin(); itr != prob_args_sorted.end() - 1; ++itr) {
size_t idx = itr - prob_args_sorted.begin();
std::vector<double> iou_(prob_args_sorted.size() - idx - 1);
std::vector<std::vector<double> > temp_boxes(iou_.size());
for (size_t bb = 0; bb < temp_boxes.size(); ++bb) {
std::vector<double> temp_box(4);
for (size_t b = 0; b < 4; ++b) {
temp_box[b] = (*boxes)[prob_args_sorted[idx + bb + 1]][b];
}
temp_boxes[bb] = temp_box;
}
intersection_over_union(&temp_boxes, &(*boxes)[prob_args_sorted[idx]], &iou_);
for (std::vector<double>::iterator _itr = iou_.begin(); _itr != iou_.end(); ++_itr) {
size_t iou_idx = _itr - iou_.begin();
if (*_itr > threshold) {
keep[prob_args_sorted[idx + iou_idx + 1]] = false;
}
}
}
return keep;
}
using namespace cv::v4d;
class PedestrianDemoPlan : public Plan {
struct Cache {
cv::UMat blur_;
} cache_;
//BGRA
cv::UMat background_;
//RGB
cv::UMat videoFrame_, videoFrameDown_;
//GREY
cv::UMat videoFrameDownGrey_;
//detected pedestrian locations rectangles
std::vector<cv::Rect> locations_;
//detected pedestrian locations as boxes
vector<vector<double>> boxes_;
//probability of detected object being a pedestrian - currently always set to 1.0
vector<double> probs_;
//Faster tracking parameters
cv::TrackerKCF::Params params_;
//KCF tracker used instead of continuous detection
cv::Ptr<cv::Tracker> tracker_;
bool trackerInitialized_ = false;
//If tracking fails re-detect
bool redetect_ = true;
//Descriptor used for pedestrian detection
cv::HOGDescriptor hog_;
//the pedestrian currently being tracked
cv::Rect tracked_;
//post process and add layers together
static void composite_layers(const cv::UMat background, const cv::UMat foreground, cv::UMat dst, int blurKernelSize, Cache& cache) {
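//soft-blur the rendered overlay and add it onto the video background to create a glow effect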
cv::boxFilter(foreground, cache.blur_, -1, cv::Size(blurKernelSize, blurKernelSize), cv::Point(-1,-1), true, cv::BORDER_REPLICATE);
cv::add(background, cache.blur_, dst);
}
public:
void setup(cv::Ptr<V4D> window) override {
window->parallel([](cv::TrackerKCF::Params& params, cv::Ptr<cv::Tracker>& tracker, cv::HOGDescriptor& hog){
params.desc_pca = cv::TrackerKCF::GRAY;
params.compress_feature = false;
params.compressed_size = 1;
tracker = cv::TrackerKCF::create(params);
//equip the HOG descriptor with the default people detector (linear SVM coefficients)
hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());
}, params_, tracker_, hog_);
}
void infer(cv::Ptr<V4D> window) override {
static auto always = [](){ return true; };
static auto doRedect = [](const bool& trackerInit, const bool& redetect){ return !trackerInit || redetect; };
static auto dontRedect = [](const bool& trackerInit, const bool& redetect){ return trackerInit && !redetect; };
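//the predicates above select which branches run each frame: 'always' runs unconditionally, 'doRedect' whenever no pedestrian is being tracked, 'dontRedect' while tracking succeeds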
window->branch(always);
{
window->capture();
window->fb([](const cv::UMat& frameBuffer, cv::UMat& videoFrame){
//copy video frame
cvtColor(frameBuffer, videoFrame, cv::COLOR_BGRA2RGB);
}, videoFrame_);
//downsample video frame for hog_ detection
window->parallel([](const cv::UMat& videoFrame, cv::UMat& videoFrameDown, cv::UMat& videoFrameDownGrey, cv::UMat& background){
cv::resize(videoFrame, videoFrameDown, cv::Size(DOWNSIZE_WIDTH, DOWNSIZE_HEIGHT));
cv::cvtColor(videoFrameDown, videoFrameDownGrey, cv::COLOR_RGB2GRAY);
cv::cvtColor(videoFrame, background, cv::COLOR_RGB2BGRA);
}, videoFrame_, videoFrameDown_, videoFrameDownGrey_, background_);
}
window->endbranch(always);
//Try to track the pedestrian (if we currently are tracking one), else re-detect using HOG descriptor
window->branch(doRedect, trackerInitialized_, redetect_);
{
window->parallel([](cv::HOGDescriptor& hog, bool& redetect, cv::UMat& videoFrameDownGrey, std::vector<cv::Rect>& locations, vector<vector<double>>& boxes, vector<double>& probs, cv::Ptr<cv::Tracker>& tracker, cv::Rect& tracked, bool& trackerInitialized){
redetect = false;
//Detect pedestrians on the downscaled grey frame (default detection parameters)
hog.detectMultiScale(videoFrameDownGrey, locations);
if (!locations.empty()) {
boxes.clear();
probs.clear();
//collect all found boxes
for (const auto &rect : locations) {
boxes.push_back( { double(rect.x), double(rect.y), double(rect.x + rect.width), double(rect.y + rect.height) });
probs.push_back(1.0);
}
//use nms to filter overlapping boxes (https://medium.com/analytics-vidhya/non-max-suppression-nms-6623e6572536)
vector<bool> keep = non_maximal_suppression(&boxes, &probs, 0.1);
for (size_t i = 0; i < keep.size(); ++i) {
if (keep[i]) {
//only track the first pedestrian found
tracked = locations[i];
break;
}
}
if(!trackerInitialized) {
// initialize the tracker once
tracker->init(videoFrameDownGrey, tracked);
trackerInitialized = true;
}
}
}, hog_, redetect_, videoFrameDownGrey_, locations_, boxes_, probs_, tracker_, tracked_, trackerInitialized_);
}
window->endbranch(doRedect, trackerInitialized_, redetect_);
window->branch(dontRedect, trackerInitialized_, redetect_);
{
window->parallel([](bool& redetect, cv::UMat& videoFrameDownGrey, cv::Ptr<cv::Tracker>& tracker, cv::Rect& tracked){
if(!tracker->update(videoFrameDownGrey, tracked)) {
//tracking failed - re-detect
redetect = true;
}
}, redetect_, videoFrameDownGrey_, tracker_, tracked_);
}
window->endbranch(dontRedect, trackerInitialized_, redetect_);
window->branch(always);
{
//Draw an ellipse around the tracked pedestrian
window->nvg([](const cv::Size& sz, const cv::Rect& tracked){
using namespace cv::v4d::nvg;
clear();
beginPath();
strokeWidth(std::fmax(2.0, sz.width / 960.0));
//stroke color converted from HLS (placeholder values)
strokeColor(colorConvert(cv::Scalar(0, 127, 255, 200), cv::COLOR_HLS2BGR));
float width = tracked.width * WIDTH_SCALE;
float height = tracked.height * HEIGHT_SCALE;
float cx = tracked.x * WIDTH_SCALE + (width / 2.0f);
float cy = tracked.y * HEIGHT_SCALE + (height / 2.0f);
ellipse(cx, cy, width / 2.0f, height / 2.0f);
stroke();
}, window->fbSize(), tracked_);
//Put it all together
window->fb([](cv::UMat& frameBuffer, const cv::UMat& bg, Cache& cache){
composite_layers(bg, frameBuffer, frameBuffer, BLUR_KERNEL_SIZE, cache);
}, background_, cache_);
window->write();
}
window->endbranch(always);
}
};
int main(int argc, char **argv) {
#ifndef __EMSCRIPTEN__
if (argc != 2) {
std::cerr << "Usage: pedestrian-demo <video-input>" << endl;
exit(1);
}
#else
CV_UNUSED(argc);
CV_UNUSED(argv);
#endif
using namespace cv::v4d;
cv::Ptr<V4D> window = V4D::make(WIDTH, HEIGHT, "Pedestrian Demo", ALL, OFFSCREEN);
window->printSystemInfo();
#ifndef __EMSCRIPTEN__
auto src = makeCaptureSource(window, argv[1]);
window->setSource(src);
//encode the result to OUTPUT_FILENAME (assumes the source reports its frame rate)
auto sink = makeWriterSink(window, OUTPUT_FILENAME, src->fps(), cv::Size(WIDTH, HEIGHT));
window->setSink(sink);
#else
auto src = makeCaptureSource(WIDTH, HEIGHT, window);
window->setSource(src);
#endif
window->run<PedestrianDemoPlan>(2);
return 0;
}