// samples/cpp/train_HOG.cpp
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/ml.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/videoio.hpp"
#include <iostream>
#include <time.h>
using namespace cv;
using namespace cv::ml;
using namespace std;
// Forward declarations of helpers that are defined further down in this file.
// get_svm_detector: turns a trained SVM into the coefficient vector passed to HOGDescriptor::setSVMDetector.
vector< float > get_svm_detector( const Ptr< SVM >& svm );
// computeHOGs: appends one HOG descriptor per usable image in img_lst to gradient_lst
// (plus one for the mirrored image when use_flip is set).
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip );
/*
* Build the coefficient vector expected by HOGDescriptor::setSVMDetector from a trained SVM.
*
* The result has sv.cols + 1 entries: the weights of the single (compressed) support
* vector followed by the bias term -rho of decision function 0.
*
* svm: trained model; it must expose exactly one CV_32F support vector with alpha == 1,
*      otherwise CV_Assert throws.
*/
vector< float > get_svm_detector( const Ptr< SVM >& svm )
{
    // get the support vectors
    Mat sv = svm->getSupportVectors();
    const int sv_total = sv.rows;

    // get the decision function
    Mat alpha, svidx;
    double rho = svm->getDecisionFunction( 0, alpha, svidx );

    // The detector is a single weight vector, so the model must have collapsed to
    // exactly one support vector with unit coefficient.
    CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
    CV_Assert( ( alpha.type() == CV_64F && alpha.at< double >( 0 ) == 1. ) ||
               ( alpha.type() == CV_32F && alpha.at< float >( 0 ) == 1.f ) );
    CV_Assert( sv.type() == CV_32F );

    // Copy the weights; previously only the bias slot was ever written, leaving
    // every weight at zero.
    const float* weights = sv.ptr< float >( 0 );
    vector< float > hog_detector( weights, weights + sv.cols );
    hog_detector.push_back( (float)-rho );
    return hog_detector;
}
/*
* Convert training/testing set to be used by OpenCV Machine Learning algorithms.
* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.
* Transposition of samples are made if needed.
*/
{
//--Convert data
const int rows = (int)train_samples.size();
const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );
for( size_t i = 0 ; i < train_samples.size(); ++i )
{
CV_Assert( train_samples[i].cols == 1 || train_samples[i].rows == 1 );
if( train_samples[i].cols == 1 )
{
transpose( train_samples[i], tmp );
}
else if( train_samples[i].rows == 1 )
{
train_samples[i].copyTo( trainData.row( (int)i ) );
}
}
}
{
vector< String > files;
glob( dirname, files );
for ( size_t i = 0; i < files.size(); ++i )
{
{
cout << files[i] << " is invalid!" << endl; // invalid image, skip it.
continue;
}
if ( showImages )
{
imshow( "image", img );
waitKey( 1 );
}
img_lst.push_back( img );
}
}
{
Rect box;
srand( (unsigned int)time( NULL ) );
for ( size_t i = 0; i < full_neg_lst.size(); i++ )
{
Mat roi = full_neg_lst[i]( box );
}
}
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip )
{
HOGDescriptor hog;
hog.winSize = wsize;
Mat gray;
vector< float > descriptors;
for( size_t i = 0 ; i < img_lst.size(); i++ )
{
{
( img_lst[i].rows - wsize.height ) / 2,
wsize.width,
wsize.height);
cvtColor( img_lst[i](r), gray, COLOR_BGR2GRAY );
gradient_lst.push_back( Mat( descriptors ).clone() );
if ( use_flip )
{
flip( gray, gray, 1 );
gradient_lst.push_back( Mat( descriptors ).clone() );
}
}
}
}
{
cout << "Testing trained detector..." << endl;
HOGDescriptor hog;
hog.load( obj_det_filename );
vector< String > files;
glob( test_dir, files );
int delay = 0;
VideoCapture cap;
if ( videofilename != "" )
{
if ( videofilename.size() == 1 && isdigit( videofilename[0] ) )
else
cap.open( videofilename );
}
obj_det_filename = "testing " + obj_det_filename;
namedWindow( obj_det_filename, WINDOW_NORMAL );
for( size_t i=0;; i++ )
{
Mat img;
{
cap >> img;
delay = 1;
}
else if( i < files.size() )
{
img = imread( files[i] );
}
if ( img.empty() )
{
return;
}
vector< Rect > detections;
vector< double > foundWeights;
hog.detectMultiScale( img, detections, foundWeights );
for ( size_t j = 0; j < detections.size(); j++ )
{
}
imshow( obj_det_filename, img );
if( waitKey( delay ) == 27 )
{
return;
}
}
}
int main( int argc, char** argv )
{
const char* keys =
{
"{help h| | show help message}"
"{pd | | path of directory contains positive images}"
"{nd | | path of directory contains negative images}"
"{td | | path of directory contains test images}"
"{tv | | test video file name}"
"{dw | | width of the detector}"
"{dh | | height of the detector}"
"{f |false| indicates if the program will generate and use mirrored samples or not}"
"{d |false| train twice}"
"{t |false| test a trained detector}"
"{v |false| visualize training steps}"
"{fn |my_detector.yml| file name of trained SVM}"
};
CommandLineParser parser( argc, argv, keys );
if ( parser.has( "help" ) )
{
parser.printMessage();
exit( 0 );
}
int detector_width = parser.get< int >( "dw" );
int detector_height = parser.get< int >( "dh" );
bool test_detector = parser.get< bool >( "t" );
bool train_twice = parser.get< bool >( "d" );
bool visualization = parser.get< bool >( "v" );
bool flip_samples = parser.get< bool >( "f" );
if ( test_detector )
{
test_trained_detector( obj_det_filename, test_dir, videofilename );
exit( 0 );
}
if( pos_dir.empty() || neg_dir.empty() )
{
parser.printMessage();
cout << "Wrong number of parameters.\n\n"
<< "Example command line:\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"
<< "\nExample command line for testing trained detector:\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos";
exit( 1 );
}
vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
vector< int > labels;
clog << "Positive images are being loaded..." ;
load_images( pos_dir, pos_lst, visualization );
if ( pos_lst.size() > 0 )
{
clog << "...[done] " << pos_lst.size() << " files." << endl;
}
else
{
clog << "no image in " << pos_dir <<endl;
return 1;
}
Size pos_image_size = pos_lst[0].size();
if ( detector_width && detector_height )
{
pos_image_size = Size( detector_width, detector_height );
}
else
{
for ( size_t i = 0; i < pos_lst.size(); ++i )
{
if( pos_lst[i].size() != pos_image_size )
{
cout << "All positive images should be same size!" << endl;
exit( 1 );
}
}
pos_image_size = pos_image_size / 8 * 8;
}
clog << "Negative images are being loaded...";
load_images( neg_dir, full_neg_lst, visualization );
clog << "...[done] " << full_neg_lst.size() << " files." << endl;
clog << "Negative images are being processed...";
sample_neg( full_neg_lst, neg_lst, pos_image_size );
clog << "...[done] " << neg_lst.size() << " files." << endl;
clog << "Histogram of Gradients are being calculated for positive images...";
computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
size_t positive_count = gradient_lst.size();
labels.assign( positive_count, +1 );
clog << "...[done] ( positive images count : " << positive_count << " )" << endl;
clog << "Histogram of Gradients are being calculated for negative images...";
computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
size_t negative_count = gradient_lst.size() - positive_count;
labels.insert( labels.end(), negative_count, -1 );
CV_Assert( positive_count < labels.size() );
clog << "...[done] ( negative images count : " << negative_count << " )" << endl;
Mat train_data;
convert_to_ml( gradient_lst, train_data );
clog << "Training SVM...";
Ptr< SVM > svm = SVM::create();
/* Default values to train SVM */
svm->setCoef0( 0.0 );
svm->setDegree( 3 );
svm->setTermCriteria( TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 1e-3 ) );
svm->setGamma( 0 );
svm->setKernel( SVM::LINEAR );
svm->setNu( 0.5 );
svm->setP( 0.1 ); // for EPSILON_SVR, epsilon in loss function?
svm->setC( 0.01 ); // From paper, soft classifier
svm->setType( SVM::EPS_SVR ); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
svm->train( train_data, ROW_SAMPLE, labels );
clog << "...[done]" << endl;
if ( train_twice )
{
clog << "Testing trained detector on negative images. This might take a few minutes...";
HOGDescriptor my_hog;
my_hog.winSize = pos_image_size;
// Set the trained svm to my_hog
my_hog.setSVMDetector( get_svm_detector( svm ) );
vector< Rect > detections;
vector< double > foundWeights;
for ( size_t i = 0; i < full_neg_lst.size(); i++ )
{
if ( full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height )
my_hog.detectMultiScale( full_neg_lst[i], detections, foundWeights );
else
detections.clear();
for ( size_t j = 0; j < detections.size(); j++ )
{
Mat detection = full_neg_lst[i]( detections[j] ).clone();
resize( detection, detection, pos_image_size, 0, 0, INTER_LINEAR_EXACT);
neg_lst.push_back( detection );
}
if ( visualization )
{
for ( size_t j = 0; j < detections.size(); j++ )
{
}
imshow( "testing trained detector on negative images", full_neg_lst[i] );
waitKey( 5 );
}
}
clog << "...[done]" << endl;
gradient_lst.clear();
clog << "Histogram of Gradients are being calculated for positive images...";
computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
positive_count = gradient_lst.size();
clog << "...[done] ( positive count : " << positive_count << " )" << endl;
clog << "Histogram of Gradients are being calculated for negative images...";
computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
negative_count = gradient_lst.size() - positive_count;
clog << "...[done] ( negative count : " << negative_count << " )" << endl;
labels.clear();
labels.assign(positive_count, +1);
labels.insert(labels.end(), negative_count, -1);
clog << "Training SVM again...";
convert_to_ml( gradient_lst, train_data );
svm->train( train_data, ROW_SAMPLE, labels );
clog << "...[done]" << endl;
}
HOGDescriptor hog;
hog.winSize = pos_image_size;
hog.setSVMDetector( get_svm_detector( svm ) );
hog.save( obj_det_filename );
test_trained_detector( obj_det_filename, test_dir, videofilename );
return 0;
}
/*
API reference notes for OpenCV symbols used in this sample:

CV_NODISCARD_STD Mat clone() const
Creates a full copy of the array and the underlying data.
int rows
the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
Definition: mat.hpp:2137
The class defining termination criteria for iterative algorithms.
Definition: types.hpp:886
Class for video capturing from video files, image sequences or cameras.
Definition: videoio.hpp:728
virtual bool open(const String &filename, int apiPreference=CAP_ANY)
Opens a video file or a capturing device or an IP video stream for video capturing.
virtual bool isOpened() const
Returns true if video capturing has been initialized already.
void flip(InputArray src, OutputArray dst, int flipCode)
Flips a 2D array around vertical, horizontal, or both axes.
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
void glob(String pattern, std::vector< String > &result, bool recursive=false)
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.
CV_EXPORTS_W Mat imread(const String &filename, int flags=IMREAD_COLOR)
Loads an image from a file.
void cvtColor(InputArray src, OutputArray dst, int code, int dstCn=0)
Converts an image from one color space to another.
void rectangle(InputOutputArray img, Point pt1, Point pt2, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
Draws a simple, thick, or filled up-right rectangle.
void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR)
Resizes an image.
Definition: ml.hpp:75
"black box" representation of the file storage associated with a file on disk.
Definition: core.hpp:106
STL namespace.
Implementation of HOG (Histogram of Oriented Gradients) descriptor and object detector.
Definition: objdetect.hpp:401
virtual void compute(InputArray img, std::vector< float > &descriptors, Size winStride=Size(), Size padding=Size(), const std::vector< Point > &locations=std::vector< Point >()) const
Computes HOG descriptors of given image.
virtual void save(const String &filename, const String &objname=String()) const
saves HOGDescriptor parameters and coefficients for the linear SVM classifier to a file
virtual void setSVMDetector(InputArray svmdetector)
Sets coefficients for the linear SVM classifier.
Size winSize
Detection window size. Align to block size and block stride. Default value is Size(64,...
Definition: objdetect.hpp:619
virtual bool load(const String &filename, const String &objname=String())
loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file
virtual void detectMultiScale(InputArray img, std::vector< Rect > &foundLocations, std::vector< double > &foundWeights, double hitThreshold=0, Size winStride=Size(), Size padding=Size(), double scale=1.05, double groupThreshold=2.0, bool useMeanshiftGrouping=false) const
Detects objects of different sizes in the input image. The detected objects are returned as a list of...
*/