In this tutorial you will learn how to use the opencv_dnn module for image classification, using the GoogLeNet network trained with Caffe and taken from the Caffe model zoo.
We will demonstrate results of this example on the following picture.
#include <fstream>
#include <sstream>
#include <iostream>
#include "common.hpp"
// Command-line option specification for this sample. Each "{ name | default | help }"
// entry describes one option; adjacent literals are concatenated by the compiler
// into a single string. The string is non-const because main() appends to it.
std::string keys{
    // General options.
    "{ help h | | Print help message. }"
    "{ @alias | | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo | models.yml | An optional path to file with preprocessing parameters }"
    "{ input i | | Path to input image or video file. Skip this argument to capture frames from a camera.}"
    // Preprocessing options.
    "{ initial_width | 0 | Preprocess input image by initial resizing to a specific width.}"
    "{ initial_height | 0 | Preprocess input image by initial resizing to a specific height.}"
    "{ std | 0.0 0.0 0.0 | Preprocess input image by dividing on a standard deviation.}"
    "{ crop | false | Preprocess input image by center cropping.}"
    // Model and post-processing options.
    "{ framework f | | Optional name of an origin framework of the model. Detect it automatically if it does not set. }"
    "{ needSoftmax | false | Use Softmax to post-process the output of the net.}"
    "{ classes | | Optional path to a text file with names of classes. }"
    // Computation backend selector (one option whose help text spans several literals).
    "{ backend | 0 | Choose one of computation backends: "
        "0: automatically (by default), "
        "1: Halide language (http://halide-lang.org/), "
        "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
        "3: OpenCV implementation, "
        "4: VKCOM, "
        "5: CUDA, "
        "6: WebNN }"
    // Target device selector (one option whose help text spans several literals).
    "{ target | 0 | Choose one of target computation devices: "
        "0: CPU target (by default), "
        "1: OpenCL, "
        "2: OpenCL fp16 (half-float precision), "
        "3: VPU, "
        "4: Vulkan, "
        "6: CUDA, "
        "7: CUDA fp16 (half-float preprocess) }"
};
// NOTE(review): `dnn` is named without qualification here; presumably a
// `using namespace cv;` line preceded this one and was lost from the listing — confirm.
using namespace dnn;
// Class names, one per entry. Filled in main() from the text file given by the
// "classes" option and used there to build the result label.
std::vector<std::string> classes;
// Entry point of the classification sample: reads the command-line options, optionally
// loads class names, loads a network and runs it on frames, then formats result labels.
//
// Returns 0 after printing help or on normal completion, and 1 when parser.check() fails.
//
// NOTE(review): this listing is incomplete. Several names are used without a visible
// declaration (parser, mean, std, model, config, framework, cap, frame, blob,
// classIdPoint, softmaxProb, timeRecorder) and some control-flow lines are missing.
// The code below is kept exactly as shown; it is not compilable in this form.
int main(int argc, char** argv)
{
// NOTE(review): `parser` is used here but its construction is not visible in this listing.
const std::string modelName = parser.get<
String>(
"@alias");
const std::string zooFile = parser.get<
String>(
"zoo");
// Extend the option specification with whatever genPreprocArguments returns for this
// model alias and zoo file (helper presumably declared in common.hpp — confirm).
keys += genPreprocArguments(modelName, zooFile);
parser.about("Use this script to run classification deep learning networks using OpenCV.");
// With no arguments at all, or when help is requested, print usage and exit successfully.
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
// Read the preprocessing and runtime options.
// NOTE(review): "scale", "rgb", "width" and "height" are not in the literal part of `keys`;
// presumably they come from genPreprocArguments — confirm.
int rszWidth = parser.get<int>("initial_width");
int rszHeight = parser.get<int>("initial_height");
float scale = parser.get<float>("scale");
bool swapRB = parser.get<bool>("rgb");
bool crop = parser.get<bool>("crop");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
bool needSoftmax = parser.get<bool>("needSoftmax");
// Echo the mean and std values.
// NOTE(review): neither `mean` nor the variable named `std` is declared in the visible code.
std::cout<<"mean: "<<mean<<std::endl;
std::cout<<
"std: "<<
std<<std::endl;
// Optionally load class names, one per line, into the global `classes` vector.
if (parser.has("classes"))
{
std::string file = parser.get<
String>(
"classes");
std::ifstream ifs(file.c_str());
// Report an error through CV_Error when the file cannot be opened.
if (!ifs.is_open())
CV_Error(Error::StsError,
"File " + file +
" not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
// Stop with exit code 1 if the parser reports errors.
if (!parser.check())
{
parser.printErrors();
return 1;
}
// Load the network and apply the requested backend and target.
// NOTE(review): `model`, `config` and `framework` are not declared in the visible code.
Net net =
readNet(model, config, framework);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
static const std::string kWinName = "Deep learning image classification in OpenCV";
// NOTE(review): the body of this `if` is missing, so the `else` follows it directly;
// lines appear to have been lost here.
if (parser.has("input"))
else
{
// Grab the next frame and stop when it is empty.
// NOTE(review): `break` implies an enclosing loop that is not visible in this listing.
cap >> frame;
if (frame.empty())
{
break;
}
// Runs only when both initial resize dimensions are non-zero; the body is empty as shown.
if (rszWidth != 0 && rszHeight != 0)
{
}
// Runs only when all three std components are non-zero; the body is empty as shown.
if (
std.val[0] != 0.0 &&
std.val[1] != 0.0 &&
std.val[2] != 0.0)
{
}
// NOTE(review): `blob` is not declared in the visible code.
net.setInput(blob);
int classId;
double confidence;
// The forward pass is run once in the initializer and once more below,
// before the 200-iteration loop.
Mat prob = net.forward();
// NOTE(review): `t1` is never assigned in the visible code but is formatted into `label` below.
double t1;
prob = net.forward();
// Run the forward pass 200 times; the "Average time of 200 rounds" label below divides by 200.
for(int i = 0; i < 200; i++) {
prob = net.forward();
// NOTE(review): `classIdPoint` is not declared in the visible code.
classId = classIdPoint.
x;
}
if (needSoftmax == true)
{
// Subtract the largest element of `prob` before exponentiating.
float maxProb = 0.0;
maxProb = *std::max_element(prob.
begin<
float>(), prob.
end<
float>());
// NOTE(review): `softmaxProb` is not declared in the visible code.
cv::exp(prob-maxProb, softmaxProb);
classId = classIdPoint.
x;
}
// Build the timing labels.
// NOTE(review): `timeRecorder` is not declared in the visible code.
std::string label =
format(
"Inference time of 1 round: %.2f ms", t1);
std::string label2 =
format(
"Average time of 200 rounds: %.2f ms", timeRecorder.
getTimeMilli()/200);
// Overwrite `label` with the class name (or "Class #<id>" when no class names were
// loaded) and the confidence.
// NOTE(review): no assignment to `confidence` is visible in this listing.
label =
format(
"%s: %.4f", (classes.empty() ?
format(
"Class #%d", classId).c_str() :
classes[classId].c_str()),
confidence);
}
return 0;
}
Designed for command line parsing.
Definition: utility.hpp:818
n-dimensional dense array class
Definition: mat.hpp:811
Mat reshape(int cn, int rows=0) const
Changes the shape and/or the number of channels of a 2D matrix without copying the data.
MatIterator_< _Tp > end()
Returns the matrix iterator and sets it to the after-last matrix element.
MatIterator_< _Tp > begin()
Returns the matrix iterator and sets it to the first matrix element.
_Tp x
x coordinate of the point
Definition: types.hpp:201
Template class for specifying the size of an image or rectangle.
Definition: types.hpp:335
A class to measure elapsed time.
Definition: utility.hpp:295
void start()
starts counting ticks.
Definition: utility.hpp:304
void stop()
stops counting ticks.
Definition: utility.hpp:310
void reset()
resets internal values.
Definition: utility.hpp:374
double getTimeMilli() const
returns passed time in milliseconds.
Definition: utility.hpp:333
Class for video capturing from video files, image sequences or cameras.
Definition: videoio.hpp:728
virtual bool open(const String &filename, int apiPreference=CAP_ANY)
Opens a video file or a capturing device or an IP video stream for video capturing.
void exp(InputArray src, OutputArray dst)
Calculates the exponent of every array element.
void divide(InputArray src1, InputArray src2, OutputArray dst, double scale=1, int dtype=-1)
Performs per-element division of two arrays or a scalar by an array.
Scalar sum(InputArray src)
Calculates the sum of array elements.
void minMaxLoc(InputArray src, double *minVal, double *maxVal=0, Point *minLoc=0, Point *maxLoc=0, InputArray mask=noArray())
Finds the global minimum and maximum in an array.
std::string String
Definition: cvstd.hpp:152
String format(const char *fmt,...)
Returns a text string formatted using the printf-like expression.
#define CV_Error(code, msg)
Call the error handler.
Definition: base.hpp:320
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition: base.hpp:342
Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size &size=Size(), const Scalar &mean=Scalar(), bool swapRB=false, bool crop=false, int ddepth=CV_32F)
Creates 4-dimensional blob from image. Optionally resizes and crops image from center,...
Net readNet(const String &model, const String &config="", const String &framework="")
Read deep learning network represented in one of the supported formats.
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.
int waitKey(int delay=0)
Waits for a pressed key.
void namedWindow(const String &winname, int flags=WINDOW_AUTOSIZE)
Creates a window.
void putText(InputOutputArray img, const String &text, Point org, int fontFace, double fontScale, Scalar color, int thickness=1, int lineType=LINE_8, bool bottomLeftOrigin=false)
Draws a text string.
"black box" representation of the file storage associated with a file on disk.
Definition: core.hpp:106