Commit c89780df authored by Dmitry Matveev's avatar Dmitry Matveev Committed by Alexander Alekhin

Merge pull request #16039 from dmatveev:dm/gapi_tutorial_interactive_face_detection

* G-API-NG/Docs: Added a tutorial page on interactive face detection sample

- Introduced a "--ser" option to run the pipeline serially for
  benchmarking purposes
- Reorganized sample code to better fit the documentation;
- Fixed a couple of issues (mainly typos) in the public headers

* G-API-NG/Docs: Reflected meta-less compilation in new G-API tutorial

* G-API-NG/Docs: Addressed review comments on Face Analytics Pipeline example
parent 3fddd3bf
......@@ -287,7 +287,7 @@ CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DIRECTORY_GRAPH = YES
DOT_IMAGE_FORMAT = svg
INTERACTIVE_SVG = YES
INTERACTIVE_SVG = NO
DOT_PATH =
DOTFILE_DIRS =
MSCFILE_DIRS =
......
......@@ -3,6 +3,20 @@
In this section you will learn about graph-based image processing and
how the G-API module can be used for that.
- @subpage tutorial_gapi_interactive_face_detection
*Languages:* C++
*Compatibility:* \> OpenCV 4.2
*Author:* Dmitry Matveev
This tutorial illustrates how to build a hybrid video processing
pipeline with G-API where Deep Learning and image processing are
combined effectively to maximize the overall throughput. This
sample requires Intel® distribution of OpenVINO™ Toolkit version
2019R2 or later.
- @subpage tutorial_gapi_anisotropic_segmentation
*Languages:* C++
......
......@@ -24,12 +24,13 @@ namespace wip {
* Implement this interface if you want to customize how data is
* streamed into GStreamingCompiled.
*
* Objects implementing this interface can be passes to
* GStreamingCompiled via setSource()/cv::gin(). Regular compiled
* graphs (GCompiled) don't support input objects of this type.
* Objects implementing this interface can be passed to
* GStreamingCompiled using setSource() with cv::gin(). Regular
* compiled graphs (GCompiled) don't support input objects of this
* type.
*
* Default cv::VideoCapture-based implementation is available, see
* cv::gapi::GCaptureSource.
* cv::gapi::wip::GCaptureSource.
*
* @note stream sources are passed to G-API via shared pointers, so
* please use ptr() when passing an IStreamSource implementation to
......
......@@ -30,7 +30,8 @@ const std::string keys =
"{ emom | | IE emotions recognition model IR }"
"{ emow | | IE emotions recognition model weights }"
"{ emod | | IE emotions recognition model device }"
"{ pure | | When set, no output is displayed. Useful for benchmarking }";
"{ pure | | When set, no output is displayed. Useful for benchmarking }"
"{ ser | | Run serially (no pipelining involved). Useful for benchmarking }";
struct Avg {
struct Elapsed {
......@@ -73,6 +74,7 @@ namespace custom {
// executed. The _how_ is defined at graph compilation stage (via parameters),
// not on the graph construction stage.
//! [G_API_NET]
// Face detector: takes one Mat, returns another Mat
G_API_NET(Faces, <cv::GMat(cv::GMat)>, "face-detector");
......@@ -84,7 +86,9 @@ G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "age-gender-recoginition");
// Emotion recognition - takes one Mat, returns another.
G_API_NET(Emotions, <cv::GMat(cv::GMat)>, "emotions-recognition");
//! [G_API_NET]
//! [Postproc]
// SSD Post-processing function - this is not a network but a kernel.
// The kernel body is declared separately, this is just an interface.
// This operation takes two Mats (detections and the source image),
......@@ -101,6 +105,7 @@ G_API_OP(PostProc, <cv::GArray<cv::Rect>(cv::GMat, cv::GMat)>, "custom.fd_postpr
}
};
// OpenCV-based implementation of the above kernel.
GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
static void run(const cv::Mat &in_ssd_result,
const cv::Mat &in_frame,
......@@ -124,10 +129,12 @@ GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
if (image_id < 0.f) { // indicates end of detections
break;
}
if (confidence < 0.5f) { // fixme: hard-coded snapshot
if (confidence < 0.5f) { // a hard-coded confidence threshold
continue;
}
// Convert floating-point coordinates to the absolute image
// frame coordinates; clip by the source image boundaries.
cv::Rect rc;
rc.x = static_cast<int>(rc_left * upscale.width);
rc.y = static_cast<int>(rc_top * upscale.height);
......@@ -137,6 +144,8 @@ GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
}
}
};
//! [Postproc]
} // namespace custom
namespace labels {
......@@ -208,9 +217,11 @@ int main(int argc, char *argv[])
}
const std::string input = cmd.get<std::string>("input");
const bool no_show = cmd.get<bool>("pure");
const bool be_serial = cmd.get<bool>("ser");
// Express our processing pipeline. Lambda-based constructor
// is used to keep all temporary objects in a dedicated scope.
//! [GComputation]
cv::GComputation pp([]() {
// Declare an empty GMat - the beginning of the pipeline.
cv::GMat in;
......@@ -256,6 +267,7 @@ int main(int argc, char *argv[])
return cv::GComputation(cv::GIn(in),
cv::GOut(frame, faces, ages, genders, emotions));
});
//! [GComputation]
// Note: it might be very useful to have dimensions loaded at this point!
// After our computation is defined, specify how it should be executed.
......@@ -269,7 +281,8 @@ int main(int argc, char *argv[])
//
// OpenCV DNN backend will have its own parameter structure with settings
// relevant to OpenCV DNN module. Same applies to other possible inference
// backends, like cuDNN, etc (:-))
// backends...
//! [Param_Cfg]
auto det_net = cv::gapi::ie::Params<custom::Faces> {
cmd.get<std::string>("fdm"), // read cmd args: path to topology IR
cmd.get<std::string>("fdw"), // read cmd args: path to weights
......@@ -287,40 +300,54 @@ int main(int argc, char *argv[])
cmd.get<std::string>("emow"), // read cmd args: path to weights
cmd.get<std::string>("emod"), // read cmd args: device specifier
};
//! [Param_Cfg]
//! [Compile]
// Form a kernel package (with a single OpenCV-based implementation of our
// post-processing) and a network package (holding our three networks).x
// post-processing) and a network package (holding our three networks).
auto kernels = cv::gapi::kernels<custom::OCVPostProc>();
auto networks = cv::gapi::networks(det_net, age_net, emo_net);
// Compile our pipeline for a specific input image format (TBD - can be relaxed)
// and pass our kernels & networks as parameters.
// This is the place where G-API learns which networks & kernels we're actually
// operating with (the graph description itself known nothing about that).
auto cc = pp.compileStreaming(cv::GMatDesc{CV_8U,3,cv::Size(1280,720)},
cv::compile_args(kernels, networks));
// Compile our pipeline and pass our kernels & networks as
// parameters. This is the place where G-API learns which
// networks & kernels we're actually operating with (the graph
// description itself knows nothing about that).
auto cc = pp.compileStreaming(cv::compile_args(kernels, networks));
//! [Compile]
Avg avg;
std::size_t frames = 0u; // Frame counter (not produced by the graph)
std::cout << "Reading " << input << std::endl;
cc.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
// Duplicate huge portions of the code in if/else branches for the sake of
// better documentation snippets
if (!be_serial) {
//! [Source]
auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input);
cc.setSource(cv::gin(in_src));
//! [Source]
Avg avg;
avg.start();
//! [Run]
// After data source is specified, start the execution
cc.start();
cv::Mat frame;
std::vector<cv::Rect> faces;
std::vector<cv::Mat> out_ages;
std::vector<cv::Mat> out_genders;
std::vector<cv::Mat> out_emotions;
std::size_t frames = 0u;
// Declare data objects we will be receiving from the pipeline.
cv::Mat frame; // The captured frame itself
std::vector<cv::Rect> faces; // Array of detected faces
std::vector<cv::Mat> out_ages; // Array of inferred ages (one blob per face)
std::vector<cv::Mat> out_genders; // Array of inferred genders (one blob per face)
std::vector<cv::Mat> out_emotions; // Array of classified emotions (one blob per face)
// Implement different execution policies depending on the display option
// for the best performance.
while (cc.running()) {
auto out_vector = cv::gout(frame, faces, out_ages, out_genders, out_emotions);
if (no_show) {
// This is purely a video processing. No need to balance with UI rendering.
// Use a blocking pull() to obtain data. Break the loop if the stream is over.
// This is purely video processing. No need to balance
// with UI rendering. Use a blocking pull() to obtain
// data. Break the loop if the stream is over.
if (!cc.pull(std::move(out_vector)))
break;
} else if (!cc.try_pull(std::move(out_vector))) {
......@@ -329,15 +356,46 @@ int main(int argc, char *argv[])
if (cv::waitKey(1) >= 0) break;
else continue;
}
// At this point we have data for sure (obtained in either blocking or non-blocking way).
// At this point we have data for sure (obtained in either
// blocking or non-blocking way).
frames++;
labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions);
labels::DrawFPS(frame, frames, avg.fps(frames));
if (!no_show) cv::imshow("Out", frame);
}
cc.stop();
std::cout << "Processed " << frames << " frames in " << avg.elapsed() << std::endl;
//! [Run]
} else { // (serial flag)
//! [Run_Serial]
cv::VideoCapture cap(input);
cv::Mat in_frame, frame; // The captured frame itself
std::vector<cv::Rect> faces; // Array of detected faces
std::vector<cv::Mat> out_ages; // Array of inferred ages (one blob per face)
std::vector<cv::Mat> out_genders; // Array of inferred genders (one blob per face)
std::vector<cv::Mat> out_emotions; // Array of classified emotions (one blob per face)
while (cap.read(in_frame)) {
pp.apply(cv::gin(in_frame),
cv::gout(frame, faces, out_ages, out_genders, out_emotions),
cv::compile_args(kernels, networks));
labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions);
frames++;
if (frames == 1u) {
// Start timer only after 1st frame processed -- compilation
// happens on-the-fly here
avg.start();
} else {
// Measure & draw FPS for all other frames
labels::DrawFPS(frame, frames, avg.fps(frames-1));
}
if (!no_show) {
cv::imshow("Out", frame);
if (cv::waitKey(1) >= 0) break;
}
}
//! [Run_Serial]
}
std::cout << "Processed " << frames << " frames in " << avg.elapsed()
<< " (" << avg.fps(frames) << " FPS)" << std::endl;
return 0;
}
#else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment