Commit c89780df authored by Dmitry Matveev, committed by Alexander Alekhin

Merge pull request #16039 from dmatveev:dm/gapi_tutorial_interactive_face_detection

* G-API-NG/Docs: Added a tutorial page on interactive face detection sample

- Introduced a "--ser" option to run the pipeline serially for
  benchmarking purposes
- Reorganized sample code to better fit the documentation;
- Fixed a couple of issues (mainly typos) in the public headers

* G-API-NG/Docs: Reflected meta-less compilation in new G-API tutorial

* G-API-NG/Docs: Addressed review comments on Face Analytics Pipeline example
parent 3fddd3bf
@@ -287,7 +287,7 @@ CALLER_GRAPH = NO
 GRAPHICAL_HIERARCHY = YES
 DIRECTORY_GRAPH = YES
 DOT_IMAGE_FORMAT = svg
-INTERACTIVE_SVG = YES
+INTERACTIVE_SVG = NO
 DOT_PATH =
 DOTFILE_DIRS =
 MSCFILE_DIRS =
...
@@ -3,6 +3,20 @@
 In this section you will learn about graph-based image processing and
 how G-API module can be used for that.
 
+- @subpage tutorial_gapi_interactive_face_detection
+
+    *Languages:* C++
+
+    *Compatibility:* \> OpenCV 4.2
+
+    *Author:* Dmitry Matveev
+
+    This tutorial illustrates how to build a hybrid video processing
+    pipeline with G-API where Deep Learning and image processing are
+    combined effectively to maximize the overall throughput. This
+    sample requires Intel® distribution of OpenVINO™ Toolkit version
+    2019R2 or later.
+
 - @subpage tutorial_gapi_anisotropic_segmentation
 
     *Languages:* C++
...
@@ -24,12 +24,13 @@ namespace wip {
  * Implement this interface if you want customize the way how data is
  * streaming into GStreamingCompiled.
  *
- * Objects implementing this interface can be passes to
- * GStreamingCompiled via setSource()/cv::gin(). Regular compiled
- * graphs (GCompiled) don't support input objects of this type.
+ * Objects implementing this interface can be passed to
+ * GStreamingCompiled using setSource() with cv::gin(). Regular
+ * compiled graphs (GCompiled) don't support input objects of this
+ * type.
  *
  * Default cv::VideoCapture-based implementation is available, see
- * cv::gapi::GCaptureSource.
+ * cv::gapi::wip::GCaptureSource.
  *
  * @note stream sources are passed to G-API via shared pointers, so
  * please use ptr() when passing a IStreamSource implementation to
...
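For context, this is how the default capture source is plugged into a streaming-compiled graph, mirroring the sample below. A minimal sketch, assuming the GCaptureSource header path and an illustrative file name; `cc` stands for a `cv::GStreamingCompiled` object:

```cpp
#include <opencv2/gapi/streaming/cap.hpp>  // assumed location of cv::gapi::wip::GCaptureSource

// make_src<>() wraps an IStreamSource implementation into the shared
// pointer G-API expects (see the ptr() note above); cv::gin() packs it
// as the graph's input.
auto src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>("video.mp4");
cc.setSource(cv::gin(src));
cc.start();
```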
@@ -30,7 +30,8 @@ const std::string keys =
     "{ emom | | IE emotions recognition model IR }"
     "{ emow | | IE emotions recognition model weights }"
     "{ emod | | IE emotions recognition model device }"
-    "{ pure | | When set, no output is displayed. Useful for benchmarking }";
+    "{ pure | | When set, no output is displayed. Useful for benchmarking }"
+    "{ ser  | | Run serially (no pipelining involved). Useful for benchmarking }";
 
 struct Avg {
     struct Elapsed {
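The `Avg`/`Elapsed` helper is truncated in this hunk. A minimal `std::chrono`-based stand-in (an assumption, not the sample's exact code) providing the `start()`/`elapsed()`/`fps()` calls used further below:

```cpp
#include <chrono>
#include <cstddef>

// Assumed minimal equivalent of the sample's Avg helper: remembers a
// start point and derives elapsed milliseconds and average FPS from it.
struct Avg {
    using clock = std::chrono::high_resolution_clock;
    clock::time_point started;

    void start() { started = clock::now(); }
    double elapsed() const {  // elapsed time since start(), in milliseconds
        return std::chrono::duration<double, std::milli>(clock::now() - started).count();
    }
    double fps(std::size_t frames) const {  // average frames per second
        return static_cast<double>(frames) / (elapsed() / 1000.0);
    }
};
```

The real sample's `elapsed()` returns a dedicated `Elapsed` value with minute/second formatting; this simplified double still works with the `std::cout << avg.elapsed()` usage shown below.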
@@ -73,6 +74,7 @@ namespace custom {
 // executed. The _how_ is defined at graph compilation stage (via parameters),
 // not on the graph construction stage.
 
+//! [G_API_NET]
 // Face detector: takes one Mat, returns another Mat
 G_API_NET(Faces, <cv::GMat(cv::GMat)>, "face-detector");
@@ -84,7 +86,9 @@ G_API_NET(AgeGender, <AGInfo(cv::GMat)>, "age-gender-recoginition");
 // Emotion recognition - takes one Mat, returns another.
 G_API_NET(Emotions, <cv::GMat(cv::GMat)>, "emotions-recognition");
+//! [G_API_NET]
 
+//! [Postproc]
 // SSD Post-processing function - this is not a network but a kernel.
 // The kernel body is declared separately, this is just an interface.
 // This operation takes two Mats (detections and the source image),
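A note on the new [G_API_NET] snippet: each G_API_NET declaration introduces a type that identifies the network during graph construction through cv::gapi::infer<>. A brief illustration (a sketch; the sample's full graph is discussed further below):

```cpp
cv::GMat in;                                               // graph input placeholder
cv::GMat detections = cv::gapi::infer<custom::Faces>(in);  // full-frame SSD inference
```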
@@ -101,6 +105,7 @@ G_API_OP(PostProc, <cv::GArray<cv::Rect>(cv::GMat, cv::GMat)>, "custom.fd_postpr
     }
 };
 
+// OpenCV-based implementation of the above kernel.
 GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
     static void run(const cv::Mat &in_ssd_result,
                     const cv::Mat &in_frame,
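The run() signature is cut off by this hunk. In OpenCV-backend kernels each G-API type maps to a host type, so the GArray<cv::Rect> output of PostProc should arrive as an output reference; presumably along these lines (the output parameter name is illustrative):

```cpp
static void run(const cv::Mat &in_ssd_result,       // GMat input  -> cv::Mat
                const cv::Mat &in_frame,            // GMat input  -> cv::Mat
                std::vector<cv::Rect> &out_faces);  // GArray<cv::Rect> output -> std::vector&
```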
@@ -124,10 +129,12 @@ GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
             if (image_id < 0.f) {    // indicates end of detections
                 break;
             }
-            if (confidence < 0.5f) { // fixme: hard-coded snapshot
+            if (confidence < 0.5f) { // a hard-coded snapshot
                 continue;
             }
+            // Convert floating-point coordinates to the absolute image
+            // frame coordinates; clip by the source image boundaries.
             cv::Rect rc;
             rc.x = static_cast<int>(rc_left * upscale.width);
             rc.y = static_cast<int>(rc_top * upscale.height);
@@ -137,6 +144,8 @@ GAPI_OCV_KERNEL(OCVPostProc, PostProc) {
         }
     }
 };
+//! [Postproc]
+
 } // namespace custom
 
 namespace labels {
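For orientation: a G_API_OP-declared operation exposes a generated static on() factory matching the declared signature, which is how PostProc enters the graph:

```cpp
// During graph construction (sketch): 'detections' is the SSD output GMat,
// 'in' is the input frame GMat; on() records the operation in the graph.
cv::GArray<cv::Rect> faces = custom::PostProc::on(detections, in);
```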
@@ -208,9 +217,11 @@ int main(int argc, char *argv[])
     }
     const std::string input = cmd.get<std::string>("input");
     const bool no_show = cmd.get<bool>("pure");
+    const bool be_serial = cmd.get<bool>("ser");
 
     // Express our processing pipeline. Lambda-based constructor
     // is used to keep all temporary objects in a dedicated scope.
+    //! [GComputation]
     cv::GComputation pp([]() {
         // Declare an empty GMat - the beginning of the pipeline.
         cv::GMat in;
@@ -256,6 +267,7 @@ int main(int argc, char *argv[])
         return cv::GComputation(cv::GIn(in),
                                 cv::GOut(frame, faces, ages, genders, emotions));
     });
+    //! [GComputation]
     // Note: it might be very useful to have dimensions loaded at this point!
 
     // After our computation is defined, specify how it should be executed.
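The body of the lambda is elided from this diff. Combining the networks and the kernel declared above, it presumably reads roughly as follows (a sketch, not the verbatim sample; cv::gapi::copy forwards the input frame to the outputs):

```cpp
cv::GComputation pp([]() {
    cv::GMat in;                                               // pipeline input
    cv::GMat detections = cv::gapi::infer<custom::Faces>(in);  // detect faces
    cv::GArray<cv::Rect> faces = custom::PostProc::on(detections, in);
    cv::GArray<cv::GMat> ages, genders;                        // AGInfo unpacks into two arrays
    std::tie(ages, genders) = cv::gapi::infer<custom::AgeGender>(faces, in);
    cv::GArray<cv::GMat> emotions = cv::gapi::infer<custom::Emotions>(faces, in);
    cv::GMat frame = cv::gapi::copy(in);                       // pass the frame through
    return cv::GComputation(cv::GIn(in),
                            cv::GOut(frame, faces, ages, genders, emotions));
});
```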
@@ -269,7 +281,8 @@ int main(int argc, char *argv[])
     //
     // OpenCV DNN backend will have its own parmater structure with settings
     // relevant to OpenCV DNN module. Same applies to other possible inference
-    // backends, like cuDNN, etc (:-))
+    // backends...
+    //! [Param_Cfg]
     auto det_net = cv::gapi::ie::Params<custom::Faces> {
         cmd.get<std::string>("fdm"),   // read cmd args: path to topology IR
         cmd.get<std::string>("fdw"),   // read cmd args: path to weights
@@ -287,40 +300,54 @@ int main(int argc, char *argv[])
         cmd.get<std::string>("emow"),  // read cmd args: path to weights
         cmd.get<std::string>("emod"),  // read cmd args: device specifier
     };
+    //! [Param_Cfg]
 
+    //! [Compile]
     // Form a kernel package (with a single OpenCV-based implementation of our
-    // post-processing) and a network package (holding our three networks).x
+    // post-processing) and a network package (holding our three networks).
     auto kernels = cv::gapi::kernels<custom::OCVPostProc>();
     auto networks = cv::gapi::networks(det_net, age_net, emo_net);
 
-    // Compile our pipeline for a specific input image format (TBD - can be relaxed)
-    // and pass our kernels & networks as parameters.
-    // This is the place where G-API learns which networks & kernels we're actually
-    // operating with (the graph description itself known nothing about that).
-    auto cc = pp.compileStreaming(cv::GMatDesc{CV_8U,3,cv::Size(1280,720)},
-                                  cv::compile_args(kernels, networks));
+    // Compile our pipeline and pass our kernels & networks as
+    // parameters. This is the place where G-API learns which
+    // networks & kernels we're actually operating with (the graph
+    // description itself knows nothing about that).
+    auto cc = pp.compileStreaming(cv::compile_args(kernels, networks));
+    //! [Compile]
 
+    Avg avg;
+    std::size_t frames = 0u;           // Frame counter (not produced by the graph)
     std::cout << "Reading " << input << std::endl;
-    cc.setSource(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
 
+    // Duplicate huge portions of the code in if/else branches for the sake of
+    // better documentation snippets
+    if (!be_serial) {
+        //! [Source]
+        auto in_src = cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input);
+        cc.setSource(cv::gin(in_src));
+        //! [Source]
-    Avg avg;
         avg.start();
+        //! [Run]
+        // After data source is specified, start the execution
         cc.start();
 
-    cv::Mat frame;
-    std::vector<cv::Rect> faces;
-    std::vector<cv::Mat> out_ages;
-    std::vector<cv::Mat> out_genders;
-    std::vector<cv::Mat> out_emotions;
-    std::size_t frames = 0u;
+        // Declare data objects we will be receiving from the pipeline.
+        cv::Mat frame;                      // The captured frame itself
+        std::vector<cv::Rect> faces;        // Array of detected faces
+        std::vector<cv::Mat> out_ages;      // Array of inferred ages (one blob per face)
+        std::vector<cv::Mat> out_genders;   // Array of inferred genders (one blob per face)
+        std::vector<cv::Mat> out_emotions;  // Array of classified emotions (one blob per face)
 
         // Implement different execution policies depending on the display option
         // for the best performance.
         while (cc.running()) {
             auto out_vector = cv::gout(frame, faces, out_ages, out_genders, out_emotions);
             if (no_show) {
-                // This is purely a video processing. No need to balance with UI rendering.
-                // Use a blocking pull() to obtain data. Break the loop if the stream is over.
+                // This is purely a video processing. No need to balance
+                // with UI rendering. Use a blocking pull() to obtain
+                // data. Break the loop if the stream is over.
                 if (!cc.pull(std::move(out_vector)))
                     break;
             } else if (!cc.try_pull(std::move(out_vector))) {
@@ -329,15 +356,46 @@ int main(int argc, char *argv[])
                 if (cv::waitKey(1) >= 0) break;
                 else continue;
             }
-            // At this point we have data for sure (obtained in either blocking or non-blocking way).
+            // At this point we have data for sure (obtained in either
+            // blocking or non-blocking way).
             frames++;
             labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions);
             labels::DrawFPS(frame, frames, avg.fps(frames));
             if (!no_show) cv::imshow("Out", frame);
         }
-    cc.stop();
-    std::cout << "Processed " << frames << " frames in " << avg.elapsed() << std::endl;
+        //! [Run]
+    } else { // (serial flag)
+        //! [Run_Serial]
+        cv::VideoCapture cap(input);
+        cv::Mat in_frame, frame;            // The captured frame itself
+        std::vector<cv::Rect> faces;        // Array of detected faces
+        std::vector<cv::Mat> out_ages;      // Array of inferred ages (one blob per face)
+        std::vector<cv::Mat> out_genders;   // Array of inferred genders (one blob per face)
+        std::vector<cv::Mat> out_emotions;  // Array of classified emotions (one blob per face)
+        while (cap.read(in_frame)) {
+            pp.apply(cv::gin(in_frame),
+                     cv::gout(frame, faces, out_ages, out_genders, out_emotions),
+                     cv::compile_args(kernels, networks));
+            labels::DrawResults(frame, faces, out_ages, out_genders, out_emotions);
+            frames++;
+            if (frames == 1u) {
+                // Start timer only after 1st frame processed -- compilation
+                // happens on-the-fly here
+                avg.start();
+            } else {
+                // Measure & draw FPS for all other frames
+                labels::DrawFPS(frame, frames, avg.fps(frames-1));
+            }
+            if (!no_show) {
+                cv::imshow("Out", frame);
+                if (cv::waitKey(1) >= 0) break;
+            }
+        }
+        //! [Run_Serial]
+    }
+    std::cout << "Processed " << frames << " frames in " << avg.elapsed()
+              << " (" << avg.fps(frames) << " FPS)" << std::endl;
     return 0;
 }
 #else
...