Commit c928c21f authored by Evgeny Latkin, committed by Alexander Alekhin

Merge pull request #13319 from elatkin:el/gapi_perf_erdilate_2

GAPI (fluid): Erode/Dilate optimization, part 2 (#13319)

* GAPI (fluid): Erode/Dilate optimization: hard-code 3x3 case

* GAPI (fluid): Erode/Dilate optimization: CPU dispatcher

* GAPI (fluid): Erode/Dilate optimization: 10-15x speed-up with CV_SIMD

* GAPI (fluid): Erode/Dilate optimization: 20-30% speed-up
parent 992d5b8b
...@@ -1164,12 +1164,34 @@ GAPI_FLUID_KERNEL(GFluidFilter2D, cv::gapi::imgproc::GFilter2D, true) ...@@ -1164,12 +1164,34 @@ GAPI_FLUID_KERNEL(GFluidFilter2D, cv::gapi::imgproc::GFilter2D, true)
// //
//----------------------------- //-----------------------------
// Classifies a 3x3 structuring element passed as nine mask bytes
// (kernel[0..2] is the top row, kernel[3..5] the middle, kernel[6..8] the bottom).
//
// Returns:
//   M_FULL  - all nine entries are non-zero;
//   M_CROSS - the centre row and centre column are non-zero, all four corners are zero;
//   M_UNDEF - any other pattern.
static MorphShape detect_morph3x3_shape(const uchar kernel[])
{
    // Both recognised shapes need the centre row and centre column fully set,
    // so anything failing this check cannot be M_FULL or M_CROSS.
    const bool plus_is_set = kernel[1] &&
                             kernel[3] && kernel[4] && kernel[5] &&
                             kernel[7];
    if (!plus_is_set)
        return M_UNDEF;

    // With the "plus" part set, the four corners alone decide the shape.
    const int corners_set = (kernel[0] ? 1 : 0) + (kernel[2] ? 1 : 0) +
                            (kernel[6] ? 1 : 0) + (kernel[8] ? 1 : 0);

    if (corners_set == 4)
        return M_FULL;
    if (corners_set == 0)
        return M_CROSS;

    // Mixed corners: neither a full rectangle nor a cross.
    return M_UNDEF;
}
template<typename DST, typename SRC> template<typename DST, typename SRC>
static void run_morphology( Buffer& dst, static void run_morphology( Buffer& dst,
const View & src, const View & src,
const uchar k[], const uchar k[],
int k_rows, int k_rows,
int k_cols, int k_cols,
MorphShape k_type,
const cv::Point & /* anchor */, const cv::Point & /* anchor */,
Morphology morphology) Morphology morphology)
{ {
...@@ -1199,7 +1221,7 @@ static void run_morphology( Buffer& dst, ...@@ -1199,7 +1221,7 @@ static void run_morphology( Buffer& dst,
// call optimized code, if 3x3 // call optimized code, if 3x3
if (3 == k_rows && 3 == k_cols) if (3 == k_rows && 3 == k_cols)
{ {
run_morphology3x3_impl(out, in, width, chan, k, morphology); run_morphology3x3_impl(out, in, width, chan, k, k_type, morphology);
return; return;
} }
...@@ -1261,14 +1283,16 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true) ...@@ -1261,14 +1283,16 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true)
int k_rows = kernel.rows; int k_rows = kernel.rows;
int k_cols = kernel.cols; int k_cols = kernel.cols;
int k_size = k_rows * k_cols;
auto *k = scratch.OutLine<uchar>(); // copy of kernel.data auto *k = scratch.OutLine<uchar>(); // copy of kernel.data
auto k_type = static_cast<MorphShape>(k[k_size]);
// DST SRC OP __VA_ARGS__ // DST SRC OP __VA_ARGS__
UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE); UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE); UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
UNARY_( short, short, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE); UNARY_( short, short, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
UNARY_( float, float, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE); UNARY_( float, float, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
} }
...@@ -1283,8 +1307,9 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true) ...@@ -1283,8 +1307,9 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true)
{ {
int k_rows = kernel.rows; int k_rows = kernel.rows;
int k_cols = kernel.cols; int k_cols = kernel.cols;
int k_size = k_rows * k_cols;
cv::gapi::own::Size bufsize(k_rows * k_cols, 1); cv::gapi::own::Size bufsize(k_size + 1, 1);
GMatDesc bufdesc = {CV_8U, 1, bufsize}; GMatDesc bufdesc = {CV_8U, 1, bufsize};
Buffer buffer(bufdesc); Buffer buffer(bufdesc);
scratch = std::move(buffer); scratch = std::move(buffer);
...@@ -1292,6 +1317,11 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true) ...@@ -1292,6 +1317,11 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true)
// FIXME: move to resetScratch stage ? // FIXME: move to resetScratch stage ?
auto *k = scratch.OutLine<uchar>(); auto *k = scratch.OutLine<uchar>();
getKernel(k, kernel); getKernel(k, kernel);
if (3 == k_rows && 3 == k_cols)
k[k_size] = static_cast<uchar>(detect_morph3x3_shape(k));
else
k[k_size] = static_cast<uchar>(M_UNDEF);
} }
static void resetScratch(Buffer& /* scratch */) static void resetScratch(Buffer& /* scratch */)
...@@ -1339,14 +1369,16 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true) ...@@ -1339,14 +1369,16 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true)
int k_rows = kernel.rows; int k_rows = kernel.rows;
int k_cols = kernel.cols; int k_cols = kernel.cols;
int k_size = k_rows * k_cols;
auto *k = scratch.OutLine<uchar>(); // copy of kernel.data auto *k = scratch.OutLine<uchar>(); // copy of kernel.data
auto k_type = static_cast<MorphShape>(k[k_size]);
// DST SRC OP __VA_ARGS__ // DST SRC OP __VA_ARGS__
UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE); UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE); UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
UNARY_( short, short, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE); UNARY_( short, short, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
UNARY_( float, float, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE); UNARY_( float, float, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
} }
...@@ -1361,8 +1393,9 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true) ...@@ -1361,8 +1393,9 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true)
{ {
int k_rows = kernel.rows; int k_rows = kernel.rows;
int k_cols = kernel.cols; int k_cols = kernel.cols;
int k_size = k_rows * k_cols;
cv::gapi::own::Size bufsize(k_rows * k_cols, 1); cv::gapi::own::Size bufsize(k_size + 1, 1);
GMatDesc bufdesc = {CV_8U, 1, bufsize}; GMatDesc bufdesc = {CV_8U, 1, bufsize};
Buffer buffer(bufdesc); Buffer buffer(bufdesc);
scratch = std::move(buffer); scratch = std::move(buffer);
...@@ -1370,6 +1403,11 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true) ...@@ -1370,6 +1403,11 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true)
// FIXME: move to resetScratch stage ? // FIXME: move to resetScratch stage ?
auto *k = scratch.OutLine<uchar>(); auto *k = scratch.OutLine<uchar>();
getKernel(k, kernel); getKernel(k, kernel);
if (3 == k_rows && 3 == k_cols)
k[k_size] = static_cast<uchar>(detect_morph3x3_shape(k));
else
k[k_size] = static_cast<uchar>(M_UNDEF);
} }
static void resetScratch(Buffer& /* scratch */) static void resetScratch(Buffer& /* scratch */)
......
...@@ -119,10 +119,11 @@ RUN_FILTER2D_3X3_IMPL( float, float) ...@@ -119,10 +119,11 @@ RUN_FILTER2D_3X3_IMPL( float, float)
#define RUN_MORPHOLOGY3X3_IMPL(T) \ #define RUN_MORPHOLOGY3X3_IMPL(T) \
void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \ void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \
const uchar k[], Morphology morphology) \ const uchar k[], MorphShape k_type, \
Morphology morphology) \
{ \ { \
CV_CPU_DISPATCH(run_morphology3x3_impl, \ CV_CPU_DISPATCH(run_morphology3x3_impl, \
(out, in, width, chan, k, morphology), \ (out, in, width, chan, k, k_type, morphology), \
CV_CPU_DISPATCH_MODES_ALL); \ CV_CPU_DISPATCH_MODES_ALL); \
} }
......
...@@ -85,9 +85,12 @@ RUN_FILTER2D_3X3_IMPL( float, float) ...@@ -85,9 +85,12 @@ RUN_FILTER2D_3X3_IMPL( float, float)
enum Morphology { M_ERODE, M_DILATE }; enum Morphology { M_ERODE, M_DILATE };
enum MorphShape { M_FULL, M_CROSS, M_UNDEF };
#define RUN_MORPHOLOGY3X3_IMPL(T) \ #define RUN_MORPHOLOGY3X3_IMPL(T) \
void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \ void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \
const uchar k[], Morphology morphology); const uchar k[], MorphShape k_type, \
Morphology morphology);
RUN_MORPHOLOGY3X3_IMPL(uchar ) RUN_MORPHOLOGY3X3_IMPL(uchar )
RUN_MORPHOLOGY3X3_IMPL(ushort) RUN_MORPHOLOGY3X3_IMPL(ushort)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment