Commit f8137023, authored by Evgeny Latkin, committed by Alexander Alekhin

Merge pull request #13162 from elatkin:el/gapi_perf_rgb2gray

GAPI (fluid): RGB/BGR to gray: optimization (#13162)

* GAPI (fluid): RGB/BGR to Gray: add performance tests

* GAPI (fluid): RGB/BGR to Gray: speedup 8-12x with manual CV_SIMD

* GAPI (fluid): RGB/BGR to Gray: fix compiler warning

* GAPI (fluid): RGB/BGR to Gray: dynamic dispatching to AVX2

* GAPI (fluid): RGB/BGR to Gray: check R/G/B coefficients

* GAPI (fluid): RGB/BGR to Gray: fixed compilation error (caused by change in master)
parent 85fad150
...@@ -602,7 +602,7 @@ PERF_TEST_P_(RGB2GrayPerfTest, TestPerformance) ...@@ -602,7 +602,7 @@ PERF_TEST_P_(RGB2GrayPerfTest, TestPerformance)
TEST_CYCLE() TEST_CYCLE()
{ {
c.apply(in_mat1, out_mat_gapi, std::move(compile_args)); c.apply(in_mat1, out_mat_gapi);
} }
// Comparison ////////////////////////////////////////////////////////////// // Comparison //////////////////////////////////////////////////////////////
...@@ -640,7 +640,7 @@ PERF_TEST_P_(BGR2GrayPerfTest, TestPerformance) ...@@ -640,7 +640,7 @@ PERF_TEST_P_(BGR2GrayPerfTest, TestPerformance)
TEST_CYCLE() TEST_CYCLE()
{ {
c.apply(in_mat1, out_mat_gapi, std::move(compile_args)); c.apply(in_mat1, out_mat_gapi);
} }
// Comparison ////////////////////////////////////////////////////////////// // Comparison //////////////////////////////////////////////////////////////
......
...@@ -33,4 +33,14 @@ namespace opencv_test ...@@ -33,4 +33,14 @@ namespace opencv_test
Values(1, 2), Values(1, 2),
Values(cv::compile_args(IMGPROC_FLUID)))); Values(cv::compile_args(IMGPROC_FLUID))));
// Instantiates RGB2GrayPerfTest under the name RGB2GrayPerfTestFluid.
// Parameters: a ToleranceColor(1e-3) comparison functor, three frame sizes
// (szVGA, sz720p, sz1080p), and compile args built from IMGPROC_FLUID.
// NOTE(review): IMGPROC_FLUID presumably selects the Fluid imgproc kernel
// package - confirm against its definition, which is outside this hunk.
INSTANTIATE_TEST_CASE_P(RGB2GrayPerfTestFluid, RGB2GrayPerfTest,
Combine(Values(ToleranceColor(1e-3).to_compare_f()),
Values(szVGA, sz720p, sz1080p),
Values(cv::compile_args(IMGPROC_FLUID))));
// Same parameter set as above, applied to BGR2GrayPerfTest.
INSTANTIATE_TEST_CASE_P(BGR2GrayPerfTestFluid, BGR2GrayPerfTest,
Combine(Values(ToleranceColor(1e-3).to_compare_f()),
Values(szVGA, sz720p, sz1080p),
Values(cv::compile_args(IMGPROC_FLUID))));
} }
...@@ -60,20 +60,15 @@ static void run_rgb2gray(Buffer &dst, const View &src, float coef_r, float coef_ ...@@ -60,20 +60,15 @@ static void run_rgb2gray(Buffer &dst, const View &src, float coef_r, float coef_
GAPI_Assert(dst.meta().chan == 1); GAPI_Assert(dst.meta().chan == 1);
GAPI_Assert(src.length() == dst.length()); GAPI_Assert(src.length() == dst.length());
GAPI_Assert(coef_r < 1 && coef_g < 1 && coef_b < 1);
GAPI_Assert(std::abs(coef_r + coef_g + coef_b - 1) < 0.001);
const auto *in = src.InLine<uchar>(0); const auto *in = src.InLine<uchar>(0);
auto *out = dst.OutLine<uchar>(); auto *out = dst.OutLine<uchar>();
int width = dst.length(); int width = dst.length();
// TODO: Vectorize for SIMD run_rgb2gray_impl(out, in, width, coef_r, coef_g, coef_b);
for (int w=0; w < width; w++)
{
uchar r = in[3*w ];
uchar g = in[3*w + 1];
uchar b = in[3*w + 2];
float result = coef_r*r + coef_g*g + coef_b*b;
out[w] = saturate<uchar>(result, roundf);
}
} }
GAPI_FLUID_KERNEL(GFluidRGB2GrayCustom, cv::gapi::imgproc::GRGB2GrayCustom, false) GAPI_FLUID_KERNEL(GFluidRGB2GrayCustom, cv::gapi::imgproc::GRGB2GrayCustom, false)
......
...@@ -8,13 +8,7 @@ ...@@ -8,13 +8,7 @@
#include "gfluidimgproc_func.hpp" #include "gfluidimgproc_func.hpp"
#include "gfluidimgproc_func.simd.hpp" #include "gfluidimgproc_func.simd.hpp"
#if 1 #include "backends/fluid/gfluidimgproc_func.simd_declarations.hpp"
// NB: workaround for CV_SIMD bug (or feature?):
// - dynamic dispatcher assumes *.simd.hpp is directly in src dir
#include "backends/fluid/gfluidimgproc_func.simd_declarations.hpp"
#else
#include "gfluidimgproc_func.simd_declarations.hpp"
#endif
#include "gfluidutils.hpp" #include "gfluidutils.hpp"
...@@ -33,6 +27,26 @@ namespace cv { ...@@ -33,6 +27,26 @@ namespace cv {
namespace gapi { namespace gapi {
namespace fluid { namespace fluid {
//----------------------------------
//
// Fluid kernels: RGB2Gray, BGR2Gray
//
//----------------------------------
// Entry point for the RGB/BGR-to-gray row kernel in cv::gapi::fluid.
// Forwards all six arguments, unchanged, to the run_rgb2gray_impl
// implementation chosen by the CV_CPU_DISPATCH macro.
//   out           - destination row
//   in            - source row
//   width         - passed through as-is
//   coef_r/g/b    - per-channel weights, passed through as-is
void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
float coef_r, float coef_g, float coef_b)
{
// NOTE(review): presumably picks, at run time, the variant of the kernel
// built for the best instruction set the CPU supports (the commit message
// mentions AVX2) - confirm against the OpenCV dispatch macro definitions.
CV_CPU_DISPATCH(run_rgb2gray_impl,
(out, in, width, coef_r, coef_g, coef_b),
CV_CPU_DISPATCH_MODES_ALL);
}
//---------------------
//
// Fluid kernels: Sobel
//
//---------------------
#define RUN_SOBEL_ROW(DST, SRC) \ #define RUN_SOBEL_ROW(DST, SRC) \
void run_sobel_row(DST out[], const SRC *in[], int width, int chan, \ void run_sobel_row(DST out[], const SRC *in[], int width, int chan, \
const float kx[], const float ky[], int border, \ const float kx[], const float ky[], int border, \
......
...@@ -14,6 +14,15 @@ namespace cv { ...@@ -14,6 +14,15 @@ namespace cv {
namespace gapi { namespace gapi {
namespace fluid { namespace fluid {
//----------------------------------
//
// Fluid kernels: RGB2Gray, BGR2Gray
//
//----------------------------------
// Converts one row of 3-channel 8-bit pixels to 8-bit gray.
//   out           - destination row
//   in            - source row
//   width         - row length
//   coef_r/g/b    - weights of the three input channels
// NOTE(review): presumably `in` holds 3*width interleaved bytes and `out`
// holds width bytes - confirm against the definition.
void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
float coef_r, float coef_g, float coef_b);
//--------------------- //---------------------
// //
// Fluid kernels: Sobel // Fluid kernels: Sobel
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "opencv2/core.hpp" #include "opencv2/core.hpp"
#include "opencv2/core/hal/intrin.hpp" #include "opencv2/core/hal/intrin.hpp"
#include <cstdint>
#ifdef __GNUC__ #ifdef __GNUC__
# pragma GCC diagnostic push # pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wstrict-overflow" # pragma GCC diagnostic ignored "-Wstrict-overflow"
...@@ -25,7 +27,20 @@ namespace fluid { ...@@ -25,7 +27,20 @@ namespace fluid {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
//---------------------------------------------------------------------- //----------------------------------
//
// Fluid kernels: RGB2Gray, BGR2Gray
//
//----------------------------------
// Declaration of the RGB/BGR-to-gray row kernel inside the CPU-optimization
// namespace opened by CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN above.
//   out           - destination row
//   in            - source row
//   width         - row length
//   coef_r/g/b    - weights of the three input channels
void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
float coef_r, float coef_g, float coef_b);
//---------------------
//
// Fluid kernels: Sobel
//
//---------------------
#define RUN_SOBEL_ROW(DST, SRC) \ #define RUN_SOBEL_ROW(DST, SRC) \
void run_sobel_row(DST out[], const SRC *in[], int width, int chan, \ void run_sobel_row(DST out[], const SRC *in[], int width, int chan, \
...@@ -46,8 +61,93 @@ RUN_SOBEL_ROW( float, float) ...@@ -46,8 +61,93 @@ RUN_SOBEL_ROW( float, float)
#undef RUN_SOBEL_ROW #undef RUN_SOBEL_ROW
//---------------------------------------------------------------------- //----------------------------------------------------------------------
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
//----------------------------------
//
// Fluid kernels: RGB2Gray, BGR2Gray
//
//----------------------------------
// Converts one row of interleaved 3-channel 8-bit pixels to 8-bit gray:
//
//     out[w] ~= coef_r*in[3*w] + coef_g*in[3*w + 1] + coef_b*in[3*w + 2]
//
// using integer fixed-point arithmetic instead of floating point.
//
//   out           - destination row, `width` bytes are written
//   in            - source row, `3*width` bytes are read
//   width         - number of pixels in the row
//   coef_r/g/b    - channel weights; each must lie in [0, 1) and, after
//                   rounding to 16 fractional bits, they must sum to 1
//                   (65535 or 65536 in fixed point). Violations trigger
//                   GAPI_Assert.
//
// Note: the SIMD path rounds in Q8.8 while the scalar path rounds in Q0.16,
// so the two paths may differ in the least significant bit of the result.
void run_rgb2gray_impl(uchar out[], const uchar in[], int width,
                       float coef_r, float coef_g, float coef_b)
{
    constexpr int unity = 1 << 16;  // 1.0 in Q0.16

    // Scale and round in float first and validate the range *before*
    // narrowing: converting a float that truncates outside [0, 65535]
    // (negative coefficient, coefficient rounding up to 1.0, NaN) to ushort
    // is undefined behaviour.
    const float rf = coef_r * unity + 0.5f;
    const float gf = coef_g * unity + 0.5f;
    const float bf = coef_b * unity + 0.5f;
    GAPI_Assert(rf > -1.0f && rf < static_cast<float>(unity));
    GAPI_Assert(gf > -1.0f && gf < static_cast<float>(unity));
    GAPI_Assert(bf > -1.0f && bf < static_cast<float>(unity));

    const ushort rc = static_cast<ushort>(rf);  // Q0.16 inside ushort
    const ushort gc = static_cast<ushort>(gf);
    const ushort bc = static_cast<ushort>(bf);

    // The weights must add up to 1.0 within one unit of rounding error.
    // The upper bound also guarantees that neither code path below can
    // produce a value above 255 or overflow 16-bit intermediates.
    GAPI_Assert(rc + gc + bc <= unity);
    GAPI_Assert(rc + gc + bc >= unity - 1);  // i.e. >= max value of ushort

#if CV_SIMD
    constexpr int nlanes = v_uint8::nlanes;

    if (width >= nlanes)
    {
        // Loop-invariant broadcasts, hoisted out of the pixel loop
        constexpr ushort half = 1 << 7;  // 0.5 in Q8.8
        const v_uint16 v_rc   = vx_setall_u16(rc);
        const v_uint16 v_gc   = vx_setall_u16(gc);
        const v_uint16 v_bc   = vx_setall_u16(bc);
        const v_uint16 v_half = vx_setall_u16(half);

        for (int w = 0; w < width; )
        {
            // process main part of pixels row
            for ( ; w <= width - nlanes; w += nlanes)
            {
                v_uint8 r, g, b;
                v_load_deinterleave(&in[3*w], r, g, b);

                v_uint16 r0, r1, g0, g1, b0, b1;
                v_expand(r, r0, r1);
                v_expand(g, g0, g1);
                v_expand(b, b0, b1);

                // (pixel << 8) is Q8.8; taking the high half of the product
                // with a Q0.16 weight yields Q8.8 again
                v_uint16 y0, y1;
                y0 = (v_mul_hi(r0 << 8, v_rc) +
                      v_mul_hi(g0 << 8, v_gc) +
                      v_mul_hi(b0 << 8, v_bc) +
                      v_half) >> 8;
                y1 = (v_mul_hi(r1 << 8, v_rc) +
                      v_mul_hi(g1 << 8, v_gc) +
                      v_mul_hi(b1 << 8, v_bc) +
                      v_half) >> 8;

                v_uint8 y;
                y = v_pack(y0, y1);
                v_store(&out[w], y);
            }

            // process tail (if any): step back so that the last full vector
            // ends exactly at the end of the row; the overlapping pixels are
            // simply recomputed from `in` with identical results
            if (w < width)
            {
                GAPI_DbgAssert(width - nlanes >= 0);
                w = width - nlanes;
            }
        }

        return;
    }
#endif

    // Scalar path: rows shorter than one vector, or builds without CV_SIMD
    constexpr int half = 1 << 15;  // 0.5 in Q0.16
    for (int w = 0; w < width; w++)
    {
        uchar r = in[3*w    ];
        uchar g = in[3*w + 1];
        uchar b = in[3*w + 2];

        ushort y = (r*rc + b*bc + g*gc + half) >> 16;
        out[w] = static_cast<uchar>(y);
    }
}
//---------------------
//
// Fluid kernels: Sobel
//
//---------------------
// Sobel 3x3: vertical pass // Sobel 3x3: vertical pass
template<bool noscale, typename DST> template<bool noscale, typename DST>
static void run_sobel3x3_vert(DST out[], int length, const float ky[], static void run_sobel3x3_vert(DST out[], int length, const float ky[],
...@@ -285,7 +385,6 @@ RUN_SOBEL_ROW( float, float) ...@@ -285,7 +385,6 @@ RUN_SOBEL_ROW( float, float)
#undef RUN_SOBEL_ROW #undef RUN_SOBEL_ROW
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
//----------------------------------------------------------------------
CV_CPU_OPTIMIZATION_NAMESPACE_END CV_CPU_OPTIMIZATION_NAMESPACE_END
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment