imgproc: dispatch color*

Dispatching of the Lab/XYZ modes (color_lab.cpp) has been postponed:
- the table-initialization code needs to be split from the pixel-processing code first;
- switching between SSE4.2 and AVX2 code generation gives no significant performance improvement.

imgproc: dispatch color*
Dispatching of the Lab/XYZ modes (color_lab.cpp) has been postponed:
- the table-initialization code needs to be split from the pixel-processing code first;
- switching between SSE4.2 and AVX2 code generation gives no significant performance improvement.
8b541e45 · Alexander Alekhin · 39783a65 · 8b541e45 · 8b541e45 · 8b541e45
Commit 8b541e45 authored Mar 07, 2019 by Alexander Alekhin
13 changed files
--- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@@ -124,6 +124,10 @@

 #if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
 struct VZeroUpperGuard {
+#ifdef __GNUC__
+    __attribute__((always_inline))
+#endif
+    inline VZeroUpperGuard() { _mm256_zeroupper(); }
 #ifdef __GNUC__
    __attribute__((always_inline))
 #endif

--- a/modules/core/include/opencv2/core/private.hpp
+++ b/modules/core/include/opencv2/core/private.hpp
@@ -796,9 +796,9 @@ CV_EXPORTS InstrNode*   getCurrentNode();
 #endif

 #ifdef __CV_AVX_GUARD
-#define CV_INSTRUMENT_REGION(); __CV_AVX_GUARD CV_INSTRUMENT_REGION_();
+#define CV_INSTRUMENT_REGION() __CV_AVX_GUARD CV_INSTRUMENT_REGION_();
 #else
-#define CV_INSTRUMENT_REGION(); CV_INSTRUMENT_REGION_();
+#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_();
 #endif

 namespace cv {

--- a/modules/imgproc/CMakeLists.txt
+++ b/modules/imgproc/CMakeLists.txt
 set(the_description "Image Processing")
 ocv_add_dispatched_file(accum SSE4_1 AVX AVX2)
+ocv_add_dispatched_file(color_hsv SSE2 SSE4_1 AVX2)
+ocv_add_dispatched_file(color_rgb SSE2 SSE4_1 AVX2)
+ocv_add_dispatched_file(color_yuv SSE2 SSE4_1 AVX2)
 ocv_define_module(imgproc opencv_core WRAP java python js)
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -3,6 +3,7 @@
 // of this distribution and at http://opencv.org/license.html

 #include "precomp.hpp"
+#include "opencl_kernels_imgproc.hpp"
 #include "color.hpp"

 namespace cv

--- a/modules/imgproc/src/color.hpp
+++ b/modules/imgproc/src/color.hpp
@@ -3,59 +3,17 @@
 // of this distribution and at http://opencv.org/license.html

 #include "opencv2/imgproc.hpp"
-#include "opencv2/core/utility.hpp"
-#include <limits>
-#include "opencl_kernels_imgproc.hpp"
 #include "hal_replacement.hpp"
-#include "opencv2/core/hal/intrin.hpp"
-#include "opencv2/core/softfloat.hpp"

-#define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
-
-namespace cv
-{
-
-//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
-const float B2YF = 0.114f;
-const float G2YF = 0.587f;
-const float R2YF = 0.299f;
-
-enum
-{
-    yuv_shift = 14,
-    xyz_shift = 12,
-    R2Y = 4899, // == R2YF*16384
-    G2Y = 9617, // == G2YF*16384
-    B2Y = 1868, // == B2YF*16384
-    BLOCK_SIZE = 256
-};
-
-template<typename _Tp> struct ColorChannel
-{
-    typedef float worktype_f;
-    static _Tp max() { return std::numeric_limits<_Tp>::max(); }
-    static _Tp half() { return (_Tp)(max()/2 + 1); }
-};
-
-template<> struct ColorChannel<float>
-{
-    typedef float worktype_f;
-    static float max() { return 1.f; }
-    static float half() { return 0.5f; }
-};
-
-/*template<> struct ColorChannel<double>
-{
-    typedef double worktype_f;
-    static double max() { return 1.; }
-    static double half() { return 0.5; }
-};*/
+namespace cv {

 //
 // Helper functions
 //

-namespace {
+namespace impl {
+
+#include "color.simd_helpers.hpp"

 inline bool isHSV(int code)
 {
@@ -209,40 +167,9 @@ inline int uIndex(int code)
 }

 } // namespace::
+using namespace impl;

-template<int i0, int i1 = -1, int i2 = -1>
-struct Set
-{
-    static bool contains(int i)
-    {
-        return (i == i0 || i == i1 || i == i2);
-    }
-};
-
-template<int i0, int i1>
-struct Set<i0, i1, -1>
-{
-    static bool contains(int i)
-    {
-        return (i == i0 || i == i1);
-    }
-};
-
-template<int i0>
-struct Set<i0, -1, -1>
-{
-    static bool contains(int i)
-    {
-        return (i == i0);
-    }
-};
-
-enum SizePolicy
-{
-    TO_YUV, FROM_YUV, NONE
-};
-
-template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE >
+/*template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE >
 struct CvtHelper
 {
    CvtHelper(InputArray _src, OutputArray _dst, int dcn)
@@ -282,7 +209,7 @@ struct CvtHelper
    Mat src, dst;
    int depth, scn;
    Size dstSz;
-};
+};*/

 #ifdef HAVE_OPENCL

@@ -380,49 +307,7 @@ struct OclHelper

 #endif

-///////////////////////////// Top-level template function ////////////////////////////////
-
-template <typename Cvt>
-class CvtColorLoop_Invoker : public ParallelLoopBody
-{
-    typedef typename Cvt::channel_type _Tp;
-public:
-
-    CvtColorLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt) :
-        ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_),
-        width(width_), cvt(_cvt)
-    {
-    }
-
-    virtual void operator()(const Range& range) const CV_OVERRIDE
-    {
-        CV_TRACE_FUNCTION();
-
-        const uchar* yS = src_data + static_cast<size_t>(range.start) * src_step;
-        uchar* yD = dst_data + static_cast<size_t>(range.start) * dst_step;

-        for( int i = range.start; i < range.end; ++i, yS += src_step, yD += dst_step )
-            cvt(reinterpret_cast<const _Tp*>(yS), reinterpret_cast<_Tp*>(yD), width);
-    }
-
-private:
-    const uchar * src_data;
-    const size_t src_step;
-    uchar * dst_data;
-    const size_t dst_step;
-    const int width;
-    const Cvt& cvt;
-
-    const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
-};
-
-template <typename Cvt>
-void CvtColorLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
-{
-    parallel_for_(Range(0, height),
-                  CvtColorLoop_Invoker<Cvt>(src_data, src_step, dst_data, dst_step, width, cvt),
-                  (width * height) / static_cast<double>(1<<16));
-}

 #if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
 #  define NEED_IPP 1

--- a/modules/imgproc/src/color.simd_helpers.hpp
+++ b/modules/imgproc/src/color.simd_helpers.hpp
--- a/modules/imgproc/src/color_hsv.dispatch.cpp
+++ b/modules/imgproc/src/color_hsv.dispatch.cpp
--- a/modules/imgproc/src/color_hsv.simd.hpp
+++ b/modules/imgproc/src/color_hsv.simd.hpp
--- a/modules/imgproc/src/color_lab.cpp
+++ b/modules/imgproc/src/color_lab.cpp
@@ -9,6 +9,10 @@
 \**********************************************************************************/

 #include "precomp.hpp"
+#include "opencl_kernels_imgproc.hpp"
+#include "opencv2/core/hal/intrin.hpp"
+#include "opencv2/core/softfloat.hpp"
+
 #include "color.hpp"

 using cv::softfloat;

--- a/modules/imgproc/src/color_rgb.dispatch.cpp
+++ b/modules/imgproc/src/color_rgb.dispatch.cpp
--- a/modules/imgproc/src/color_rgb.simd.hpp
+++ b/modules/imgproc/src/color_rgb.simd.hpp
--- a/modules/imgproc/src/color_yuv.dispatch.cpp
+++ b/modules/imgproc/src/color_yuv.dispatch.cpp
--- a/modules/imgproc/src/color_yuv.simd.hpp
+++ b/modules/imgproc/src/color_yuv.simd.hpp