Merge pull request #11297 from seiko2plus:VSXImprovements_1

2129db6e · Alexander Alekhin · 0b9d0759 · 56ec10bf · 2129db6e · 2129db6e
Commit 2129db6e authored Apr 12, 2018 by Alexander Alekhin
Showing with 33 additions and 94 deletions

CMakeLists.txt CMakeLists.txt +1 -1

intrin_vsx.hpp modules/core/include/opencv2/core/hal/intrin_vsx.hpp +32 -93

vsx_utils.hpp modules/core/include/opencv2/core/vsx_utils.hpp +0 -0

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -318,7 +318,7 @@ OCV_OPTION(ENABLE_PROFILING           "Enable profiling in the GCC compiler (Add
 OCV_OPTION(ENABLE_COVERAGE            "Enable coverage collection with  GCov"                    OFF  IF CV_GCC )
 OCV_OPTION(ENABLE_OMIT_FRAME_POINTER  "Enable -fomit-frame-pointer for GCC"                      ON   IF CV_GCC )
 OCV_OPTION(ENABLE_POWERPC             "Enable PowerPC for GCC"                                   ON   IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
-OCV_OPTION(ENABLE_VSX                 "Enable POWER8 and above VSX (64-bit little-endian)"       ON   IF (CV_GCC AND PPC64LE) )
+OCV_OPTION(ENABLE_VSX                 "Enable POWER8 and above VSX (64-bit little-endian)"       ON   IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
 OCV_OPTION(ENABLE_FAST_MATH           "Enable -ffast-math (not recommended for GCC 4.6.x)"       OFF  IF (CV_GCC AND (X86 OR X86_64)) )
 OCV_OPTION(ENABLE_NEON                "Enable NEON instructions"                                 (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
 OCV_OPTION(ENABLE_VFPV3               "Enable VFPv3-D32 instructions"                            OFF  IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )

--- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                          License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
-// Copyright (C) 2015, Itseez Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html

 #ifndef OPENCV_HAL_VSX_HPP
 #define OPENCV_HAL_VSX_HPP
@@ -276,34 +236,38 @@ OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
 OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
 OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)

-#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, ld, ld_a, st, st_a)    \
 inline _Tpvec v_load(const _Tp* ptr)                                        \
-{ return _Tpvec(ld_func(0, ptr)); }                                         \
-inline _Tpvec v_load_aligned(const _Tp* ptr)                                \
-{ return _Tpvec(ld_func(0, ptr)); }                                         \
+{ return _Tpvec(ld(0, ptr)); }                                              \
+inline _Tpvec v_load_aligned(VSX_UNUSED(const _Tp* ptr))                    \
+{ return _Tpvec(ld_a(0, ptr)); }                                            \
 inline _Tpvec v_load_low(const _Tp* ptr)                                    \
 { return _Tpvec(vec_ld_l8(ptr)); }                                          \
 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1)               \
 { return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); }          \
 inline void v_store(_Tp* ptr, const _Tpvec& a)                              \
-{ st_func(a.val, 0, ptr); }                                                 \
-inline void v_store_aligned(_Tp* ptr, const _Tpvec& a)                      \
-{ st_func(a.val, 0, ptr); }                                                 \
+{ st(a.val, 0, ptr); }                                                      \
+inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a)          \
+{ st_a(a.val, 0, ptr); }                                                    \
 inline void v_store_low(_Tp* ptr, const _Tpvec& a)                          \
 { vec_st_l8(a.val, ptr); }                                                  \
 inline void v_store_high(_Tp* ptr, const _Tpvec& a)                         \
 { vec_st_h8(a.val, ptr); }

-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
-OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
+#define OPENCV_HAL_IMPL_VSX_LOADSTORE(_Tpvec, _Tp) \
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(_Tpvec, _Tp, vsx_ld, vec_ld, vsx_st, vec_st)
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint8x16,  uchar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int8x16,   schar)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint16x8,  ushort)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int16x8,   short)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_uint32x4,  uint)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_int32x4,   int)
+OPENCV_HAL_IMPL_VSX_LOADSTORE(v_float32x4, float)
+
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_float64x2, double, vsx_ld,  vsx_ld,  vsx_st,  vsx_st)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_uint64x2,  uint64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)
+OPENCV_HAL_IMPL_VSX_LOADSTORE_C(v_int64x2,    int64, vsx_ld2, vsx_ld2, vsx_st2, vsx_st2)

 //////////////// Value reordering ///////////////

@@ -343,7 +307,7 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1)   \
    b1.val = fl(a.val);                                           \
 }                                                                 \
 inline _Tpwvec v_load_expand(const _Tp* ptr)                      \
-{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
+{ return _Tpwvec(fh(vec_ld_l8(ptr))); }

 OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
 OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
@@ -353,10 +317,10 @@ OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpac
 OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)

 inline v_uint32x4 v_load_expand_q(const uchar* ptr)
-{ return v_uint32x4(vec_ld_buw(ptr)); }
+{ return v_uint32x4(vec_uint4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }

 inline v_int32x4 v_load_expand_q(const schar* ptr)
-{ return v_int32x4(vec_ld_bsw(ptr)); }
+{ return v_int32x4(vec_int4_set(ptr[0], ptr[1], ptr[2], ptr[3])); }

 /* pack */
 #define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack)    \
@@ -429,36 +393,6 @@ inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
    d.val = vec_mergesql(a.val, b.val);
 }

-/* Extract */
-template<int s, typename _Tpvec>
-inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
-{
-    const int w = sizeof(typename _Tpvec::lane_type);
-    const int n = _Tpvec::nlanes;
-    const unsigned int sf = ((w * n) - (s * w));
-    if (s == 0)
-        return _Tpvec(a.val);
-    else if (sf > 15)
-        return _Tpvec();
-    // bitwise it just to make xlc happy
-    return _Tpvec(vec_sld(b.val, a.val, sf & 15));
-}
-
-#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec)             \
-template<int s>                                           \
-inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
-{                                                         \
-    switch(s) {                                           \
-    case 0: return _Tpvec(a.val);                         \
-    case 2: return _Tpvec(b.val);                         \
-    case 1: return _Tpvec(vec_sldw(b.val, a.val, 2));     \
-    default: return _Tpvec();                             \
-    }                                                     \
-}
-OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
-OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
-
-
 ////////// Arithmetic, bitwise and comparison operations /////////

 /* Element-wise binary and unary operations */
@@ -669,6 +603,11 @@ OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
 OPENCV_IMPL_VSX_ROTATE_64(v_int64x2,  left, b, a)
 OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)

+/* Extract */
+template<int s, typename _Tpvec>
+inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
+{ return v_rotate_right<s>(a, b); }
+
 ////////// Reduce and mask /////////

 /** Reduce **/

--- a/modules/core/include/opencv2/core/vsx_utils.hpp
+++ b/modules/core/include/opencv2/core/vsx_utils.hpp