Commit d0777780 authored by Sayed Adel

Added support for VSX

parent 2a253772
......@@ -298,6 +298,7 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CMAKE_COMPILER_IS_GNUCXX )
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
# ENABLE_VSX: switch for the POWER8+ VSX code paths. Declared with a default of ON and
# guarded by the condition after IF (CMAKE_COMPILER_IS_GNUCXX together with PPC64LE).
# NOTE(review): what happens to the option when the IF condition is false is decided by
# the OCV_OPTION macro, which is not visible here - confirm against its definition.
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF (CMAKE_COMPILER_IS_GNUCXX AND PPC64LE) )
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
......
......@@ -28,6 +28,7 @@
# Complete list of optimization names known to the build.
# SSE/AVX group first (AVX512 entries are not listed here) ...
set(CPU_ALL_OPTIMIZATIONS SSE SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 POPCNT AVX FP16 AVX2 FMA3) # without AVX512
# ... then the NEON/VFPV3 group and VSX. FP16 is named in both groups on purpose;
# the repeated entry is dropped right below, keeping its first position.
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16 VSX)
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
......@@ -79,6 +80,7 @@ ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
# Each call pairs a legacy ENABLE_* option with its optimization name; VSX uses the same
# third argument (OFF) as the VFPV3 and NEON entries.
# NOTE(review): presumably this maps the old switch onto the CPU_* optimization settings;
# the meaning of the third argument is defined by the helper, which is not visible here.
ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX OFF)
macro(ocv_is_optimization_in_list resultvar check_opt)
set(__checked "")
......@@ -266,6 +268,15 @@ elseif(ARM OR AARCH64)
ocv_update(CPU_FP16_IMPLIES "NEON")
set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
endif()
elseif(PPC64LE)
  # PPC64LE branch: VSX is the only optimization registered here, together with the
  # source file used to probe for it.
  ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX")
  ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
  # Clang gets the explicit VSX/AltiVec switches; every other compiler (and any Clang
  # whose compiler path contains "xlc") gets -mcpu=power8.
  # The compiler path is quoted so that an empty value, or a path containing spaces or
  # semicolons, still reaches MATCHES as exactly one argument.
  if(CMAKE_COMPILER_IS_CLANGCXX AND (NOT "${CMAKE_CXX_COMPILER}" MATCHES "xlc"))
    ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")
  else()
    ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")
  endif()
endif()
# Helper values for cmake-gui
......
......@@ -72,6 +72,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(ARM 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
set(AARCH64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
  # The alternatives are grouped so that "^" anchors both spellings, the same way the
  # ARM and AArch64 patterns in this chain are written. Without the group, the
  # upper-case alternative would match anywhere inside the processor string.
  set(PPC64LE 1)
endif()
# Workaround for 32-bit operating systems on 64-bit x86_64 processor
......
......@@ -31,6 +31,9 @@ elseif(ARM)
elseif(AARCH64)
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm64")
set(CPACK_RPM_PACKAGE_ARCHITECTURE "aarch64")
elseif(PPC64LE)
# The two packaging formats spell this architecture differently: the Debian value is
# "ppc64el" (Debian's port name), while the RPM value is "ppc64le".
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "ppc64el")
set(CPACK_RPM_PACKAGE_ARCHITECTURE "ppc64le")
else()
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
......@@ -164,4 +167,4 @@ endif(NOT OPENCV_CUSTOM_PACKAGE_INFO)
include(CPack)
ENDif(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
\ No newline at end of file
ENDif(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
# if defined(__VSX__)
# include <altivec.h>
# else
# error "VSX is not supported"
# endif
int main()
{
__vector float testF = vec_splats(0.f);
testF = vec_madd(testF, testF, testF);
return 0;
}
......@@ -740,5 +740,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
} // cv
#include "opencv2/core/neon_utils.hpp"
#include "opencv2/core/vsx_utils.hpp"
#endif //OPENCV_CORE_BASE_HPP
......@@ -99,6 +99,14 @@
# include <arm_neon.h>
#endif
// VSX intrinsics: enabled only when the compiler targets VSX on 64-bit little-endian
// PowerPC. CV_VSX is set to 1 in that case.
// The #undef lines directly after the include drop the names vector, pixel and bool,
// presumably because <altivec.h> may define them as macros that would clash with
// ordinary C++ identifiers - confirm against the compiler's header.
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
# define CV_VSX 1
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
......@@ -135,6 +143,12 @@ struct VZeroUpperGuard {
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
// Compatibility path (outside the OpenCV build, see the closing #endif comment):
// same VSX setup as in the intrinsics section - include <altivec.h>, drop the names
// vector, pixel and bool (presumably macros from that header - confirm), set CV_VSX.
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
# define CV_VSX 1
#endif
#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
......@@ -208,3 +222,7 @@ struct VZeroUpperGuard {
#ifndef CV_NEON
# define CV_NEON 0
#endif
// Fallback: make sure CV_VSX is always defined (0 when VSX was not enabled above),
// so it can be tested with a plain "#if CV_VSX".
#ifndef CV_VSX
# define CV_VSX 0
#endif
......@@ -180,5 +180,20 @@
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// VSX dispatch helpers. Three cases, all requiring optimizations not to be disabled
// and intrinsics to be enabled for the first two:
//  - CV_CPU_COMPILE_VSX defined: support is the constant 1 and CV_CPU_CALL_VSX
//    returns opt_VSX::fn unconditionally.
//  - CV_CPU_DISPATCH_COMPILE_VSX defined: support is queried at run time through
//    cv::checkHardwareSupport(CV_CPU_VSX) and the call is guarded by that check.
//  - otherwise: CV_TRY_VSX and CV_CPU_HAS_SUPPORT_VSX are 0 and CV_CPU_CALL_VSX
//    expands to nothing.
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_HAS_SUPPORT_VSX 1
# define CV_CPU_CALL_VSX(fn, args) return (opt_VSX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
#else
# define CV_TRY_VSX 0
# define CV_CPU_HAS_SUPPORT_VSX 0
# define CV_CPU_CALL_VSX(fn, args)
#endif
// Chain link: emit the VSX call first, then expand the dispatch chain for the next mode.
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
......@@ -153,6 +153,8 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
#define CV_CPU_NEON 100
// Feature id for VSX; the CpuFeatures enum carries the same value as CPU_VSX.
// NOTE(review): presumably ids must not exceed CV_HARDWARE_MAX_FEATURE - confirm.
#define CV_CPU_VSX 200
// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE 255
......@@ -182,7 +184,9 @@ enum CpuFeatures {
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
CPU_NEON = 100
CPU_NEON = 100,
CPU_VSX = 200
};
......
This diff is collapsed.
......@@ -85,7 +85,7 @@
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/sse_utils.hpp"
#include "opencv2/core/neon_utils.hpp"
#include "opencv2/core/vsx_utils.hpp"
#include "arithm_core.hpp"
#include "hal_replacement.hpp"
......
......@@ -80,6 +80,18 @@ Mutex* __initialization_mutex_initializer = &getInitializationMutex();
# include <cpu-features.h>
#endif
// Support for run-time VSX detection. It is only needed when the build does not
// already assume VSX (__VSX__ undefined) and the target is 64-bit PowerPC Linux.
// <sys/auxv.h> is a system header, so it is included with angle brackets; the quote
// form would search the including file's directory first.
// AT_HWCAP2 and PPC_FEATURE2_ARCH_2_07 get fallback values in case the system
// headers do not define them.
#ifndef __VSX__
# if defined __PPC64__ && defined __linux__
#   include <sys/auxv.h>
#   ifndef AT_HWCAP2
#     define AT_HWCAP2 26
#   endif
#   ifndef PPC_FEATURE2_ARCH_2_07
#     define PPC_FEATURE2_ARCH_2_07 0x80000000
#   endif
# endif
#endif
#if defined _WIN32 || defined WINCE
#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
#define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
......@@ -295,6 +307,8 @@ struct HWFeatures
g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL";
g_hwFeatureNames[CPU_NEON] = "NEON";
g_hwFeatureNames[CPU_VSX] = "VSX";
}
void initialize(void)
......@@ -504,6 +518,16 @@ struct HWFeatures
#endif
#endif
// VSX availability: taken as given when the build itself targets VSX (__VSX__),
// probed through getauxval() on 64-bit PowerPC Linux, and false everywhere else.
#if defined __VSX__
    have[CV_CPU_VSX] = true;
#elif defined __PPC64__ && defined __linux__
    uint64 hwcaps = getauxval(AT_HWCAP);
    uint64 hwcap2 = getauxval(AT_HWCAP2);
    // All three capability bits must be set: PPC_FEATURE_PPC_LE and
    // PPC_FEATURE_HAS_VSX in AT_HWCAP, PPC_FEATURE2_ARCH_2_07 in AT_HWCAP2.
    have[CV_CPU_VSX] = ((hwcaps & PPC_FEATURE_PPC_LE) != 0)
                    && ((hwcaps & PPC_FEATURE_HAS_VSX) != 0)
                    && ((hwcap2 & PPC_FEATURE2_ARCH_2_07) != 0);
#else
    have[CV_CPU_VSX] = false;
#endif
int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])))
{
......
......@@ -3091,6 +3091,9 @@ void printVersionInfo(bool useStdOut)
#if CV_FP16
if (checkHardwareSupport(CV_CPU_FP16)) cpu_features += " fp16";
#endif
// Add the " VSX" tag only when this build has CV_VSX enabled and the running CPU
// reports VSX support.
#if CV_VSX
if (checkHardwareSupport(CV_CPU_VSX)) cpu_features += " VSX";
#endif
cpu_features.erase(0, 1); // erase initial space
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment