Commit 9f29506d authored by Andrey Kamaev

Refactored NEON optimization usage

parent 94a5bf88
...@@ -447,6 +447,7 @@ if(UNIX) ...@@ -447,6 +447,7 @@ if(UNIX)
endif() endif()
if(WITH_V4L) if(WITH_V4L)
CHECK_MODULE(libv4l1 HAVE_LIBV4L) CHECK_MODULE(libv4l1 HAVE_LIBV4L)
CHECK_INCLUDE_FILE(linux/videodev.h HAVE_CAMV4L) CHECK_INCLUDE_FILE(linux/videodev.h HAVE_CAMV4L)
CHECK_INCLUDE_FILE(linux/videodev2.h HAVE_CAMV4L2) CHECK_INCLUDE_FILE(linux/videodev2.h HAVE_CAMV4L2)
...@@ -889,6 +890,7 @@ if(MSVC) ...@@ -889,6 +890,7 @@ if(MSVC)
# 64-bit portability warnings, in MSVC8 # 64-bit portability warnings, in MSVC8
if(MSVC80) if(MSVC80)
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /Wp64") set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /Wp64")
endif() endif()
#if(MSVC90) #if(MSVC90)
# set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /D _BIND_TO_CURRENT_CRT_VERSION=1 /D _BIND_TO_CURRENT_VCLIBS_VERSION=1") # set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /D _BIND_TO_CURRENT_CRT_VERSION=1 /D _BIND_TO_CURRENT_VCLIBS_VERSION=1")
...@@ -1016,10 +1018,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) ...@@ -1016,10 +1018,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -DNDEBUG") set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -DNDEBUG")
set(EXTRA_C_FLAGS_DEBUG "${EXTRA_C_FLAGS_DEBUG} -O0 -ggdb3 -DDEBUG -D_DEBUG") set(EXTRA_C_FLAGS_DEBUG "${EXTRA_C_FLAGS_DEBUG} -O0 -ggdb3 -DDEBUG -D_DEBUG")
if(ANDROID)
#force compiler to interpret char as signed char
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsigned-char")
endif()
endif() endif()
# Extra link libs if the user selects building static libs: # Extra link libs if the user selects building static libs:
......
opencv_dir=`pwd`/../build #!/bin/sh
mkdir build cd `dirname $0`
opencv_build_dir=`pwd`/../build
mkdir -p build
cd build cd build
cmake -DOpenCVDIR=$opencv_dir -DCMAKE_TOOLCHAIN_FILE=$ANDTOOLCHAIN ..
cmake -DOpenCVDIR=$opencv_build_dir -DCMAKE_TOOLCHAIN_FILE=../../android.toolchain.cmake ..
opencv_dir=`pwd`/../build_neon #!/bin/sh
mkdir build_neon cd `dirname $0`
opencv_build_dir=`pwd`/../build_neon
mkdir -p build_neon
cd build_neon cd build_neon
cmake -DOpenCV_DIR=$opencv_dir -DARM_TARGETS="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=$ANDTOOLCHAIN ..
cmake -DOpenCVDIR=$opencv_build_dir -DARM_TARGET="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../../android.toolchain.cmake ..
...@@ -122,8 +122,13 @@ CV_INLINE IppiSize ippiSize(int width, int height) ...@@ -122,8 +122,13 @@ CV_INLINE IppiSize ippiSize(int width, int height)
#if defined ANDROID && defined __ARM_NEON__ #if defined ANDROID && defined __ARM_NEON__
#include "arm_neon.h" #include "arm_neon.h"
#define CV_NEON 1 #define CV_NEON 1
#define CPU_HAS_NEON_FEATURE (true)
//TODO: make real check using stuff from "cpu-features.h"
//((bool)android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON)
#else #else
#define CV_NEON 0 #define CV_NEON 0
#define CPU_HAS_NEON_FEATURE (false)
#endif #endif
#ifndef IPPI_CALL #ifndef IPPI_CALL
......
...@@ -44,11 +44,6 @@ ...@@ -44,11 +44,6 @@
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#if ANDROID && HAVE_NEON
#include <cpu-features.h>
#include <arm_neon.h>
#endif
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
...@@ -115,9 +110,8 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c ...@@ -115,9 +110,8 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c
{ {
#if __GNUC__ #if __GNUC__
ResultType result = 0; ResultType result = 0;
#if ANDROID && HAVE_NEON #if CV_NEON
static uint64_t features = android_getCpuFeatures(); if (CPU_HAS_NEON_FEATURE)
if ((features & ANDROID_CPU_ARM_FEATURE_NEON))
{ {
for (size_t i = 0; i < size; i += 16) for (size_t i = 0; i < size; i += 16)
{ {
...@@ -126,7 +120,7 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c ...@@ -126,7 +120,7 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c
//uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t) //uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t)
uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
uint8x16_t bitsSet += vcntq_u8 (AxorB); uint8x16_t bitsSet = vcntq_u8 (AxorB);
//uint16x8_t vpadalq_u8 (uint16x8_t, uint8x16_t) //uint16x8_t vpadalq_u8 (uint16x8_t, uint8x16_t)
uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
...@@ -138,25 +132,27 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c ...@@ -138,25 +132,27 @@ Hamming::ResultType Hamming::operator()(const unsigned char* a, const unsigned c
} }
else else
#endif #endif
//for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) {
typedef unsigned long long pop_t; //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
const size_t modulo = size % sizeof(pop_t); typedef unsigned long long pop_t;
const pop_t * a2 = reinterpret_cast<const pop_t*> (a); const size_t modulo = size % sizeof(pop_t);
const pop_t * b2 = reinterpret_cast<const pop_t*> (b); const pop_t * a2 = reinterpret_cast<const pop_t*> (a);
const pop_t * a2_end = a2 + (size/sizeof(pop_t)); const pop_t * b2 = reinterpret_cast<const pop_t*> (b);
const pop_t * a2_end = a2 + (size/sizeof(pop_t));
for (; a2 != a2_end; ++a2, ++b2) for (; a2 != a2_end; ++a2, ++b2)
result += __builtin_popcountll((*a2) ^ (*b2)); result += __builtin_popcountll((*a2) ^ (*b2));
if (modulo) if (modulo)
{ {
//in the case where size is not divisible by sizeof(size_t) //in the case where size is not divisible by sizeof(size_t)
//need to mask off the bits at the end //need to mask off the bits at the end
pop_t a_final=0,b_final=0; pop_t a_final=0,b_final=0;
memcpy(&a_final,a2,modulo); memcpy(&a_final,a2,modulo);
memcpy(&b_final,b2,modulo); memcpy(&b_final,b2,modulo);
result += __builtin_popcountll(a_final ^ b_final); result += __builtin_popcountll(a_final ^ b_final);
} }
}
return result; return result;
#else #else
return HammingLUT()(a,b,size); return HammingLUT()(a,b,size);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment