Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d7ff9243
Commit
d7ff9243
authored
Oct 26, 2011
by
Andrey Kamaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Better NEON Hamming distance
parent
509730c1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
44 additions
and
40 deletions
+44
-40
internal.hpp
modules/core/include/opencv2/core/internal.hpp
+1
-1
stat.cpp
modules/core/src/stat.cpp
+7
-11
dist.h
modules/flann/include/opencv2/flann/dist.h
+26
-27
miniflann.cpp
modules/flann/src/miniflann.cpp
+4
-0
precomp.hpp
modules/flann/src/precomp.hpp
+6
-1
No files found.
modules/core/include/opencv2/core/internal.hpp
View file @
d7ff9243
...
@@ -119,7 +119,7 @@ CV_INLINE IppiSize ippiSize(int width, int height)
...
@@ -119,7 +119,7 @@ CV_INLINE IppiSize ippiSize(int width, int height)
#define CV_SSE3 0
#define CV_SSE3 0
#endif
#endif
#if defined ANDROID && defined __ARM_NEON__
#if defined ANDROID && defined __ARM_NEON__
&& defined __GNUC__
#include "arm_neon.h"
#include "arm_neon.h"
#define CV_NEON 1
#define CV_NEON 1
...
...
modules/core/src/stat.cpp
View file @
d7ff9243
...
@@ -963,26 +963,22 @@ static const uchar popCountTable4[] =
...
@@ -963,26 +963,22 @@ static const uchar popCountTable4[] =
int
normHamming
(
const
uchar
*
a
,
const
uchar
*
b
,
int
n
)
int
normHamming
(
const
uchar
*
a
,
const
uchar
*
b
,
int
n
)
{
{
int
i
=
0
,
result
=
0
;
int
i
=
0
,
result
=
0
;
#if
defined __GNUC__ &&
CV_NEON
#if CV_NEON
if
(
CPU_HAS_NEON_FEATURE
)
if
(
CPU_HAS_NEON_FEATURE
)
{
{
result
=
0
;
uint32x4_t
bits
=
vmovq_n_u32
(
0
);
for
(
;
i
<=
n
-
16
;
i
+=
16
)
for
(;
i
<=
n
-
16
;
i
+=
16
)
{
{
uint8x16_t
A_vec
=
vld1q_u8
(
a
+
i
);
uint8x16_t
A_vec
=
vld1q_u8
(
a
+
i
);
uint8x16_t
B_vec
=
vld1q_u8
(
b
+
i
);
uint8x16_t
B_vec
=
vld1q_u8
(
b
+
i
);
//uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t)
uint8x16_t
AxorB
=
veorq_u8
(
A_vec
,
B_vec
);
uint8x16_t
AxorB
=
veorq_u8
(
A_vec
,
B_vec
);
uint8x16_t
bitsSet
=
vcntq_u8
(
AxorB
);
uint8x16_t
bitsSet
=
vcntq_u8
(
AxorB
);
//uint16x8_t vpadalq_u8 (uint16x8_t, uint8x16_t)
uint16x8_t
bitSet8
=
vpaddlq_u8
(
bitsSet
);
uint16x8_t
bitSet8
=
vpaddlq_u8
(
bitsSet
);
uint32x4_t
bitSet4
=
vpaddlq_u16
(
bitSet8
);
uint32x4_t
bitSet4
=
vpaddlq_u16
(
bitSet8
);
bits
=
vaddq_u32
(
bits
,
bitSet4
);
uint64x2_t
bitSet2
=
vpaddlq_u32
(
bitSet4
);
result
+=
vgetq_lane_u64
(
bitSet2
,
0
);
result
+=
vgetq_lane_u64
(
bitSet2
,
1
);
}
}
uint64x2_t
bitSet2
=
vpaddlq_u32
(
bits
);
result
=
vgetq_lane_s32
(
vreinterpretq_s32_u64
(
bitSet2
),
0
);
result
+=
vgetq_lane_s32
(
vreinterpretq_s32_u64
(
bitSet2
),
2
);
}
}
else
else
#endif
#endif
...
...
modules/flann/include/opencv2/flann/dist.h
View file @
d7ff9243
...
@@ -421,43 +421,42 @@ struct Hamming
...
@@ -421,43 +421,42 @@ struct Hamming
{
{
ResultType
result
=
0
;
ResultType
result
=
0
;
#if __GNUC__
#if __GNUC__
#if
ANDROID && HAVE
_NEON
#if
CV
_NEON
static
uint64_t
features
=
android_getCpuFeatures
();
if
(
CPU_HAS_NEON_FEATURE
)
{
if
((
features
&
ANDROID_CPU_ARM_FEATURE_NEON
))
{
uint32x4_t
bits
=
vmovq_n_u32
(
0
);
for
(
size_t
i
=
0
;
i
<
size
;
i
+=
16
)
{
for
(
size_t
i
=
0
;
i
<
size
;
i
+=
16
)
{
uint8x16_t
A_vec
=
vld1q_u8
(
a
+
i
);
uint8x16_t
A_vec
=
vld1q_u8
(
a
+
i
);
uint8x16_t
B_vec
=
vld1q_u8
(
b
+
i
);
uint8x16_t
B_vec
=
vld1q_u8
(
b
+
i
);
//uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t)
uint8x16_t
AxorB
=
veorq_u8
(
A_vec
,
B_vec
);
uint8x16_t
AxorB
=
veorq_u8
(
A_vec
,
B_vec
);
uint8x16_t
bitsSet
=
vcntq_u8
(
AxorB
);
uint8x16_t
bitsSet
+=
vcntq_u8
(
AxorB
);
//uint16x8_t vpadalq_u8 (uint16x8_t, uint8x16_t)
uint16x8_t
bitSet8
=
vpaddlq_u8
(
bitsSet
);
uint16x8_t
bitSet8
=
vpaddlq_u8
(
bitsSet
);
uint32x4_t
bitSet4
=
vpaddlq_u16
(
bitSet8
);
uint32x4_t
bitSet4
=
vpaddlq_u16
(
bitSet8
);
bits
=
vaddq_u32
(
bits
,
bitSet4
);
uint64x2_t
bitSet2
=
vpaddlq_u32
(
bitSet4
);
result
+=
vgetq_lane_u64
(
bitSet2
,
0
);
result
+=
vgetq_lane_u64
(
bitSet2
,
1
);
}
}
uint64x2_t
bitSet2
=
vpaddlq_u32
(
bits
);
result
=
vgetq_lane_s32
(
vreinterpretq_s32_u64
(
bitSet2
),
0
);
result
+=
vgetq_lane_s32
(
vreinterpretq_s32_u64
(
bitSet2
),
2
);
}
}
else
else
#endif
#endif
//for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
{
typedef
unsigned
long
long
pop_t
;
//for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
const
size_t
modulo
=
size
%
sizeof
(
pop_t
);
typedef
unsigned
long
long
pop_t
;
const
pop_t
*
a2
=
reinterpret_cast
<
const
pop_t
*>
(
a
);
const
size_t
modulo
=
size
%
sizeof
(
pop_t
);
const
pop_t
*
b2
=
reinterpret_cast
<
const
pop_t
*>
(
b
);
const
pop_t
*
a2
=
reinterpret_cast
<
const
pop_t
*>
(
a
);
const
pop_t
*
a2_end
=
a2
+
(
size
/
sizeof
(
pop_t
));
const
pop_t
*
b2
=
reinterpret_cast
<
const
pop_t
*>
(
b
);
const
pop_t
*
a2_end
=
a2
+
(
size
/
sizeof
(
pop_t
));
for
(;
a2
!=
a2_end
;
++
a2
,
++
b2
)
result
+=
__builtin_popcountll
((
*
a2
)
^
(
*
b2
));
for
(;
a2
!=
a2_end
;
++
a2
,
++
b2
)
result
+=
__builtin_popcountll
((
*
a2
)
^
(
*
b2
));
if
(
modulo
)
{
//in the case where size is not dividable by sizeof(size_t)
if
(
modulo
)
{
//need to mask off the bits at the end
//in the case where size is not dividable by sizeof(size_t)
pop_t
a_final
=
0
,
b_final
=
0
;
//need to mask off the bits at the end
memcpy
(
&
a_final
,
a2
,
modulo
);
pop_t
a_final
=
0
,
b_final
=
0
;
memcpy
(
&
b_final
,
b2
,
modulo
);
memcpy
(
&
a_final
,
a2
,
modulo
);
result
+=
__builtin_popcountll
(
a_final
^
b_final
);
memcpy
(
&
b_final
,
b2
,
modulo
);
result
+=
__builtin_popcountll
(
a_final
^
b_final
);
}
}
}
#else
#else
HammingLUT
lut
;
HammingLUT
lut
;
...
...
modules/flann/src/miniflann.cpp
View file @
d7ff9243
...
@@ -312,7 +312,11 @@ buildIndex(void*& index, const Mat& data, const IndexParams& params, const Dista
...
@@ -312,7 +312,11 @@ buildIndex(void*& index, const Mat& data, const IndexParams& params, const Dista
buildIndex_
<
Distance
,
::
cvflann
::
Index
<
Distance
>
>
(
index
,
data
,
params
,
dist
);
buildIndex_
<
Distance
,
::
cvflann
::
Index
<
Distance
>
>
(
index
,
data
,
params
,
dist
);
}
}
#if CV_NEON
typedef
::
cvflann
::
Hamming
<
uchar
>
HammingDistance
;
#else
typedef
::
cvflann
::
HammingLUT
HammingDistance
;
typedef
::
cvflann
::
HammingLUT
HammingDistance
;
#endif
typedef
::
cvflann
::
LshIndex
<
HammingDistance
>
LshIndex
;
typedef
::
cvflann
::
LshIndex
<
HammingDistance
>
LshIndex
;
Index
::
Index
()
Index
::
Index
()
...
...
modules/flann/src/precomp.hpp
View file @
d7ff9243
...
@@ -5,6 +5,12 @@
...
@@ -5,6 +5,12 @@
#include <cstdarg>
#include <cstdarg>
#include <sstream>
#include <sstream>
#ifdef HAVE_CVCONFIG_H
# include "cvconfig.h"
#endif
#include "opencv2/core/core.hpp"
#include "opencv2/core/internal.hpp"
#include "opencv2/flann/miniflann.hpp"
#include "opencv2/flann/miniflann.hpp"
#include "opencv2/flann/dist.h"
#include "opencv2/flann/dist.h"
#include "opencv2/flann/index_testing.h"
#include "opencv2/flann/index_testing.h"
...
@@ -15,7 +21,6 @@
...
@@ -15,7 +21,6 @@
// index types
// index types
#include "opencv2/flann/all_indices.h"
#include "opencv2/flann/all_indices.h"
#include "opencv2/flann/flann_base.hpp"
#include "opencv2/flann/flann_base.hpp"
#endif
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment