Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
1fbdca83
Commit
1fbdca83
authored
Nov 15, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #10083 from alalek:core_intrinsics_load_low
parents
fcdd8330
3a0039d2
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
47 additions
and
8 deletions
+47
-8
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+21
-1
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+2
-0
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+4
-0
intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+2
-0
vsx_utils.hpp
modules/core/include/opencv2/core/vsx_utils.hpp
+2
-7
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+16
-0
No files found.
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
1fbdca83
...
...
@@ -99,7 +99,7 @@ block and to save contents of the register to memory block.
@ref v_setall_s8, @ref v_setall_u8, ...,
@ref v_setzero_u8, @ref v_setzero_s8, ...
- Memory operations:
@ref v_load, @ref v_load_aligned, @ref v_load_halves,
@ref v_load, @ref v_load_aligned, @ref v_load_low, @ref v_load_halves,
@ref v_store, @ref v_store_aligned,
@ref v_store_high, @ref v_store_low
...
...
@@ -1080,6 +1080,26 @@ inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr)
return
v_reg
<
_Tp
,
V_SIMD128Traits
<
_Tp
>::
nlanes
>
(
ptr
);
}
/** @brief Load 64 bits of data into the lower half of a register.

Only the first half of the lanes is read from memory; the upper half of the
returned register is undefined and must not be relied upon.

@param ptr memory block containing data for the first half (0..n/2)
@code{.cpp}
int lo[2] = { 1, 2 };
v_int32x4 r = v_load_low(lo);
@endcode
 */
template<typename _Tp>
inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_low(const _Tp* ptr)
{
    typedef v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> reg_type;

    reg_type result;
    // Copy the low half lane by lane; the remaining lanes are never written here.
    const int half = result.nlanes / 2;
    for (int lane = 0; lane < half; ++lane)
        result.s[lane] = ptr[lane];
    return result;
}
/** @brief Load register contents from two memory blocks
@param loptr memory block containing data for first half (0..n/2)
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
1fbdca83
...
...
@@ -763,6 +763,8 @@ inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr), vdup_n_##suffix((_Tp)0))); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
1fbdca83
...
...
@@ -1016,6 +1016,8 @@ inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(_mm_loadu_si128((const __m128i*)ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(_mm_load_si128((const __m128i*)ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(_mm_loadl_epi64((const __m128i*)ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ \
return _Tpvec(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i*)ptr0), \
...
...
@@ -1044,6 +1046,8 @@ inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(_mm_loadu_##suffix(ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(_mm_load_##suffix(ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(_mm_castsi128_##suffix(_mm_loadl_epi64((const __m128i*)ptr))); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ \
return _Tpvec(_mm_castsi128_##suffix( \
...
...
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
View file @
1fbdca83
...
...
@@ -281,6 +281,8 @@ inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(ld_func(0, ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(ld_func(0, ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vec_ld_l8(ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
...
...
modules/core/include/opencv2/core/vsx_utils.hpp
View file @
1fbdca83
...
...
@@ -556,17 +556,12 @@ VSX_IMPL_2VRG_F(vec_uint4, vec_udword2, "vpkudus %0,%2,%1", vec_packs)
* vec_ld_l8(ptr) -> Load 64-bits of integer data to lower part
* vec_ldz_l8(ptr) -> Load 64-bits of integer data to lower part and zero upper part
**/
#if defined(__clang__) && !defined(__IBMCPP__)
# define __VSX_LOAD_L8(Tvec, p) (Tvec)((vec_udword2)*((uint64*)(p)))
#else
# define __VSX_LOAD_L8(Tvec, p) *((Tvec*)(p))
#endif
#define VSX_IMPL_LOAD_L8(Tvec, Tp) \
FORCE_INLINE(Tvec) vec_ld_l8(const Tp *p) \
{ return
__VSX_LOAD_L8(Tvec, p); }
\
{ return
((Tvec)vec_promote(*((uint64*)p), 0)); }
\
FORCE_INLINE(Tvec) vec_ldz_l8(const Tp *p) \
{ \
/* TODO: try (Tvec)(vec_udword2{*((uint64*)p), 0}) */
\
static const vec_bdword2 mask = {0xFFFFFFFFFFFFFFFF, 0x0000000000000000}; \
return vec_and(vec_ld_l8(p), (Tvec)mask); \
}
...
...
modules/core/test/test_intrin_utils.hpp
View file @
1fbdca83
...
...
@@ -198,6 +198,22 @@ template<typename R> struct TheTest
EXPECT_EQ
(
data
.
a
[
0
],
r3
.
get0
());
EXPECT_EQ
(
data
.
u
[
0
],
r4
.
get0
());
R
r_low
=
v_load_low
((
LaneType
*
)
data
.
u
.
d
);
EXPECT_EQ
(
data
.
u
[
0
],
r_low
.
get0
());
v_store
(
out
.
u
.
d
,
r_low
);
for
(
int
i
=
0
;
i
<
R
::
nlanes
/
2
;
++
i
)
{
EXPECT_EQ
((
LaneType
)
data
.
u
[
i
],
(
LaneType
)
out
.
u
[
i
]);
}
R
r_low_align8byte
=
v_load_low
((
LaneType
*
)((
char
*
)
data
.
u
.
d
+
8
));
EXPECT_EQ
(
data
.
u
[
R
::
nlanes
/
2
],
r_low_align8byte
.
get0
());
v_store
(
out
.
u
.
d
,
r_low_align8byte
);
for
(
int
i
=
0
;
i
<
R
::
nlanes
/
2
;
++
i
)
{
EXPECT_EQ
((
LaneType
)
data
.
u
[
i
+
R
::
nlanes
/
2
],
(
LaneType
)
out
.
u
[
i
]);
}
// check some store methods
out
.
u
.
clear
();
out
.
a
.
clear
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment