Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
595fd275
Commit
595fd275
authored
Aug 31, 2016
by
Maksim Shabunin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7182 from mself:two_channel_universal_intrinsics
parents
d4ae7f32
9678d48e
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
111 additions
and
8 deletions
+111
-8
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+47
-8
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+13
-0
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+24
-0
test_intrin.cpp
modules/core/test/test_intrin.cpp
+27
-0
No files found.
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
595fd275
...
...
@@ -103,7 +103,7 @@ block and to save contents of the register to memory block.
These operations allow reordering or recombining elements in one or multiple vectors.
- Interleave, deinterleave (3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
- Interleave, deinterleave (
2,
3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
- Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
- Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
...
...
@@ -1075,12 +1075,31 @@ v_load_expand_q(const _Tp* ptr)
return
c
;
}
/** @brief Load and deinterleave (
4
channels)
/** @brief Load and deinterleave (
2
channels)
Load data from memory deinterleave and store to
4
registers.
Load data from memory deinterleave and store to
2
registers.
Scheme:
@code
{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
{A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}
@endcode
For all types except 64-bit. */
template<typename _Tp, int n>
inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, v_reg<_Tp, n>& b)
{
    // Memory holds n consecutive (a, b) pairs: the first element of pair k
    // goes to lane k of a, the second to lane k of b.
    for (int lane = 0; lane < n; ++lane)
    {
        const _Tp* pair = ptr + 2 * lane;
        a.s[lane] = pair[0];
        b.s[lane] = pair[1];
    }
}
/** @brief Load and deinterleave (3 channels)
Load data from memory deinterleave and store to 3 registers.
Scheme:
@code
{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
@endcode
For all types except 64-bit. */
template
<
typename
_Tp
,
int
n
>
inline
void
v_load_deinterleave
(
const
_Tp
*
ptr
,
v_reg
<
_Tp
,
n
>&
a
,
...
...
@@ -1095,12 +1114,12 @@ template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_
}
}
/** @brief Load and deinterleave (
3
channels)
/** @brief Load and deinterleave (
4
channels)
Load data from memory deinterleave and store to
3
registers.
Load data from memory deinterleave and store to
4
registers.
Scheme:
@code
{A1 B1 C1
A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C
2 ...}
{A1 B1 C1
D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D
2 ...}
@endcode
For all types except 64-bit. */
template
<
typename
_Tp
,
int
n
>
...
...
@@ -1118,12 +1137,32 @@ inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
}
}
/** @brief Interleave and store (2 channels)
Interleave and store data from 2 registers to memory.
Scheme:
@code
{A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}
@endcode
For all types except 64-bit. */
template<typename _Tp, int n>
inline void v_store_interleave(_Tp* ptr, const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
{
    // Writes 2*n elements: lane k of a lands at ptr[2k], lane k of b at ptr[2k + 1].
    for (int lane = 0; lane < n; ++lane)
    {
        _Tp* pair = ptr + 2 * lane;
        pair[0] = a.s[lane];
        pair[1] = b.s[lane];
    }
}
/** @brief Interleave and store (3 channels)
Interleave and store data from 3 registers to memory.
Scheme:
@code
{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D
2 ...}
{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
==> {A1 B1 C1 A2 B2 C
2 ...}
@endcode
For all types except 64-bit. */
template
<
typename
_Tp
,
int
n
>
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
595fd275
...
...
@@ -809,6 +809,12 @@ OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4
(
float32x4
,
f32
)
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
{ \
_Tpvec##x2_t v = vld2q_##suffix(ptr); \
a.val = v.val[0]; \
b.val = v.val[1]; \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
{ \
_Tpvec##x3_t v = vld3q_##suffix(ptr); \
...
...
@@ -825,6 +831,13 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
c.val = v.val[2]; \
d.val = v.val[3]; \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
_Tpvec##x2_t v; \
v.val[0] = a.val; \
v.val[1] = b.val; \
vst2q_##suffix(ptr, v); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
{ \
_Tpvec##x3_t v; \
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
595fd275
...
...
@@ -1374,6 +1374,18 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
v_transpose4x4
(
u0
,
u1
,
u2
,
u3
,
a
,
b
,
c
,
d
);
}
// 2-channel, float only
inline
void
v_load_deinterleave
(
const
float
*
ptr
,
v_float32x4
&
a
,
v_float32x4
&
b
)
{
const
int
mask_lo
=
_MM_SHUFFLE
(
2
,
0
,
2
,
0
),
mask_hi
=
_MM_SHUFFLE
(
3
,
1
,
3
,
1
);
__m128
u0
=
_mm_loadu_ps
(
ptr
);
// a0 b0 a1 b1
__m128
u1
=
_mm_loadu_ps
((
ptr
+
4
));
// a2 b2 a3 b3
a
.
val
=
_mm_shuffle_ps
(
u0
,
u1
,
mask_lo
);
// a0 a1 a2 a3
b
.
val
=
_mm_shuffle_ps
(
u0
,
u1
,
mask_hi
);
// b0 b1 ab b3
}
inline
void
v_store_interleave
(
uchar
*
ptr
,
const
v_uint8x16
&
a
,
const
v_uint8x16
&
b
,
const
v_uint8x16
&
c
)
{
...
...
@@ -1529,6 +1541,18 @@ inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint3
v_store
(
ptr
+
12
,
t3
);
}
// 2-channel, float only
inline
void
v_store_interleave
(
float
*
ptr
,
const
v_float32x4
&
a
,
const
v_float32x4
&
b
)
{
// a0 a1 a2 a3 ...
// b0 b1 b2 b3 ...
__m128
u0
=
_mm_unpacklo_ps
(
a
.
val
,
b
.
val
);
// a0 b0 a1 b1
__m128
u1
=
_mm_unpackhi_ps
(
a
.
val
,
b
.
val
);
// a2 b2 a3 b3
_mm_storeu_ps
(
ptr
,
u0
);
_mm_storeu_ps
((
ptr
+
4
),
u1
);
}
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
_Tpvec& b0, _Tpvec& c0 ) \
...
...
modules/core/test/test_intrin.cpp
View file @
595fd275
...
...
@@ -132,6 +132,32 @@ template<typename R> struct TheTest
return
*
this
;
}
// float32x4 only
TheTest & test_interleave_2channel()
{
    // Round-trip check for the 2-channel overloads: interleave two registers
    // into a buffer, deinterleave the buffer back, and expect the original
    // contents in each register.
    Data<R> data1, data2;
    data2 += 20; // presumably so the two channels hold different values

    R a = data1, b = data2;

    LaneType buf2[R::nlanes * 2];
    v_store_interleave(buf2, a, b);

    // Overwrite both registers before reloading so the checks below depend
    // on what v_load_deinterleave actually wrote.
    Data<R> z(0);
    a = b = z;

    v_load_deinterleave(buf2, a, b);

    // The comparisons do not depend on a lane index, so one check per
    // channel is sufficient; the former per-lane loop only repeated the
    // same two assertions R::nlanes times.
    EXPECT_EQ(data1, Data<R>(a));
    EXPECT_EQ(data2, Data<R>(b));

    return *this;
}
// v_expand and v_load_expand
TheTest
&
test_expand
()
{
...
...
@@ -846,6 +872,7 @@ TEST(hal_intrin, float32x4) {
TheTest
<
v_float32x4
>
()
.
test_loadstore
()
.
test_interleave
()
.
test_interleave_2channel
()
.
test_addsub
()
.
test_mul
()
.
test_div
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment