Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
9548093b
Commit
9548093b
authored
Oct 11, 2018
by
Vitaly Tuzov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Horizontal line processing for pyrDown() reworked using wide universal intrinsics.
parent
3bc9912f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
76 additions
and
40 deletions
+76
-40
intrin_avx.hpp
modules/core/include/opencv2/core/hal/intrin_avx.hpp
+10
-0
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+0
-1
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+54
-35
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+8
-4
intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+4
-0
pyramids.cpp
modules/imgproc/src/pyramids.cpp
+0
-0
No files found.
modules/core/include/opencv2/core/hal/intrin_avx.hpp
View file @
9548093b
...
...
@@ -1610,6 +1610,16 @@ inline v_int16x16 v_pack_triplets(const v_int16x16& vec)
}
// Keep the first three lanes of every group of four (drop lane 3), packing
// the surviving triplets contiguously toward the low end of the vector.
inline v_uint16x16 v_pack_triplets(const v_uint16x16& vec)
{
    // Delegate to the signed variant: the lane shuffle is bit-pattern only.
    return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
}
inline v_int32x8 v_pack_triplets(const v_int32x8& vec)
{
    // Cross-lane permute: indices 0,1,2, 4,5,6 move to positions 0..5;
    // the two tail slots are filled with lane 7 (don't-care padding).
    const __m256i idx = _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005,
                                          0x0000000400000002, 0x0000000100000000);
    return v_int32x8(_mm256_permutevar8x32_epi32(vec.val, idx));
}
inline v_uint32x8 v_pack_triplets(const v_uint32x8& vec)
{
    return v_reinterpret_as_u32(v_pack_triplets(v_reinterpret_as_s32(vec)));
}
inline v_float32x8 v_pack_triplets(const v_float32x8& vec)
{
    // Same permutation as the integer variant, on the float domain.
    const __m256i idx = _mm256_set_epi64x(0x0000000700000007, 0x0000000600000005,
                                          0x0000000400000002, 0x0000000100000000);
    return v_float32x8(_mm256_permutevar8x32_ps(vec.val, idx));
}
////////// Matrix operations /////////
inline
v_int32x8
v_dotprod
(
const
v_int16x16
&
a
,
const
v_int16x16
&
b
)
...
...
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
9548093b
...
...
@@ -1908,7 +1908,6 @@ template<typename _Tp, int n> inline v_reg<_Tp, n> v_interleave_quads(const v_re
template
<
typename
_Tp
,
int
n
>
inline
v_reg
<
_Tp
,
n
>
v_pack_triplets
(
const
v_reg
<
_Tp
,
n
>&
vec
)
{
v_reg
<
float
,
n
>
c
;
int
j
=
0
;
for
(
int
i
=
0
;
i
<
n
/
4
;
i
++
)
{
c
.
s
[
3
*
i
]
=
vec
.
s
[
4
*
i
];
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
9548093b
...
...
@@ -1597,29 +1597,49 @@ inline v_int8x16 v_lut(const schar* tab, const int* idx)
}
// Gather eight pairs of consecutive bytes: result lanes 2k,2k+1 come from
// tab[idx[k]] and tab[idx[k]+1].
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies interleaved; the reworked implementation is kept. It gathers
// byte-wise instead of punning through short*, which avoids a misaligned /
// strict-aliasing-violating load when idx[k] is odd (UB in C++).
inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx)
{
    schar CV_DECL_ALIGNED(32) elems[16] =
    {
        tab[idx[0]], tab[idx[0] + 1],
        tab[idx[1]], tab[idx[1] + 1],
        tab[idx[2]], tab[idx[2] + 1],
        tab[idx[3]], tab[idx[3] + 1],
        tab[idx[4]], tab[idx[4] + 1],
        tab[idx[5]], tab[idx[5] + 1],
        tab[idx[6]], tab[idx[6] + 1],
        tab[idx[7]], tab[idx[7] + 1]
    };
    return v_int8x16(vld1q_s8(elems));
}
// Gather four quads of consecutive bytes: result lanes 4k..4k+3 come from
// tab[idx[k]] .. tab[idx[k]+3].
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies interleaved; the reworked implementation is kept. Byte-wise
// gathering replaces the old int*-punned loads, which were misaligned UB
// for indices not divisible by four.
inline v_int8x16 v_lut_quads(const schar* tab, const int* idx)
{
    schar CV_DECL_ALIGNED(32) elems[16] =
    {
        tab[idx[0]], tab[idx[0] + 1], tab[idx[0] + 2], tab[idx[0] + 3],
        tab[idx[1]], tab[idx[1] + 1], tab[idx[1] + 2], tab[idx[1] + 3],
        tab[idx[2]], tab[idx[2] + 1], tab[idx[2] + 2], tab[idx[2] + 3],
        tab[idx[3]], tab[idx[3] + 1], tab[idx[3] + 2], tab[idx[3] + 3]
    };
    return v_int8x16(vld1q_s8(elems));
}
// Unsigned-byte LUT wrappers: forward to the signed-byte implementations and
// reinterpret the lanes — the gathered bit patterns are identical.
inline v_uint8x16 v_lut(const uchar* tab, const int* idx)
{
    return v_reinterpret_as_u8(v_lut((schar*)tab, idx));
}
inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx)
{
    return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx));
}
...
...
@@ -1642,23 +1662,22 @@ inline v_int16x8 v_lut(const short* tab, const int* idx)
}
// Gather four pairs of consecutive shorts: lanes 2k,2k+1 come from
// tab[idx[k]] and tab[idx[k]+1].
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies interleaved; the reworked implementation is kept. Element-wise
// gathering replaces the old int*-punned loads (misaligned for odd idx[k]).
inline v_int16x8 v_lut_pairs(const short* tab, const int* idx)
{
    short CV_DECL_ALIGNED(32) elems[8] =
    {
        tab[idx[0]], tab[idx[0] + 1],
        tab[idx[1]], tab[idx[1] + 1],
        tab[idx[2]], tab[idx[2] + 1],
        tab[idx[3]], tab[idx[3] + 1]
    };
    return v_int16x8(vld1q_s16(elems));
}
// Gather two quads of consecutive shorts: lanes 0..3 from tab[idx[0]]..,
// lanes 4..7 from tab[idx[1]]...
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies interleaved; the reworked implementation is kept. Two 64-bit
// half-vector loads combined with vcombine_s16 avoid the old int64*-punned
// scalar loads (unaligned / strict-aliasing UB) and the staging array.
inline v_int16x8 v_lut_quads(const short* tab, const int* idx)
{
    return v_int16x8(vcombine_s16(vld1_s16(tab + idx[0]),
                                  vld1_s16(tab + idx[1])));
}
// Unsigned-short LUT wrappers: delegate to the signed variants and
// reinterpret — the gathered bit patterns do not depend on signedness.
inline v_uint16x8 v_lut(const ushort* tab, const int* idx)
{
    return v_reinterpret_as_u16(v_lut((short*)tab, idx));
}
inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx)
{
    return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx));
}
...
...
@@ -1677,12 +1696,7 @@ inline v_int32x4 v_lut(const int* tab, const int* idx)
}
// Gather two pairs of consecutive ints: lanes 0,1 from tab[idx[0]].., lanes
// 2,3 from tab[idx[1]]...
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies interleaved; the reworked implementation is kept. Two 64-bit
// half-vector loads replace the old int64*-punned scalar loads (unaligned /
// strict-aliasing UB) and need no staging array.
inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
{
    return v_int32x4(vcombine_s32(vld1_s32(tab + idx[0]),
                                  vld1_s32(tab + idx[1])));
}
inline
v_int32x4
v_lut_quads
(
const
int
*
tab
,
const
int
*
idx
)
{
...
...
@@ -1800,7 +1814,8 @@ inline v_int16x8 v_interleave_pairs(const v_int16x8& vec)
// Pair interleave for unsigned shorts: reuse the signed implementation,
// since the lane shuffle is independent of signedness.
inline v_uint16x8 v_interleave_pairs(const v_uint16x8& vec)
{
    return v_reinterpret_as_u16(v_interleave_pairs(v_reinterpret_as_s16(vec)));
}
// Interleave the low and high halves of the vector quad-wise.
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies interleaved; the reworked implementation is kept. vzip_s16 on
// the two halves expresses the shuffle directly and drops the old pair of
// vtbl1_s8 table lookups with magic index constants.
inline v_int16x8 v_interleave_quads(const v_int16x8& vec)
{
    int16x4x2_t res = vzip_s16(vget_low_s16(vec.val), vget_high_s16(vec.val));
    return v_int16x8(vcombine_s16(res.val[0], res.val[1]));
}
// Quad interleave for unsigned shorts: forward to the signed variant —
// the shuffle only moves bit patterns.
inline v_uint16x8 v_interleave_quads(const v_uint16x8& vec)
{
    return v_reinterpret_as_u16(v_interleave_quads(v_reinterpret_as_s16(vec)));
}
...
...
@@ -1824,6 +1839,10 @@ inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
}
// v_pack_triplets overloads (NEON backend).
// The u16 form reuses the signed implementation via reinterpretation; for
// 32-bit/float 4-lane vectors there is exactly one triplet, already in
// lanes 0..2, so the input is returned unchanged.
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
{
    return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
}
inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
#if CV_SIMD128_64F
inline
v_float64x2
v_lut
(
const
double
*
tab
,
const
int
*
idx
)
{
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
9548093b
...
...
@@ -2789,7 +2789,7 @@ inline v_int32x4 v_lut_pairs(const int* tab, const int* idx)
}
// Load four consecutive ints starting at tab[idx[0]].
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff lines; the reworked implementation is kept. _mm_loadu_si128 replaces
// _mm_load_si128 because tab + idx[0] carries no 16-byte alignment
// guarantee — the aligned load faults on arbitrary offsets.
inline v_int32x4 v_lut_quads(const int* tab, const int* idx)
{
    return v_int32x4(_mm_loadu_si128((const __m128i*)(tab + idx[0])));
}
// Unsigned 32-bit LUT wrappers: the signed gathers move the same bits, so
// delegate and reinterpret.
inline v_uint32x4 v_lut(const unsigned* tab, const int* idx)
{
    return v_reinterpret_as_u32(v_lut((const int *)tab, idx));
}
inline v_uint32x4 v_lut_pairs(const unsigned* tab, const int* idx)
{
    return v_reinterpret_as_u32(v_lut_pairs((const int *)tab, idx));
}
...
...
@@ -2801,7 +2801,7 @@ inline v_int64x2 v_lut(const int64_t* tab, const int* idx)
}
// Load two consecutive int64 elements starting at tab[idx[0]].
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff lines; the reworked implementation is kept. _mm_loadu_si128 replaces
// _mm_load_si128 since tab + idx[0] is not guaranteed 16-byte aligned.
inline v_int64x2 v_lut_pairs(const int64_t* tab, const int* idx)
{
    return v_int64x2(_mm_loadu_si128((const __m128i*)(tab + idx[0])));
}
// Unsigned 64-bit LUT wrappers: forward to the signed gathers and
// reinterpret the lanes.
inline v_uint64x2 v_lut(const uint64_t* tab, const int* idx)
{
    return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx));
}
inline v_uint64x2 v_lut_pairs(const uint64_t* tab, const int* idx)
{
    return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx));
}
...
...
@@ -2817,7 +2817,7 @@ inline v_float64x2 v_lut(const double* tab, const int* idx)
{
return
v_float64x2
(
_mm_setr_pd
(
tab
[
idx
[
0
]],
tab
[
idx
[
1
]]));
}
// Load two consecutive doubles starting at tab[idx[0]].
//
// NOTE(review): this extracted span contained both the pre- and post-commit
// diff bodies; the reworked implementation is kept. _mm_loadu_si128 replaces
// _mm_load_si128 because the offset gives no 16-byte alignment guarantee.
inline v_float64x2 v_lut_pairs(const double* tab, const int* idx)
{
    return v_float64x2(_mm_castsi128_pd(
        _mm_loadu_si128((const __m128i*)(tab + idx[0]))));
}
inline
v_int32x4
v_lut
(
const
int
*
tab
,
const
v_int32x4
&
idxvec
)
{
...
...
@@ -2932,7 +2932,7 @@ inline v_int8x16 v_pack_triplets(const v_int8x16& vec)
return
v_int8x16
(
_mm_shuffle_epi8
(
vec
.
val
,
_mm_set_epi64x
(
0xffffff0f0e0d0c0a
,
0x0908060504020100
)));
#else
__m128i
mask
=
_mm_set1_epi64x
(
0x00000000FFFFFFFF
);
__m128i
a
=
_mm_
or_si128
(
_mm_andnot_si128
(
mask
,
vec
.
val
),
_mm_and_si128
(
mask
,
_mm_sll_epi32
(
vec
.
val
,
_mm_set_epi64x
(
0
,
8
)))
);
__m128i
a
=
_mm_
srli_si128
(
_mm_or_si128
(
_mm_andnot_si128
(
mask
,
vec
.
val
),
_mm_and_si128
(
mask
,
_mm_sll_epi32
(
vec
.
val
,
_mm_set_epi64x
(
0
,
8
)))),
1
);
return
v_int8x16
(
_mm_srli_si128
(
_mm_shufflelo_epi16
(
a
,
_MM_SHUFFLE
(
2
,
1
,
0
,
3
)),
2
));
#endif
}
...
...
@@ -2948,6 +2948,10 @@ inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
}
// v_pack_triplets overloads (SSE backend).
// The u16 form delegates to the signed variant via bit reinterpretation;
// 4-lane 32-bit/float vectors hold a single triplet already in lanes 0..2,
// so they pass through unchanged.
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
{
    return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
}
inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
////////////// FP16 support ///////////////////////////
inline
v_float32x4
v_load_expand
(
const
float16_t
*
ptr
)
...
...
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
View file @
9548093b
...
...
@@ -1160,6 +1160,10 @@ inline v_int16x8 v_pack_triplets(const v_int16x8& vec)
}
// v_pack_triplets overloads (VSX backend).
// u16 reuses the signed implementation through reinterpretation; the
// 4-lane 32-bit/float forms are identity since the only triplet already
// occupies lanes 0..2.
inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec)
{
    return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec)));
}
inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; }
inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; }
inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
/////// FP16 support ////////
// [TODO] implement these 2 using VSX or universal intrinsics (copy from intrin_sse.cpp and adopt)
...
...
modules/imgproc/src/pyramids.cpp
View file @
9548093b
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment