Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
cfaca432
Commit
cfaca432
authored
Apr 13, 2018
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #11169 from tomoaki0705:universalRemap
parents
a2d6ee2d
a82e70cd
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
103 additions
and
10 deletions
+103
-10
intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+24
-1
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+24
-1
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+33
-2
intrin_vsx.hpp
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
+6
-0
test_intrin_utils.hpp
modules/core/test/test_intrin_utils.hpp
+12
-2
perf_imgwarp.cpp
modules/imgproc/perf/opencl/perf_imgwarp.cpp
+1
-1
perf_warp.cpp
modules/imgproc/perf/perf_warp.cpp
+3
-3
imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+0
-0
No files found.
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
View file @
cfaca432
...
...
@@ -795,7 +795,7 @@ inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>
/** @brief Multiply and add
Returns \f$ a*b + c \f$
For floating point types only. */
For floating point types
and signed 32bit int
only. */
template
<
typename
_Tp
,
int
n
>
inline
v_reg
<
_Tp
,
n
>
v_muladd
(
const
v_reg
<
_Tp
,
n
>&
a
,
const
v_reg
<
_Tp
,
n
>&
b
,
const
v_reg
<
_Tp
,
n
>&
c
)
...
...
@@ -828,6 +828,29 @@ template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n
return
c
;
}
/** @brief Dot product of elements with accumulation

Behaves like the two-argument cv::v_dotprod, except that the matching lane of a
third register is added to every sum of adjacent products.

Scheme:
@code
  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
{A1B1+A2B2+C1 ...} // 32-bit
@endcode
Implemented only for 16-bit signed source type (v_int16x8).
*/
template<typename _Tp, int n>
inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
          const v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
{
    typedef typename V_TypeTraits<_Tp>::w_type w_type;
    const int pairs = n / 2;
    v_reg<w_type, n/2> acc;
    for (int k = 0; k < pairs; ++k)
    {
        const int even = 2 * k;
        const int odd = even + 1;
        // The first factor of each product is converted to the wide type before multiplying.
        acc.s[k] = static_cast<w_type>(a.s[even]) * b.s[even]
                 + static_cast<w_type>(a.s[odd]) * b.s[odd]
                 + c.s[k];
    }
    return acc;
}
/** @brief Multiply and expand
Multiply values of two registers and store the results in two registers with a wider pack type.
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
cfaca432
...
...
@@ -506,6 +506,12 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
return
v_int32x4
(
vaddq_s32
(
cd
.
val
[
0
],
cd
.
val
[
1
]));
}
/** Dot product of adjacent 16-bit pairs with accumulation (NEON).
    Computes v_dotprod(a, b) and adds @p c to it with vaddq_s32. */
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{
    return v_int32x4(vaddq_s32(v_dotprod(a, b).val, c.val));
}
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
...
...
@@ -730,6 +736,11 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_
return
v_float32x4
(
vmlaq_f32
(
c
.
val
,
a
.
val
,
b
.
val
));
}
/** Multiply-add for v_int32x4 (NEON), implemented with the vmlaq_s32 intrinsic;
    @p c is passed as the accumulator operand, @p a and @p b as the factors. */
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
{
    const int32x4_t fused = vmlaq_s32(c.val, a.val, b.val);
    return v_int32x4(fused);
}
#if CV_SIMD128_64F
inline
v_float64x2
v_magnitude
(
const
v_float64x2
&
a
,
const
v_float64x2
&
b
)
{
...
...
@@ -1095,6 +1106,18 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
OPENCV_HAL_IMPL_NEON_EXTRACT
(
float64x2
,
f64
)
#endif
#if CV_SIMD128_64F
/** Convert a v_float32x4 to a v_int32x4 using the `fcvtns` vector instruction,
    issued through inline assembly on the four-lane (.4s) arrangement.
    NOTE(review): per the Arm ISA reference fcvtns rounds to nearest with ties to
    even - confirm this matches the rounding of the other back ends. */
inline v_int32x4 v_round(const v_float32x4& a)
{
    const float32x4_t src = a.val;
    int32x4_t rounded;
    __asm__ ("fcvtns %0.4s, %1.4s"
             : "=w"(rounded)
             : "w"(src)
             : /* No clobbers */);
    return v_int32x4(rounded);
}
#else
inline
v_int32x4
v_round
(
const
v_float32x4
&
a
)
{
static
const
int32x4_t
v_sign
=
vdupq_n_s32
(
1
<<
31
),
...
...
@@ -1103,7 +1126,7 @@ inline v_int32x4 v_round(const v_float32x4& a)
int32x4_t
v_addition
=
vorrq_s32
(
v_05
,
vandq_s32
(
v_sign
,
vreinterpretq_s32_f32
(
a
.
val
)));
return
v_int32x4
(
vcvtq_s32_f32
(
vaddq_f32
(
a
.
val
,
vreinterpretq_f32_s32
(
v_addition
))));
}
#endif
inline
v_int32x4
v_floor
(
const
v_float32x4
&
a
)
{
int32x4_t
a1
=
vcvtq_s32_f32
(
a
.
val
);
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
cfaca432
...
...
@@ -710,6 +710,11 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
return
v_int32x4
(
_mm_madd_epi16
(
a
.
val
,
b
.
val
));
}
/** Dot product of adjacent 16-bit pairs with accumulation (SSE2).
    _mm_madd_epi16 produces the pairwise sums; @p c is then added to them
    with _mm_add_epi32. */
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{
    const __m128i pair_sums = _mm_madd_epi16(a.val, b.val);
    return v_int32x4(_mm_add_epi32(pair_sums, c.val));
}
#define OPENCV_HAL_IMPL_SSE_LOGIC_OP(_Tpvec, suffix, not_const) \
OPENCV_HAL_IMPL_SSE_BIN_OP(&, _Tpvec, _mm_and_##suffix) \
OPENCV_HAL_IMPL_SSE_BIN_OP(|, _Tpvec, _mm_or_##suffix) \
...
...
@@ -954,6 +959,10 @@ inline v_uint32x4 v_absdiff(const v_int32x4& a, const v_int32x4& b)
__m128i
m
=
_mm_cmpgt_epi32
(
b
.
val
,
a
.
val
);
return
v_uint32x4
(
_mm_sub_epi32
(
_mm_xor_si128
(
d
,
m
),
m
));
}
/** Multiply-add for v_int32x4 (SSE): returns a*b + c, expressed through the
    v_int32x4 operator overloads. */
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
{
    const v_int32x4 product = a * b;
    return product + c;
}
#define OPENCV_HAL_IMPL_SSE_MISC_FLT_OP(_Tpvec, _Tp, _Tpreg, suffix, absmask_vec) \
inline _Tpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
...
...
@@ -1632,7 +1641,7 @@ inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2&
c
=
v_reinterpret_as_f64
(
t2
);
}
// 2-channel
, float only
// 2-channel
inline
void
v_load_deinterleave
(
const
float
*
ptr
,
v_float32x4
&
a
,
v_float32x4
&
b
)
{
const
int
mask_lo
=
_MM_SHUFFLE
(
2
,
0
,
2
,
0
),
mask_hi
=
_MM_SHUFFLE
(
3
,
1
,
3
,
1
);
...
...
@@ -1644,7 +1653,29 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
b
.
val
=
_mm_shuffle_ps
(
u0
,
u1
,
mask_hi
);
// b0 b1 b2 b3
}
inline
void
v_store_interleave
(
short
*
ptr
,
const
v_int16x8
&
a
,
const
v_int16x8
&
b
)
/** Load 16 interleaved 16-bit values (a0 b0 a1 b1 ...) from @p ptr and split
    them into two registers.

    @param ptr source of 16 consecutive shorts; read with two unaligned 128-bit
               loads at ptr and ptr + 8
    @param a   receives the even-indexed elements (a0..a7)
    @param b   receives the odd-indexed elements (b0..b7)

    The separation is done with three rounds of 16-bit unpack operations; the
    lane layout after each step is given in the trailing comments. */
inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
{
    // Cast to a const vector pointer so the const qualifier of ptr is preserved.
    __m128i v0 = _mm_loadu_si128((const __m128i*)(ptr));     // a0 b0 a1 b1 a2 b2 a3 b3
    __m128i v1 = _mm_loadu_si128((const __m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7

    __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
    __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
    __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
    __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7

    a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
    b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
}
inline
void
v_load_deinterleave
(
const
ushort
*
ptr
,
v_uint16x8
&
a
,
v_uint16x8
&
b
)
{
v_int16x8
sa
,
sb
;
v_load_deinterleave
((
const
short
*
)
ptr
,
sa
,
sb
);
a
=
v_reinterpret_as_u16
(
sa
);
b
=
v_reinterpret_as_u16
(
sb
);
}
inline
void
v_store_interleave
(
short
*
ptr
,
const
v_int16x8
&
a
,
const
v_int16x8
&
b
)
{
__m128i
t0
,
t1
;
t0
=
_mm_unpacklo_epi16
(
a
.
val
,
b
.
val
);
...
...
modules/core/include/opencv2/core/hal/intrin_vsx.hpp
View file @
cfaca432
...
...
@@ -760,6 +760,9 @@ inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
OPENCV_HAL_IMPL_VSX_MULADD
(
v_float32x4
)
OPENCV_HAL_IMPL_VSX_MULADD
(
v_float64x2
)
/** Multiply-add for v_int32x4 (VSX): returns a*b + c, expressed through the
    v_int32x4 operator overloads. */
inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c)
{
    const v_int32x4 product = a * b;
    return product + c;
}
// TODO: exp, log, sin, cos
/** Absolute values **/
...
...
@@ -843,6 +846,9 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
/** Dot product of adjacent 16-bit pairs (VSX), computed with vec_msum.
    vec_int4_z is supplied as the accumulator operand
    (presumably an all-zero vector; it is defined outside this view). */
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
    v_int32x4 sums(vec_msum(a.val, b.val, vec_int4_z));
    return sums;
}
/** Dot product of adjacent 16-bit pairs with accumulation (VSX).
    A single vec_msum call is used, with @p c passed as its accumulator operand. */
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c)
{
    v_int32x4 sums(vec_msum(a.val, b.val, c.val));
    return sums;
}
inline
v_float32x4
v_matmul
(
const
v_float32x4
&
v
,
const
v_float32x4
&
m0
,
const
v_float32x4
&
m1
,
const
v_float32x4
&
m2
,
const
v_float32x4
&
m3
)
...
...
modules/core/test/test_intrin_utils.hpp
View file @
cfaca432
...
...
@@ -521,15 +521,25 @@ template<typename R> struct TheTest
/** Checks both v_dotprod overloads against a scalar reference.

    resD holds v_dotprod(a, b) and resE holds v_dotprod(a, b, c); each output
    lane i is compared with the sum of products of input lanes 2*i and 2*i + 1
    (plus dataC[i] for the accumulating overload).

    @return *this, so that test calls can be chained. */
TheTest & test_dot_prod()
{
    typedef typename V_RegTrait128<LaneType>::w_reg Rx2;
    typedef typename Rx2::lane_type w_type;

    Data<R> dataA, dataB(2);
    R a = dataA, b = dataB;

    // Offset the accumulator: min() for a signed wide type, otherwise
    // max() - nlanes * (dataB[0] + 1).
    // NOTE(review): presumably chosen to exercise values near the limits of
    // w_type without overflowing - confirm against the Data<> fill pattern.
    Data<Rx2> dataC;
    dataC += std::numeric_limits<w_type>::is_signed ?
                std::numeric_limits<w_type>::min() :
                std::numeric_limits<w_type>::max() - R::nlanes * (dataB[0] + 1);
    Rx2 c = dataC;

    Data<Rx2> resD = v_dotprod(a, b),
              resE = v_dotprod(a, b, c);

    const int n = R::nlanes / 2;
    for (int i = 0; i < n; ++i)
    {
        EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1], resD[i]);
        EXPECT_EQ(dataA[i*2] * dataB[i*2] + dataA[i*2 + 1] * dataB[i*2 + 1] + dataC[i], resE[i]);
    }
    return *this;
}
...
...
modules/imgproc/perf/opencl/perf_imgwarp.cpp
View file @
cfaca432
...
...
@@ -229,7 +229,7 @@ OCL_PERF_TEST_P(RemapFixture, Remap,
OCL_TEST_CYCLE
()
cv
::
remap
(
src
,
dst
,
xmap
,
ymap
,
interpolation
,
borderMode
);
SANITY_CHECK
(
dst
,
eps
);
SANITY_CHECK
_NOTHING
(
);
}
}
}
// namespace opencv_test::ocl
...
...
modules/imgproc/perf/perf_warp.cpp
View file @
cfaca432
...
...
@@ -202,8 +202,8 @@ PERF_TEST_P( TestWarpPerspectiveNear_t, WarpPerspectiveNear,
PERF_TEST_P
(
TestRemap
,
remap
,
Combine
(
Values
(
TYPICAL_MAT_TYPES
),
Values
(
szVGA
,
sz
720p
,
sz
1080p
),
Values
(
CV_8UC1
,
CV_8UC3
,
CV_8UC4
,
CV_32FC1
),
Values
(
szVGA
,
sz1080p
),
InterType
::
all
(),
BorderMode
::
all
(),
RemapMode
::
all
()
...
...
@@ -231,7 +231,7 @@ PERF_TEST_P( TestRemap, remap,
remap
(
source
,
destination
,
map_x
,
map_y
,
interpolationType
,
borderMode
);
}
SANITY_CHECK
(
destination
,
1
);
SANITY_CHECK
_NOTHING
(
);
}
void
update_map
(
const
Mat
&
src
,
Mat
&
map_x
,
Mat
&
map_y
,
const
int
remapMode
)
...
...
modules/imgproc/src/imgwarp.cpp
View file @
cfaca432
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment