Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
4a54aa3f
Commit
4a54aa3f
authored
Apr 22, 2019
by
Vitaly Tuzov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Cleared up deprecated intrinsics for FP16
parent
5c0a98cf
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
72 deletions
+13
-72
intrin_avx.hpp
modules/core/include/opencv2/core/hal/intrin_avx.hpp
+4
-17
intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+0
-42
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+9
-13
No files found.
modules/core/include/opencv2/core/hal/intrin_avx.hpp
View file @
4a54aa3f
...
...
@@ -431,19 +431,6 @@ inline v_float64x4 v_reinterpret_as_f64(const v_float64x4& a)
/** Reinterpret a v_float32x8 as a v_float64x4.
 *  The register is passed through _mm256_castps_pd, which is a cast only:
 *  no element values are converted. */
inline v_float64x4 v_reinterpret_as_f64(const v_float32x8& a)
{
    const __m256d as_pd = _mm256_castps_pd(a.val);
    return v_float64x4(as_pd);
}
#if CV_FP16
/** Load eight 16-bit half-precision values from ptr and widen them to floats.
 *  The 16 bytes are read with an unaligned load, then converted with
 *  _mm256_cvtph_ps. */
inline v_float32x8 v256_load_fp16_f32(const short* ptr)
{
    const __m128i packed_halfs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
    const __m256 widened = _mm256_cvtph_ps(packed_halfs);
    return v_float32x8(widened);
}
inline
void
v_store_fp16
(
short
*
ptr
,
const
v_float32x8
&
a
)
{
__m128i
fp16_value
=
_mm256_cvtps_ph
(
a
.
val
,
0
);
_mm_store_si128
((
__m128i
*
)
ptr
,
fp16_value
);
}
#endif
/* Recombine */
/*#define OPENCV_HAL_IMPL_AVX_COMBINE(_Tpvec, perm) \
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b) \
...
...
@@ -1400,7 +1387,7 @@ inline v_float32x8 v_cvt_f32(const v_float64x4& a)
/**
 * Convert two vectors of four doubles into one vector of eight floats.
 *
 * @param a first source vector; narrowed to four floats with _mm256_cvtpd_ps.
 * @param b second source vector; narrowed the same way.
 * @return  the two 128-bit halves joined into one 256-bit vector.
 */
inline v_float32x8 v_cvt_f32(const v_float64x4& a, const v_float64x4& b)
{
    __m128 af = _mm256_cvtpd_ps(a.val);
    __m128 bf = _mm256_cvtpd_ps(b.val);
    // One return only (a second, unreachable return used to follow).
    // NOTE(review): _v256_combine(af, bf) is assumed to put af in the low
    // 128 bits and bf in the high 128 bits, i.e. to match
    // _mm256_insertf128_ps(_mm256_castps128_ps256(af), bf, 1) - confirm
    // against the helper's definition.
    return v_float32x8(_v256_combine(af, bf));
}
inline
v_float64x4
v_cvt_f64
(
const
v_int32x8
&
a
)
...
...
@@ -1474,7 +1461,7 @@ inline v_int32x8 v256_lut_pairs(const int* tab, const int* idx)
}
/**
 * Gather two groups of four consecutive ints from a table.
 *
 * @param tab base of the lookup table.
 * @param idx element offsets; idx[0] and idx[1] are read. Four ints are
 *            loaded (unaligned) starting at tab + idx[0] and at tab + idx[1].
 * @return    both 128-bit groups joined into one 256-bit vector.
 */
inline v_int32x8 v256_lut_quads(const int* tab, const int* idx)
{
    const __m128i first_quad  = _mm_loadu_si128((const __m128i*)(tab + idx[0]));
    const __m128i second_quad = _mm_loadu_si128((const __m128i*)(tab + idx[1]));
    // One return only (a second, unreachable return used to follow).
    // NOTE(review): _v256_combine is assumed to place its first argument in
    // the low 128 bits and its second in the high 128 bits, matching
    // _mm256_insertf128_si256(cast(first), second, 0x1) - confirm.
    return v_int32x8(_v256_combine(first_quad, second_quad));
}
/** Unsigned 32-bit table lookup: views the table as const int*, calls the
 *  int overload of v256_lut, and reinterprets the result as v_uint32x8. */
inline v_uint32x8 v256_lut(const unsigned* tab, const int* idx)
{
    const int* signed_tab = (const int*)tab;
    return v_reinterpret_as_u32(v256_lut(signed_tab, idx));
}
/** Unsigned 32-bit pair lookup: views the table as const int*, calls the
 *  int overload of v256_lut_pairs, and reinterprets the result as v_uint32x8. */
inline v_uint32x8 v256_lut_pairs(const unsigned* tab, const int* idx)
{
    const int* signed_tab = (const int*)tab;
    return v_reinterpret_as_u32(v256_lut_pairs(signed_tab, idx));
}
...
...
@@ -1490,7 +1477,7 @@ inline v_int64x4 v256_lut(const int64* tab, const int* idx)
}
/**
 * Gather two pairs of consecutive 64-bit integers from a table.
 *
 * @param tab base of the lookup table.
 * @param idx element offsets; idx[0] and idx[1] are read. Sixteen bytes are
 *            loaded (unaligned) starting at tab + idx[0] and at tab + idx[1].
 * @return    both 128-bit pairs joined into one 256-bit vector.
 */
inline v_int64x4 v256_lut_pairs(const int64* tab, const int* idx)
{
    const __m128i first_pair  = _mm_loadu_si128((const __m128i*)(tab + idx[0]));
    const __m128i second_pair = _mm_loadu_si128((const __m128i*)(tab + idx[1]));
    // One return only (a second, unreachable return used to follow).
    // NOTE(review): _v256_combine is assumed to place its first argument in
    // the low 128 bits and its second in the high 128 bits, matching
    // _mm256_insertf128_si256(cast(first), second, 0x1) - confirm.
    return v_int64x4(_v256_combine(first_pair, second_pair));
}
/** Unsigned 64-bit table lookup: views the table as const int64*, calls the
 *  int64 overload of v256_lut, and reinterprets the result as v_uint64x4. */
inline v_uint64x4 v256_lut(const uint64* tab, const int* idx)
{
    const int64* signed_tab = (const int64*)tab;
    return v_reinterpret_as_u64(v256_lut(signed_tab, idx));
}
/** Unsigned 64-bit pair lookup: views the table as const int64*, calls the
 *  int64 overload of v256_lut_pairs, and reinterprets the result as v_uint64x4. */
inline v_uint64x4 v256_lut_pairs(const uint64* tab, const int* idx)
{
    const int64* signed_tab = (const int64*)tab;
    return v_reinterpret_as_u64(v256_lut_pairs(signed_tab, idx));
}
...
...
@@ -1506,7 +1493,7 @@ inline v_float64x4 v256_lut(const double* tab, const int* idx)
{
return
v_float64x4
(
_mm256_i32gather_pd
(
tab
,
_mm_loadu_si128
((
const
__m128i
*
)
idx
),
8
));
}
/**
 * Gather two pairs of consecutive doubles from a table.
 *
 * @param tab base of the lookup table.
 * @param idx element offsets; idx[0] and idx[1] are read. Two doubles are
 *            loaded (unaligned) starting at tab + idx[0] and at tab + idx[1].
 * @return    both 128-bit pairs joined into one 256-bit vector.
 *
 * This overload is defined once only; a second definition with the same
 * signature is a redefinition error.
 */
inline v_float64x4 v256_lut_pairs(const double* tab, const int* idx)
{
    const __m128d first_pair  = _mm_loadu_pd(tab + idx[0]);
    const __m128d second_pair = _mm_loadu_pd(tab + idx[1]);
    // NOTE(review): _v256_combine is assumed to place its first argument in
    // the low 128 bits and its second in the high 128 bits, matching
    // _mm256_insertf128_pd(_mm256_castpd128_pd256(first), second, 0x1) - confirm.
    return v_float64x4(_v256_combine(first_pair, second_pair));
}
inline
v_int32x8
v_lut
(
const
int
*
tab
,
const
v_int32x8
&
idxvec
)
{
...
...
modules/core/include/opencv2/core/hal/intrin_neon.hpp
View file @
4a54aa3f
...
...
@@ -278,48 +278,6 @@ struct v_float64x2
};
#endif
#if CV_FP16
// Workaround for old compilers
/** Fallback definition: turns a float16x4_t into an int16x4_t with a C-style
 *  cast between the two vector types. */
static inline int16x4_t vreinterpret_s16_f16(float16x4_t a)
{
    int16x4_t as_s16 = (int16x4_t)a;
    return as_s16;
}
/** Fallback definition: turns an int16x4_t into a float16x4_t with a C-style
 *  cast between the two vector types. */
static inline float16x4_t vreinterpret_f16_s16(int16x4_t a)
{
    float16x4_t as_f16 = (float16x4_t)a;
    return as_f16;
}
/** Load four half-precision values from ptr as a float16x4_t.
 *  If vld1_f16 exists as a macro it is used directly; otherwise the data is
 *  loaded as four int16 lanes and converted with vreinterpret_f16_s16. */
static inline float16x4_t cv_vld1_f16(const void* ptr)
{
#ifdef vld1_f16 // APPLE compiler defines vld1_f16 as macro
    return vld1_f16((const __fp16*)ptr);
#else
    const int16x4_t raw_lanes = vld1_s16((const short*)ptr);
    return vreinterpret_f16_s16(raw_lanes);
#endif
}
static
inline
void
cv_vst1_f16
(
void
*
ptr
,
float16x4_t
a
)
{
#ifndef vst1_f16 // APPLE compiler defines vst1_f16 as macro
vst1_s16
((
short
*
)
ptr
,
vreinterpret_s16_f16
(
a
));
#else
vst1_f16
((
__fp16
*
)
ptr
,
a
);
#endif
}
// Fallback used only when vdup_n_f16 is not already defined as a macro:
// builds a float16x4_t whose four lanes are all `v`.
// Note: `v` is expanded four times, so an argument with side effects is
// evaluated repeatedly.
#ifndef vdup_n_f16
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
#endif
#endif // CV_FP16
#if CV_FP16
/** Load four half-precision values from ptr through cv_vld1_f16 and widen
 *  them to four floats with vcvt_f32_f16. */
inline v_float32x4 v128_load_fp16_f32(const short* ptr)
{
    return v_float32x4(vcvt_f32_f16(cv_vld1_f16((const __fp16*)ptr)));
}
inline
void
v_store_fp16
(
short
*
ptr
,
const
v_float32x4
&
a
)
{
float16x4_t
fp16
=
vcvt_f16_f32
(
a
.
val
);
cv_vst1_f16
((
short
*
)
ptr
,
fp16
);
}
#endif
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
4a54aa3f
...
...
@@ -2684,19 +2684,6 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
return
v_float64x2
(
_mm_cvtps_pd
(
_mm_movehl_ps
(
a
.
val
,
a
.
val
)));
}
#if CV_FP16
/**
 * Load four half-precision values from ptr and widen them to four floats.
 *
 * @param ptr source of four 16-bit half-precision values (8 bytes are read);
 *            no particular alignment is required.
 * @return    the four values converted to single precision.
 */
inline v_float32x4 v128_load_fp16_f32(const short* ptr)
{
    // _mm_cvtph_ps converts only the low 64 bits of its operand, so load
    // exactly those 8 bytes. A full 16-byte load would read 8 bytes past the
    // four input values.
    const __m128i four_halfs = _mm_loadl_epi64((const __m128i*)ptr);
    return v_float32x4(_mm_cvtph_ps(four_halfs));
}
inline
void
v_store_fp16
(
short
*
ptr
,
const
v_float32x4
&
a
)
{
__m128i
fp16_value
=
_mm_cvtps_ph
(
a
.
val
,
0
);
_mm_storel_epi64
((
__m128i
*
)
ptr
,
fp16_value
);
}
#endif
////////////// Lookup table access ////////////////////
inline
v_int8x16
v_lut
(
const
schar
*
tab
,
const
int
*
idx
)
...
...
@@ -2956,6 +2943,9 @@ inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; }
inline
v_float32x4
v_load_expand
(
const
float16_t
*
ptr
)
{
#if CV_FP16
return
v_float32x4
(
_mm_cvtph_ps
(
_mm_loadu_si128
((
const
__m128i
*
)
ptr
)));
#else
const
__m128i
z
=
_mm_setzero_si128
(),
delta
=
_mm_set1_epi32
(
0x38000000
);
const
__m128i
signmask
=
_mm_set1_epi32
(
0x80000000
),
maxexp
=
_mm_set1_epi32
(
0x7c000000
);
const
__m128
deltaf
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x38800000
));
...
...
@@ -2968,10 +2958,15 @@ inline v_float32x4 v_load_expand(const float16_t* ptr)
__m128i
zmask
=
_mm_cmpeq_epi32
(
e
,
z
);
__m128i
ft
=
v_select_si128
(
zmask
,
zt
,
t
);
return
v_float32x4
(
_mm_castsi128_ps
(
_mm_or_si128
(
ft
,
sign
)));
#endif
}
inline
void
v_pack_store
(
float16_t
*
ptr
,
const
v_float32x4
&
v
)
{
#if CV_FP16
__m128i
fp16_value
=
_mm_cvtps_ph
(
v
.
val
,
0
);
_mm_storel_epi64
((
__m128i
*
)
ptr
,
fp16_value
);
#else
const
__m128i
signmask
=
_mm_set1_epi32
(
0x80000000
);
const
__m128i
rval
=
_mm_set1_epi32
(
0x3f000000
);
...
...
@@ -2993,6 +2988,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
t
=
_mm_or_si128
(
t
,
sign
);
t
=
_mm_packs_epi32
(
t
,
t
);
_mm_storel_epi64
((
__m128i
*
)
ptr
,
t
);
#endif
}
/** Cleanup hook; on this backend the body is empty and the call does nothing. */
inline void v_cleanup()
{
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment