Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
fbdcc0e8
Commit
fbdcc0e8
authored
Apr 07, 2018
by
k-shinotsuka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add universal intrinsics for HSV2RGB_f
parent
099a16bd
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
152 additions
and
127 deletions
+152
-127
intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+69
-1
color_hsv.cpp
modules/imgproc/src/color_hsv.cpp
+83
-126
No files found.
modules/core/include/opencv2/core/hal/intrin_sse.hpp
View file @
fbdcc0e8
...
...
@@ -1570,6 +1570,39 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
v_transpose4x4
(
u0
,
u1
,
u2
,
u3
,
a
,
b
,
c
,
d
);
}
// Reads 12 consecutive floats starting at ptr (unaligned loads) and splits them
// into three vectors by position modulo 3:
//   a = { ptr[0], ptr[3], ptr[6], ptr[9]  }
//   b = { ptr[1], ptr[4], ptr[7], ptr[10] }
//   c = { ptr[2], ptr[5], ptr[8], ptr[11] }
// Lane comments below list elements from lane 0 to lane 3, pN meaning ptr[N].
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
{
    const __m128 row0 = _mm_loadu_ps(ptr + 0);   // p0 p1 p2  p3
    const __m128 row1 = _mm_loadu_ps(ptr + 4);   // p4 p5 p6  p7
    const __m128 row2 = _mm_loadu_ps(ptr + 8);   // p8 p9 p10 p11

    // First channel: p0 and p3 come straight from row0, p6 and p9 are
    // gathered from row1/row2 into a helper first.
    const __m128 a_upper = _mm_shuffle_ps(row1, row2, _MM_SHUFFLE(0, 1, 0, 2));   // p6 p4 p9 p8
    a.val = _mm_shuffle_ps(row0, a_upper, _MM_SHUFFLE(2, 0, 3, 0));               // p0 p3 p6 p9

    // Second channel: every source row contributes, so both halves need a helper.
    const __m128 b_lower = _mm_shuffle_ps(row0, row1, _MM_SHUFFLE(0, 0, 0, 1));   // p1 p0 p4  p4
    const __m128 b_upper = _mm_shuffle_ps(row1, row2, _MM_SHUFFLE(0, 2, 0, 3));   // p7 p4 p10 p8
    b.val = _mm_shuffle_ps(b_lower, b_upper, _MM_SHUFFLE(2, 0, 2, 0));            // p1 p4 p7  p10

    // Third channel: p8 and p11 come straight from row2, p2 and p5 are
    // gathered from row0/row1 into a helper first.
    const __m128 c_lower = _mm_shuffle_ps(row0, row1, _MM_SHUFFLE(0, 1, 0, 2));   // p2 p0 p5 p4
    c.val = _mm_shuffle_ps(c_lower, row2, _MM_SHUFFLE(3, 0, 2, 0));               // p2 p5 p8 p11
}
// Reads 16 consecutive floats starting at ptr (unaligned loads) and splits them
// into four vectors by position modulo 4:
//   a = { ptr[0], ptr[4], ptr[8],  ptr[12] }
//   b = { ptr[1], ptr[5], ptr[9],  ptr[13] }
//   c = { ptr[2], ptr[6], ptr[10], ptr[14] }
//   d = { ptr[3], ptr[7], ptr[11], ptr[15] }
// This is a 4x4 transpose done with two rounds of unpack operations.
// Lane comments below list elements from lane 0 to lane 3, pN meaning ptr[N].
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c, v_float32x4& d)
{
    const __m128 row0 = _mm_loadu_ps(ptr + 0);    // p0  p1  p2  p3
    const __m128 row1 = _mm_loadu_ps(ptr + 4);    // p4  p5  p6  p7
    const __m128 row2 = _mm_loadu_ps(ptr + 8);    // p8  p9  p10 p11
    const __m128 row3 = _mm_loadu_ps(ptr + 12);   // p12 p13 p14 p15

    // Round 1: pair up rows two apart so that round 2 can finish the transpose.
    const __m128 even_lo = _mm_unpacklo_ps(row0, row2);   // p0 p8  p1 p9
    const __m128 odd_lo  = _mm_unpacklo_ps(row1, row3);   // p4 p12 p5 p13
    const __m128 even_hi = _mm_unpackhi_ps(row0, row2);   // p2 p10 p3 p11
    const __m128 odd_hi  = _mm_unpackhi_ps(row1, row3);   // p6 p14 p7 p15

    // Round 2: interleave the even/odd pairs to obtain one channel per vector.
    a.val = _mm_unpacklo_ps(even_lo, odd_lo);   // p0 p4 p8  p12
    b.val = _mm_unpackhi_ps(even_lo, odd_lo);   // p1 p5 p9  p13
    c.val = _mm_unpacklo_ps(even_hi, odd_hi);   // p2 p6 p10 p14
    d.val = _mm_unpackhi_ps(even_hi, odd_hi);   // p3 p7 p11 p15
}
inline
void
v_load_deinterleave
(
const
uint64
*
ptr
,
v_uint64x2
&
a
,
v_uint64x2
&
b
,
v_uint64x2
&
c
)
{
__m128i
t0
=
_mm_loadu_si128
((
const
__m128i
*
)
ptr
);
...
...
@@ -1796,6 +1829,41 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
_mm_storeu_ps
((
ptr
+
4
),
u1
);
}
// Writes the lanes of a, b and c to 12 consecutive floats at ptr (unaligned
// stores) in interleaved order:
//   a0 b0 c0 a1 | b1 c1 a2 b2 | c2 a3 b3 c3
// where xN is lane N of vector x. Lane comments list lane 0 to lane 3.
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
{
    // Output floats 0..3.
    const __m128 a0_b0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0));   // a0 a0 b0 b0
    const __m128 c0_a1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0));   // c0 c0 a1 a1
    const __m128 out0  = _mm_shuffle_ps(a0_b0, c0_a1, _MM_SHUFFLE(2, 0, 2, 0));   // a0 b0 c0 a1

    // Output floats 4..7.
    const __m128 b1_c1 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(1, 1, 1, 1));   // b1 b1 c1 c1
    const __m128 a2_b2 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(2, 2, 2, 2));   // a2 a2 b2 b2
    const __m128 out1  = _mm_shuffle_ps(b1_c1, a2_b2, _MM_SHUFFLE(2, 0, 2, 0));   // b1 c1 a2 b2

    // Output floats 8..11.
    const __m128 c2_a3 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(3, 3, 2, 2));   // c2 c2 a3 a3
    const __m128 b3_c3 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3));   // b3 b3 c3 c3
    const __m128 out2  = _mm_shuffle_ps(c2_a3, b3_c3, _MM_SHUFFLE(2, 0, 2, 0));   // c2 a3 b3 c3

    // All three results are computed before anything is written to ptr.
    _mm_storeu_ps(ptr + 0, out0);
    _mm_storeu_ps(ptr + 4, out1);
    _mm_storeu_ps(ptr + 8, out2);
}
// Writes the lanes of a, b, c and d to 16 consecutive floats at ptr (unaligned
// stores) in interleaved order:
//   a0 b0 c0 d0 | a1 b1 c1 d1 | a2 b2 c2 d2 | a3 b3 c3 d3
// where xN is lane N of vector x. This is a 4x4 transpose done with two rounds
// of unpack operations. Lane comments list lane 0 to lane 3.
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
                               const v_float32x4& c, const v_float32x4& d)
{
    // Round 1: pair a with c and b with d.
    const __m128 ac_lo = _mm_unpacklo_ps(a.val, c.val);   // a0 c0 a1 c1
    const __m128 bd_lo = _mm_unpacklo_ps(b.val, d.val);   // b0 d0 b1 d1
    const __m128 ac_hi = _mm_unpackhi_ps(a.val, c.val);   // a2 c2 a3 c3
    const __m128 bd_hi = _mm_unpackhi_ps(b.val, d.val);   // b2 d2 b3 d3

    // Round 2: interleave the pairs so each result holds one a/b/c/d quadruple.
    const __m128 out0 = _mm_unpacklo_ps(ac_lo, bd_lo);    // a0 b0 c0 d0
    const __m128 out1 = _mm_unpackhi_ps(ac_lo, bd_lo);    // a1 b1 c1 d1
    const __m128 out2 = _mm_unpacklo_ps(ac_hi, bd_hi);    // a2 b2 c2 d2
    const __m128 out3 = _mm_unpackhi_ps(ac_hi, bd_hi);    // a3 b3 c3 d3

    // All four results are computed before anything is written to ptr.
    _mm_storeu_ps(ptr + 0,  out0);
    _mm_storeu_ps(ptr + 4,  out1);
    _mm_storeu_ps(ptr + 8,  out2);
    _mm_storeu_ps(ptr + 12, out3);
}
inline
void
v_store_interleave
(
uint64
*
ptr
,
const
v_uint64x2
&
a
,
const
v_uint64x2
&
b
,
const
v_uint64x2
&
c
)
{
__m128i
t0
=
_mm_unpacklo_epi64
(
a
.
val
,
b
.
val
);
...
...
@@ -1858,7 +1926,7 @@ inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE
(
v_int8x16
,
schar
,
s8
,
v_uint8x16
,
uchar
,
u8
)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE
(
v_int16x8
,
short
,
s16
,
v_uint16x8
,
ushort
,
u16
)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE
(
v_int32x4
,
int
,
s32
,
v_uint32x4
,
unsigned
,
u32
)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE
(
v_float32x4
,
float
,
f32
,
v_uint32x4
,
unsigned
,
u32
)
//
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32)
inline
v_float32x4
v_cvt_f32
(
const
v_int32x4
&
a
)
{
...
...
modules/imgproc/src/color_hsv.cpp
View file @
fbdcc0e8
...
...
@@ -134,159 +134,116 @@ struct HSV2RGB_f
HSV2RGB_f
(
int
_dstcn
,
int
_blueIdx
,
float
_hrange
)
:
dstcn
(
_dstcn
),
blueIdx
(
_blueIdx
),
hscale
(
6.
f
/
_hrange
)
{
#if CV_S
SE2
ha
veSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#if CV_S
IMD128
ha
sSIMD
=
hasSIMD128
(
);
#endif
}
#if CV_S
SE2
void
process
(
__m128
&
v_h0
,
__m128
&
v_h1
,
__m128
&
v_s0
,
__m128
&
v_s1
,
__m128
&
v_v0
,
__m128
&
v_v1
)
const
#if CV_S
IMD128
inline
void
process
(
v_float32x4
&
v_h
,
v_float32x4
&
v_s
,
v_float32x4
&
v_v
,
v_float32x4
&
v_scale
)
const
{
v_h0
=
_mm_mul_ps
(
v_h0
,
_mm_set1_ps
(
hscale
));
v_h1
=
_mm_mul_ps
(
v_h1
,
_mm_set1_ps
(
hscale
));
__m128
v_pre_sector0
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_h0
));
__m128
v_pre_sector1
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_h1
));
v_h0
=
_mm_sub_ps
(
v_h0
,
v_pre_sector0
);
v_h1
=
_mm_sub_ps
(
v_h1
,
v_pre_sector1
);
__m128
v_tab00
=
v_v0
;
__m128
v_tab01
=
v_v1
;
__m128
v_tab10
=
_mm_mul_ps
(
v_v0
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
v_s0
));
__m128
v_tab11
=
_mm_mul_ps
(
v_v1
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
v_s1
));
__m128
v_tab20
=
_mm_mul_ps
(
v_v0
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
_mm_mul_ps
(
v_s0
,
v_h0
)));
__m128
v_tab21
=
_mm_mul_ps
(
v_v1
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
_mm_mul_ps
(
v_s1
,
v_h1
)));
__m128
v_tab30
=
_mm_mul_ps
(
v_v0
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
_mm_mul_ps
(
v_s0
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
v_h0
))));
__m128
v_tab31
=
_mm_mul_ps
(
v_v1
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
_mm_mul_ps
(
v_s1
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
v_h1
))));
__m128
v_sector0
=
_mm_div_ps
(
v_pre_sector0
,
_mm_set1_ps
(
6.0
f
));
__m128
v_sector1
=
_mm_div_ps
(
v_pre_sector1
,
_mm_set1_ps
(
6.0
f
));
v_sector0
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_sector0
));
v_sector1
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_sector1
));
v_sector0
=
_mm_mul_ps
(
v_sector0
,
_mm_set1_ps
(
6.0
f
));
v_sector1
=
_mm_mul_ps
(
v_sector1
,
_mm_set1_ps
(
6.0
f
));
v_sector0
=
_mm_sub_ps
(
v_pre_sector0
,
v_sector0
);
v_sector1
=
_mm_sub_ps
(
v_pre_sector1
,
v_sector1
);
v_h0
=
_mm_and_ps
(
v_tab10
,
_mm_cmplt_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
)));
v_h1
=
_mm_and_ps
(
v_tab11
,
_mm_cmplt_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
)));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_tab30
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_tab31
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_tab00
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_tab01
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_tab00
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_tab01
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_tab20
,
_mm_cmpgt_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_tab21
,
_mm_cmpgt_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_s0
=
_mm_and_ps
(
v_tab30
,
_mm_cmplt_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
)));
v_s1
=
_mm_and_ps
(
v_tab31
,
_mm_cmplt_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
)));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_tab00
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_tab01
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_tab00
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_tab01
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_tab20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_tab21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_tab10
,
_mm_cmpgt_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_tab11
,
_mm_cmpgt_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_v0
=
_mm_and_ps
(
v_tab00
,
_mm_cmplt_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
)));
v_v1
=
_mm_and_ps
(
v_tab01
,
_mm_cmplt_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
)));
v_v0
=
_mm_or_ps
(
v_v0
,
_mm_and_ps
(
v_tab20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
))));
v_v1
=
_mm_or_ps
(
v_v1
,
_mm_and_ps
(
v_tab21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
))));
v_v0
=
_mm_or_ps
(
v_v0
,
_mm_and_ps
(
v_tab10
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
))));
v_v1
=
_mm_or_ps
(
v_v1
,
_mm_and_ps
(
v_tab11
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
))));
v_v0
=
_mm_or_ps
(
v_v0
,
_mm_and_ps
(
v_tab10
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_v1
=
_mm_or_ps
(
v_v1
,
_mm_and_ps
(
v_tab11
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_v0
=
_mm_or_ps
(
v_v0
,
_mm_and_ps
(
v_tab30
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_v1
=
_mm_or_ps
(
v_v1
,
_mm_and_ps
(
v_tab31
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_v0
=
_mm_or_ps
(
v_v0
,
_mm_and_ps
(
v_tab00
,
_mm_cmpgt_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_v1
=
_mm_or_ps
(
v_v1
,
_mm_and_ps
(
v_tab01
,
_mm_cmpgt_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_h
=
v_h
*
v_scale
;
v_float32x4
v_pre_sector
=
v_cvt_f32
(
v_trunc
(
v_h
));
v_h
=
v_h
-
v_pre_sector
;
v_float32x4
v_tab0
=
v_v
;
v_float32x4
v_one
=
v_setall_f32
(
1.0
f
);
v_float32x4
v_tab1
=
v_v
*
(
v_one
-
v_s
);
v_float32x4
v_tab2
=
v_v
*
(
v_one
-
(
v_s
*
v_h
));
v_float32x4
v_tab3
=
v_v
*
(
v_one
-
(
v_s
*
(
v_one
-
v_h
)));
v_float32x4
v_one_sixth
=
v_setall_f32
(
1.0
f
/
6.0
f
);
v_float32x4
v_sector
=
v_pre_sector
*
v_one_sixth
;
v_sector
=
v_cvt_f32
(
v_trunc
(
v_sector
));
v_float32x4
v_six
=
v_setall_f32
(
6.0
f
);
v_sector
=
v_pre_sector
-
(
v_sector
*
v_six
);
v_float32x4
v_two
=
v_setall_f32
(
2.0
f
);
v_h
=
v_tab1
&
(
v_sector
<
v_two
);
v_h
=
v_h
|
(
v_tab3
&
(
v_sector
==
v_two
));
v_float32x4
v_three
=
v_setall_f32
(
3.0
f
);
v_h
=
v_h
|
(
v_tab0
&
(
v_sector
==
v_three
));
v_float32x4
v_four
=
v_setall_f32
(
4.0
f
);
v_h
=
v_h
|
(
v_tab0
&
(
v_sector
==
v_four
));
v_h
=
v_h
|
(
v_tab2
&
(
v_sector
>
v_four
));
v_s
=
v_tab3
&
(
v_sector
<
v_one
);
v_s
=
v_s
|
(
v_tab0
&
(
v_sector
==
v_one
));
v_s
=
v_s
|
(
v_tab0
&
(
v_sector
==
v_two
));
v_s
=
v_s
|
(
v_tab2
&
(
v_sector
==
v_three
));
v_s
=
v_s
|
(
v_tab1
&
(
v_sector
>
v_three
));
v_v
=
v_tab0
&
(
v_sector
<
v_one
);
v_v
=
v_v
|
(
v_tab2
&
(
v_sector
==
v_one
));
v_v
=
v_v
|
(
v_tab1
&
(
v_sector
==
v_two
));
v_v
=
v_v
|
(
v_tab1
&
(
v_sector
==
v_three
));
v_v
=
v_v
|
(
v_tab3
&
(
v_sector
==
v_four
));
v_v
=
v_v
|
(
v_tab0
&
(
v_sector
>
v_four
));
}
#endif
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
i
=
0
,
bidx
=
blueIdx
,
dcn
=
dstcn
;
float
_hscale
=
hscale
;
float
alpha
=
ColorChannel
<
float
>::
max
();
n
*=
3
;
#if CV_S
SE2
if
(
ha
ve
SIMD
)
#if CV_S
IMD128
if
(
ha
s
SIMD
)
{
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
v_float32x4
v_scale
=
v_setall_f32
(
hscale
);
if
(
dcn
==
3
)
{
__m128
v_h0
=
_mm_loadu_ps
(
src
+
i
+
0
);
__m128
v_h1
=
_mm_loadu_ps
(
src
+
i
+
4
);
__m128
v_s0
=
_mm_loadu_ps
(
src
+
i
+
8
);
__m128
v_s1
=
_mm_loadu_ps
(
src
+
i
+
12
);
__m128
v_v0
=
_mm_loadu_ps
(
src
+
i
+
16
);
__m128
v_v1
=
_mm_loadu_ps
(
src
+
i
+
20
);
_mm_deinterleave_ps
(
v_h0
,
v_h1
,
v_s0
,
v_s1
,
v_v0
,
v_v1
);
process
(
v_h0
,
v_h1
,
v_s0
,
v_s1
,
v_v0
,
v_v1
);
if
(
dcn
==
3
)
if
(
bidx
)
{
if
(
bidx
)
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
_mm_interleave_ps
(
v_v0
,
v_v1
,
v_s0
,
v_s1
,
v_h0
,
v_h1
);
_mm_storeu_ps
(
dst
+
0
,
v_v0
);
_mm_storeu_ps
(
dst
+
4
,
v_v1
);
_mm_storeu_ps
(
dst
+
8
,
v_s0
);
_mm_storeu_ps
(
dst
+
12
,
v_s1
);
_mm_storeu_ps
(
dst
+
16
,
v_h0
);
_mm_storeu_ps
(
dst
+
20
,
v_h1
);
v_float32x4
v_h
;
v_float32x4
v_s
;
v_float32x4
v_v
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_s
,
v_v
);
process
(
v_h
,
v_s
,
v_v
,
v_scale
);
v_store_interleave
(
dst
,
v_v
,
v_s
,
v_h
);
}
else
}
else
{
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
_mm_interleave_ps
(
v_h0
,
v_h1
,
v_s0
,
v_s1
,
v_v0
,
v_v1
);
_mm_storeu_ps
(
dst
+
0
,
v_h0
);
_mm_storeu_ps
(
dst
+
4
,
v_h1
);
_mm_storeu_ps
(
dst
+
8
,
v_s0
);
_mm_storeu_ps
(
dst
+
12
,
v_s1
);
_mm_storeu_ps
(
dst
+
16
,
v_v0
);
_mm_storeu_ps
(
dst
+
20
,
v_v1
);
v_float32x4
v_h
;
v_float32x4
v_s
;
v_float32x4
v_v
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_s
,
v_v
);
process
(
v_h
,
v_s
,
v_v
,
v_scale
);
v_store_interleave
(
dst
,
v_h
,
v_s
,
v_v
);
}
}
else
}
else
{
// dcn == 4
v_float32x4
v_a
=
v_setall_f32
(
alpha
);
if
(
bidx
)
{
__m128
v_a0
=
_mm_set1_ps
(
alpha
);
__m128
v_a1
=
_mm_set1_ps
(
alpha
);
if
(
bidx
)
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
_mm_interleave_ps
(
v_v0
,
v_v1
,
v_s0
,
v_s1
,
v_h0
,
v_h1
,
v_a0
,
v_a1
);
_mm_storeu_ps
(
dst
+
0
,
v_v0
);
_mm_storeu_ps
(
dst
+
4
,
v_v1
);
_mm_storeu_ps
(
dst
+
8
,
v_s0
);
_mm_storeu_ps
(
dst
+
12
,
v_s1
);
_mm_storeu_ps
(
dst
+
16
,
v_h0
);
_mm_storeu_ps
(
dst
+
20
,
v_h1
);
_mm_storeu_ps
(
dst
+
24
,
v_a0
);
_mm_storeu_ps
(
dst
+
28
,
v_a1
);
v_float32x4
v_h
;
v_float32x4
v_s
;
v_float32x4
v_v
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_s
,
v_v
);
process
(
v_h
,
v_s
,
v_v
,
v_scale
);
v_store_interleave
(
dst
,
v_v
,
v_s
,
v_h
,
v_a
);
}
else
}
else
{
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
_mm_interleave_ps
(
v_h0
,
v_h1
,
v_s0
,
v_s1
,
v_v0
,
v_v1
,
v_a0
,
v_a1
);
_mm_storeu_ps
(
dst
+
0
,
v_h0
);
_mm_storeu_ps
(
dst
+
4
,
v_h1
);
_mm_storeu_ps
(
dst
+
8
,
v_s0
);
_mm_storeu_ps
(
dst
+
12
,
v_s1
);
_mm_storeu_ps
(
dst
+
16
,
v_v0
);
_mm_storeu_ps
(
dst
+
20
,
v_v1
);
_mm_storeu_ps
(
dst
+
24
,
v_a0
);
_mm_storeu_ps
(
dst
+
28
,
v_a1
);
v_float32x4
v_h
;
v_float32x4
v_s
;
v_float32x4
v_v
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_s
,
v_v
);
process
(
v_h
,
v_s
,
v_v
,
v_scale
);
v_store_interleave
(
dst
,
v_h
,
v_s
,
v_v
,
v_a
);
}
}
}
}
#endif
for
(
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
float
h
=
src
[
i
],
s
=
src
[
i
+
1
],
v
=
src
[
i
+
2
];
...
...
@@ -300,7 +257,7 @@ struct HSV2RGB_f
{{
1
,
3
,
0
},
{
1
,
0
,
2
},
{
3
,
0
,
1
},
{
0
,
2
,
1
},
{
0
,
1
,
3
},
{
2
,
1
,
0
}};
float
tab
[
4
];
int
sector
;
h
*=
_
hscale
;
h
*=
hscale
;
if
(
h
<
0
)
do
h
+=
6
;
while
(
h
<
0
);
else
if
(
h
>=
6
)
...
...
@@ -333,8 +290,8 @@ struct HSV2RGB_f
int
dstcn
,
blueIdx
;
float
hscale
;
#if CV_S
SE2
bool
ha
ve
SIMD
;
#if CV_S
IMD128
bool
ha
s
SIMD
;
#endif
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment