Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
566cb5e3
Commit
566cb5e3
authored
May 10, 2018
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #11430 from K-Shinotsuka:issue43
parents
6f24b810
a5f68e98
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
78 additions
and
134 deletions
+78
-134
color_hsv.cpp
modules/imgproc/src/color_hsv.cpp
+78
-134
No files found.
modules/imgproc/src/color_hsv.cpp
View file @
566cb5e3
...
...
@@ -855,167 +855,111 @@ struct HLS2RGB_f
HLS2RGB_f
(
int
_dstcn
,
int
_blueIdx
,
float
_hrange
)
:
dstcn
(
_dstcn
),
blueIdx
(
_blueIdx
),
hscale
(
6.
f
/
_hrange
)
{
#if CV_S
SE2
ha
veSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#if CV_S
IMD128
ha
sSIMD
=
hasSIMD128
(
);
#endif
}
#if CV_SSE2
void
process
(
__m128
&
v_h0
,
__m128
&
v_h1
,
__m128
&
v_l0
,
__m128
&
v_l1
,
__m128
&
v_s0
,
__m128
&
v_s1
)
const
{
__m128
v_lel0
=
_mm_cmple_ps
(
v_l0
,
_mm_set1_ps
(
0.5
f
));
__m128
v_lel1
=
_mm_cmple_ps
(
v_l1
,
_mm_set1_ps
(
0.5
f
));
__m128
v_p20
=
_mm_andnot_ps
(
v_lel0
,
_mm_sub_ps
(
_mm_add_ps
(
v_l0
,
v_s0
),
_mm_mul_ps
(
v_l0
,
v_s0
)));
__m128
v_p21
=
_mm_andnot_ps
(
v_lel1
,
_mm_sub_ps
(
_mm_add_ps
(
v_l1
,
v_s1
),
_mm_mul_ps
(
v_l1
,
v_s1
)));
v_p20
=
_mm_or_ps
(
v_p20
,
_mm_and_ps
(
v_lel0
,
_mm_mul_ps
(
v_l0
,
_mm_add_ps
(
_mm_set1_ps
(
1.0
f
),
v_s0
))));
v_p21
=
_mm_or_ps
(
v_p21
,
_mm_and_ps
(
v_lel1
,
_mm_mul_ps
(
v_l1
,
_mm_add_ps
(
_mm_set1_ps
(
1.0
f
),
v_s1
))));
__m128
v_p10
=
_mm_sub_ps
(
_mm_mul_ps
(
_mm_set1_ps
(
2.0
f
),
v_l0
),
v_p20
);
__m128
v_p11
=
_mm_sub_ps
(
_mm_mul_ps
(
_mm_set1_ps
(
2.0
f
),
v_l1
),
v_p21
);
v_h0
=
_mm_mul_ps
(
v_h0
,
_mm_set1_ps
(
hscale
));
v_h1
=
_mm_mul_ps
(
v_h1
,
_mm_set1_ps
(
hscale
));
__m128
v_pre_sector0
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_h0
));
__m128
v_pre_sector1
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_h1
));
v_h0
=
_mm_sub_ps
(
v_h0
,
v_pre_sector0
);
v_h1
=
_mm_sub_ps
(
v_h1
,
v_pre_sector1
);
__m128
v_p2_p10
=
_mm_sub_ps
(
v_p20
,
v_p10
);
__m128
v_p2_p11
=
_mm_sub_ps
(
v_p21
,
v_p11
);
__m128
v_tab20
=
_mm_add_ps
(
v_p10
,
_mm_mul_ps
(
v_p2_p10
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
v_h0
)));
__m128
v_tab21
=
_mm_add_ps
(
v_p11
,
_mm_mul_ps
(
v_p2_p11
,
_mm_sub_ps
(
_mm_set1_ps
(
1.0
f
),
v_h1
)));
__m128
v_tab30
=
_mm_add_ps
(
v_p10
,
_mm_mul_ps
(
v_p2_p10
,
v_h0
));
__m128
v_tab31
=
_mm_add_ps
(
v_p11
,
_mm_mul_ps
(
v_p2_p11
,
v_h1
));
__m128
v_sector0
=
_mm_div_ps
(
v_pre_sector0
,
_mm_set1_ps
(
6.0
f
));
__m128
v_sector1
=
_mm_div_ps
(
v_pre_sector1
,
_mm_set1_ps
(
6.0
f
));
v_sector0
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_sector0
));
v_sector1
=
_mm_cvtepi32_ps
(
_mm_cvttps_epi32
(
v_sector1
));
v_sector0
=
_mm_mul_ps
(
v_sector0
,
_mm_set1_ps
(
6.0
f
));
v_sector1
=
_mm_mul_ps
(
v_sector1
,
_mm_set1_ps
(
6.0
f
));
v_sector0
=
_mm_sub_ps
(
v_pre_sector0
,
v_sector0
);
v_sector1
=
_mm_sub_ps
(
v_pre_sector1
,
v_sector1
);
v_h0
=
_mm_and_ps
(
v_p10
,
_mm_cmplt_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
)));
v_h1
=
_mm_and_ps
(
v_p11
,
_mm_cmplt_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
)));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_tab30
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_tab31
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_p20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_p21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_p20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_p21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
v_tab20
,
_mm_cmpgt_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
v_tab21
,
_mm_cmpgt_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_l0
=
_mm_and_ps
(
v_tab30
,
_mm_cmplt_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
)));
v_l1
=
_mm_and_ps
(
v_tab31
,
_mm_cmplt_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
)));
v_l0
=
_mm_or_ps
(
v_l0
,
_mm_and_ps
(
v_p20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
))));
v_l1
=
_mm_or_ps
(
v_l1
,
_mm_and_ps
(
v_p21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
))));
v_l0
=
_mm_or_ps
(
v_l0
,
_mm_and_ps
(
v_p20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
))));
v_l1
=
_mm_or_ps
(
v_l1
,
_mm_and_ps
(
v_p21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
))));
v_l0
=
_mm_or_ps
(
v_l0
,
_mm_and_ps
(
v_tab20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_l1
=
_mm_or_ps
(
v_l1
,
_mm_and_ps
(
v_tab21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_l0
=
_mm_or_ps
(
v_l0
,
_mm_and_ps
(
v_p10
,
_mm_cmpgt_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_l1
=
_mm_or_ps
(
v_l1
,
_mm_and_ps
(
v_p11
,
_mm_cmpgt_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_s0
=
_mm_and_ps
(
v_p20
,
_mm_cmplt_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
)));
v_s1
=
_mm_and_ps
(
v_p21
,
_mm_cmplt_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
)));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_tab20
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
1.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_tab21
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
1.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_p10
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
2.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_p11
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
2.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_p10
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
3.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_p11
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
3.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_tab30
,
_mm_cmpeq_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_tab31
,
_mm_cmpeq_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_and_ps
(
v_p20
,
_mm_cmpgt_ps
(
v_sector0
,
_mm_set1_ps
(
4.0
f
))));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_and_ps
(
v_p21
,
_mm_cmpgt_ps
(
v_sector1
,
_mm_set1_ps
(
4.0
f
))));
#if CV_SIMD128
inline
void
process
(
v_float32x4
&
v_h
,
v_float32x4
&
v_l
,
v_float32x4
&
v_s
)
const
{
v_float32x4
v_one
=
v_setall_f32
(
1.0
f
);
v_float32x4
v_l_le_half
=
v_l
<=
v_setall_f32
(
0.5
f
);
v_float32x4
v_ls
=
v_l
*
v_s
;
v_float32x4
v_elem0
=
v_select
(
v_l_le_half
,
v_ls
,
v_s
-
v_ls
);
v_float32x4
v_hs_raw
=
v_h
*
v_setall_f32
(
hscale
);
v_float32x4
v_pre_hs
=
v_cvt_f32
(
v_trunc
(
v_hs_raw
));
v_float32x4
v_hs
=
v_hs_raw
-
v_pre_hs
;
v_float32x4
v_sector
=
v_pre_hs
-
v_setall_f32
(
6.0
f
)
*
v_cvt_f32
(
v_trunc
(
v_hs_raw
*
v_setall_f32
(
1.0
f
/
6.0
f
)));
v_float32x4
v_elem1
=
v_hs
+
v_hs
;
v_float32x4
v_tab0
=
v_l
+
v_elem0
;
v_float32x4
v_tab1
=
v_l
-
v_elem0
;
v_float32x4
v_tab2
=
v_l
+
v_elem0
-
v_elem0
*
v_elem1
;
v_float32x4
v_tab3
=
v_l
-
v_elem0
+
v_elem0
*
v_elem1
;
v_float32x4
v_two
=
v_setall_f32
(
2.0
f
);
v_float32x4
v_four
=
v_setall_f32
(
4.0
f
);
v_h
=
v_select
(
v_sector
<
v_two
,
v_tab1
,
v_select
(
v_sector
<=
v_two
,
v_tab3
,
v_select
(
v_sector
<=
v_four
,
v_tab0
,
v_tab2
)));
v_l
=
v_select
(
v_sector
<
v_one
,
v_tab3
,
v_select
(
v_sector
<=
v_two
,
v_tab0
,
v_select
(
v_sector
<
v_four
,
v_tab2
,
v_tab1
)));
v_s
=
v_select
(
v_sector
<
v_one
,
v_tab0
,
v_select
(
v_sector
<
v_two
,
v_tab2
,
v_select
(
v_sector
<
v_four
,
v_tab1
,
v_select
(
v_sector
<=
v_four
,
v_tab3
,
v_tab0
))));
}
#endif
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
i
=
0
,
bidx
=
blueIdx
,
dcn
=
dstcn
;
float
_hscale
=
hscale
;
float
alpha
=
ColorChannel
<
float
>::
max
();
n
*=
3
;
#if CV_SSE2
if
(
haveSIMD
)
{
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
#if CV_SIMD128
if
(
hasSIMD
)
{
__m128
v_h0
=
_mm_loadu_ps
(
src
+
i
+
0
);
__m128
v_h1
=
_mm_loadu_ps
(
src
+
i
+
4
);
__m128
v_l0
=
_mm_loadu_ps
(
src
+
i
+
8
);
__m128
v_l1
=
_mm_loadu_ps
(
src
+
i
+
12
);
__m128
v_s0
=
_mm_loadu_ps
(
src
+
i
+
16
);
__m128
v_s1
=
_mm_loadu_ps
(
src
+
i
+
20
);
_mm_deinterleave_ps
(
v_h0
,
v_h1
,
v_l0
,
v_l1
,
v_s0
,
v_s1
);
process
(
v_h0
,
v_h1
,
v_l0
,
v_l1
,
v_s0
,
v_s1
);
if
(
dcn
==
3
)
{
if
(
bidx
)
{
_mm_interleave_ps
(
v_s0
,
v_s1
,
v_l0
,
v_l1
,
v_h0
,
v_h1
);
_mm_storeu_ps
(
dst
+
0
,
v_s0
)
;
_mm_storeu_ps
(
dst
+
4
,
v_s1
)
;
_mm_storeu_ps
(
dst
+
8
,
v_l0
)
;
_mm_storeu_ps
(
dst
+
12
,
v_l1
);
_mm_storeu_ps
(
dst
+
16
,
v_h0
);
_mm_storeu_ps
(
dst
+
20
,
v_h1
);
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
v_float32x4
v_h
;
v_float32x4
v_l
;
v_float32x4
v_s
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_l
,
v_s
);
process
(
v_h
,
v_l
,
v_s
);
v_store_interleave
(
dst
,
v_s
,
v_l
,
v_h
);
}
else
}
else
{
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
_mm_interleave_ps
(
v_h0
,
v_h1
,
v_l0
,
v_l1
,
v_s0
,
v_s1
);
_mm_storeu_ps
(
dst
+
0
,
v_h0
);
_mm_storeu_ps
(
dst
+
4
,
v_h1
);
_mm_storeu_ps
(
dst
+
8
,
v_l0
);
_mm_storeu_ps
(
dst
+
12
,
v_l1
);
_mm_storeu_ps
(
dst
+
16
,
v_s0
);
_mm_storeu_ps
(
dst
+
20
,
v_s1
);
v_float32x4
v_h
;
v_float32x4
v_l
;
v_float32x4
v_s
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_l
,
v_s
);
process
(
v_h
,
v_l
,
v_s
);
v_store_interleave
(
dst
,
v_h
,
v_l
,
v_s
);
}
}
else
{
__m128
v_a0
=
_mm_set1_ps
(
alpha
);
__m128
v_a1
=
_mm_set1_ps
(
alpha
);
}
else
{
// dcn == 4
if
(
bidx
)
{
_mm_interleave_ps
(
v_s0
,
v_s1
,
v_l0
,
v_l1
,
v_h0
,
v_h1
,
v_a0
,
v_a1
);
_mm_storeu_ps
(
dst
+
0
,
v_s0
);
_mm_storeu_ps
(
dst
+
4
,
v_s1
);
_mm_storeu_ps
(
dst
+
8
,
v_l0
);
_mm_storeu_ps
(
dst
+
12
,
v_l1
);
_mm_storeu_ps
(
dst
+
16
,
v_h0
);
_mm_storeu_ps
(
dst
+
20
,
v_h1
);
_mm_storeu_ps
(
dst
+
24
,
v_a0
);
_mm_storeu_ps
(
dst
+
28
,
v_a1
);
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
v_float32x4
v_h
;
v_float32x4
v_l
;
v_float32x4
v_s
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_l
,
v_s
);
process
(
v_h
,
v_l
,
v_s
);
v_float32x4
v_a
=
v_setall_f32
(
alpha
);
v_store_interleave
(
dst
,
v_s
,
v_l
,
v_h
,
v_a
);
}
else
}
else
{
for
(;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
_mm_interleave_ps
(
v_h0
,
v_h1
,
v_l0
,
v_l1
,
v_s0
,
v_s1
,
v_a0
,
v_a1
);
_mm_storeu_ps
(
dst
+
0
,
v_h0
);
_mm_storeu_ps
(
dst
+
4
,
v_h1
);
_mm_storeu_ps
(
dst
+
8
,
v_l0
);
_mm_storeu_ps
(
dst
+
12
,
v_l1
);
_mm_storeu_ps
(
dst
+
16
,
v_s0
);
_mm_storeu_ps
(
dst
+
20
,
v_s1
);
_mm_storeu_ps
(
dst
+
24
,
v_a0
);
_mm_storeu_ps
(
dst
+
28
,
v_a1
);
v_float32x4
v_h
;
v_float32x4
v_l
;
v_float32x4
v_s
;
v_load_deinterleave
(
src
+
i
,
v_h
,
v_l
,
v_s
);
process
(
v_h
,
v_l
,
v_s
);
v_float32x4
v_a
=
v_setall_f32
(
alpha
);
v_store_interleave
(
dst
,
v_h
,
v_l
,
v_s
,
v_a
);
}
}
}
}
#endif
for
(
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
float
h
=
src
[
i
],
l
=
src
[
i
+
1
],
s
=
src
[
i
+
2
];
...
...
@@ -1033,7 +977,7 @@ struct HLS2RGB_f
float
p2
=
l
<=
0.5
f
?
l
*
(
1
+
s
)
:
l
+
s
-
l
*
s
;
float
p1
=
2
*
l
-
p2
;
h
*=
_
hscale
;
h
*=
hscale
;
if
(
h
<
0
)
do
h
+=
6
;
while
(
h
<
0
);
else
if
(
h
>=
6
)
...
...
@@ -1063,8 +1007,8 @@ struct HLS2RGB_f
int
dstcn
,
blueIdx
;
float
hscale
;
#if CV_S
SE2
bool
ha
ve
SIMD
;
#if CV_S
IMD128
bool
ha
s
SIMD
;
#endif
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment