Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ad212776
Commit
ad212776
authored
Apr 19, 2018
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #11309 from K-Shinotsuka:issue41
parents
0477284b
b3755ae6
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
77 additions
and
94 deletions
+77
-94
color_hsv.cpp
modules/imgproc/src/color_hsv.cpp
+77
-94
No files found.
modules/imgproc/src/color_hsv.cpp
View file @
ad212776
...
...
@@ -523,62 +523,38 @@ struct RGB2HLS_f
RGB2HLS_f
(
int
_srccn
,
int
_blueIdx
,
float
_hrange
)
:
srccn
(
_srccn
),
blueIdx
(
_blueIdx
),
hscale
(
_hrange
/
360.
f
)
{
#if CV_S
SE2
ha
veSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#if CV_S
IMD128
ha
sSIMD
=
hasSIMD128
(
);
#endif
}
#if CV_S
SE2
void
process
(
__m128
&
v_b0
,
__m128
&
v_b1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_r0
,
__m128
&
v_r1
)
const
#if CV_S
IMD128
inline
void
process
(
v_float32x4
&
v_r
,
v_float32x4
&
v_g
,
v_float32x4
&
v_b
,
v_float32x4
&
v_hscale
)
const
{
__m128
v_max0
=
_mm_max_ps
(
_mm_max_ps
(
v_b0
,
v_g0
),
v_r0
);
__m128
v_max1
=
_mm_max_ps
(
_mm_max_ps
(
v_b1
,
v_g1
),
v_r1
);
__m128
v_min0
=
_mm_min_ps
(
_mm_min_ps
(
v_b0
,
v_g0
),
v_r0
);
__m128
v_min1
=
_mm_min_ps
(
_mm_min_ps
(
v_b1
,
v_g1
),
v_r1
);
__m128
v_diff0
=
_mm_sub_ps
(
v_max0
,
v_min0
);
__m128
v_diff1
=
_mm_sub_ps
(
v_max1
,
v_min1
);
__m128
v_sum0
=
_mm_add_ps
(
v_max0
,
v_min0
);
__m128
v_sum1
=
_mm_add_ps
(
v_max1
,
v_min1
);
__m128
v_l0
=
_mm_mul_ps
(
v_sum0
,
_mm_set1_ps
(
0.5
f
));
__m128
v_l1
=
_mm_mul_ps
(
v_sum1
,
_mm_set1_ps
(
0.5
f
));
__m128
v_gel0
=
_mm_cmpge_ps
(
v_l0
,
_mm_set1_ps
(
0.5
f
));
__m128
v_gel1
=
_mm_cmpge_ps
(
v_l1
,
_mm_set1_ps
(
0.5
f
));
__m128
v_s0
=
_mm_and_ps
(
v_gel0
,
_mm_sub_ps
(
_mm_set1_ps
(
2.0
f
),
v_sum0
));
__m128
v_s1
=
_mm_and_ps
(
v_gel1
,
_mm_sub_ps
(
_mm_set1_ps
(
2.0
f
),
v_sum1
));
v_s0
=
_mm_or_ps
(
v_s0
,
_mm_andnot_ps
(
v_gel0
,
v_sum0
));
v_s1
=
_mm_or_ps
(
v_s1
,
_mm_andnot_ps
(
v_gel1
,
v_sum1
));
v_s0
=
_mm_div_ps
(
v_diff0
,
v_s0
);
v_s1
=
_mm_div_ps
(
v_diff1
,
v_s1
);
__m128
v_gteps0
=
_mm_cmpgt_ps
(
v_diff0
,
_mm_set1_ps
(
FLT_EPSILON
));
__m128
v_gteps1
=
_mm_cmpgt_ps
(
v_diff1
,
_mm_set1_ps
(
FLT_EPSILON
));
v_diff0
=
_mm_div_ps
(
_mm_set1_ps
(
60.
f
),
v_diff0
);
v_diff1
=
_mm_div_ps
(
_mm_set1_ps
(
60.
f
),
v_diff1
);
__m128
v_eqr0
=
_mm_cmpeq_ps
(
v_max0
,
v_r0
);
__m128
v_eqr1
=
_mm_cmpeq_ps
(
v_max1
,
v_r1
);
__m128
v_h0
=
_mm_and_ps
(
v_eqr0
,
_mm_mul_ps
(
_mm_sub_ps
(
v_g0
,
v_b0
),
v_diff0
));
__m128
v_h1
=
_mm_and_ps
(
v_eqr1
,
_mm_mul_ps
(
_mm_sub_ps
(
v_g1
,
v_b1
),
v_diff1
));
__m128
v_eqg0
=
_mm_cmpeq_ps
(
v_max0
,
v_g0
);
__m128
v_eqg1
=
_mm_cmpeq_ps
(
v_max1
,
v_g1
);
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_and_ps
(
_mm_andnot_ps
(
v_eqr0
,
v_eqg0
),
_mm_add_ps
(
_mm_mul_ps
(
_mm_sub_ps
(
v_b0
,
v_r0
),
v_diff0
),
_mm_set1_ps
(
120.
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_and_ps
(
_mm_andnot_ps
(
v_eqr1
,
v_eqg1
),
_mm_add_ps
(
_mm_mul_ps
(
_mm_sub_ps
(
v_b1
,
v_r1
),
v_diff1
),
_mm_set1_ps
(
120.
f
))));
v_h0
=
_mm_or_ps
(
v_h0
,
_mm_andnot_ps
(
_mm_or_ps
(
v_eqr0
,
v_eqg0
),
_mm_add_ps
(
_mm_mul_ps
(
_mm_sub_ps
(
v_r0
,
v_g0
),
v_diff0
),
_mm_set1_ps
(
240.
f
))));
v_h1
=
_mm_or_ps
(
v_h1
,
_mm_andnot_ps
(
_mm_or_ps
(
v_eqr1
,
v_eqg1
),
_mm_add_ps
(
_mm_mul_ps
(
_mm_sub_ps
(
v_r1
,
v_g1
),
v_diff1
),
_mm_set1_ps
(
240.
f
))));
v_h0
=
_mm_add_ps
(
v_h0
,
_mm_and_ps
(
_mm_cmplt_ps
(
v_h0
,
_mm_setzero_ps
()),
_mm_set1_ps
(
360.
f
)));
v_h1
=
_mm_add_ps
(
v_h1
,
_mm_and_ps
(
_mm_cmplt_ps
(
v_h1
,
_mm_setzero_ps
()),
_mm_set1_ps
(
360.
f
)));
v_h0
=
_mm_mul_ps
(
v_h0
,
_mm_set1_ps
(
hscale
));
v_h1
=
_mm_mul_ps
(
v_h1
,
_mm_set1_ps
(
hscale
));
v_b0
=
_mm_and_ps
(
v_gteps0
,
v_h0
);
v_b1
=
_mm_and_ps
(
v_gteps1
,
v_h1
);
v_g0
=
v_l0
;
v_g1
=
v_l1
;
v_r0
=
_mm_and_ps
(
v_gteps0
,
v_s0
);
v_r1
=
_mm_and_ps
(
v_gteps1
,
v_s1
);
v_float32x4
v_max_rgb
=
v_max
(
v_max
(
v_r
,
v_g
),
v_b
);
v_float32x4
v_min_rgb
=
v_min
(
v_min
(
v_r
,
v_g
),
v_b
);
v_float32x4
v_diff
=
v_max_rgb
-
v_min_rgb
;
v_float32x4
v_sum
=
v_max_rgb
+
v_min_rgb
;
v_float32x4
v_half
=
v_setall_f32
(
0.5
f
);
v_float32x4
v_l
=
v_sum
*
v_half
;
v_float32x4
v_s
=
v_diff
/
v_select
(
v_l
<
v_half
,
v_sum
,
v_setall_f32
(
2.0
f
)
-
v_sum
);
v_float32x4
v_r_eq_max
=
v_max_rgb
==
v_r
;
v_float32x4
v_g_eq_max
=
v_max_rgb
==
v_g
;
v_float32x4
v_h
=
v_select
(
v_r_eq_max
,
v_g
-
v_b
,
v_select
(
v_g_eq_max
,
v_b
-
v_r
,
v_r
-
v_g
));
v_float32x4
v_res
=
v_select
(
v_r_eq_max
,
(
v_g
<
v_b
)
&
v_setall_f32
(
360.0
f
),
v_select
(
v_g_eq_max
,
v_setall_f32
(
120.0
f
),
v_setall_f32
(
240.0
f
)));
v_float32x4
v_rev_diff
=
v_setall_f32
(
60.0
f
)
/
v_diff
;
v_h
=
v_muladd
(
v_h
,
v_rev_diff
,
v_res
)
*
v_hscale
;
v_float32x4
v_diff_gt_eps
=
v_diff
>
v_setall_f32
(
FLT_EPSILON
);
v_r
=
v_diff_gt_eps
&
v_h
;
v_g
=
v_l
;
v_b
=
v_diff_gt_eps
&
v_s
;
}
#endif
...
...
@@ -587,49 +563,56 @@ struct RGB2HLS_f
int
i
=
0
,
bidx
=
blueIdx
,
scn
=
srccn
;
n
*=
3
;
#if CV_S
SE2
if
(
ha
ve
SIMD
)
#if CV_S
IMD128
if
(
ha
s
SIMD
)
{
for
(
;
i
<=
n
-
24
;
i
+=
24
,
src
+=
scn
*
8
)
{
__m128
v_b0
=
_mm_loadu_ps
(
src
+
0
);
__m128
v_b1
=
_mm_loadu_ps
(
src
+
4
);
__m128
v_g0
=
_mm_loadu_ps
(
src
+
8
);
__m128
v_g1
=
_mm_loadu_ps
(
src
+
12
);
__m128
v_r0
=
_mm_loadu_ps
(
src
+
16
);
__m128
v_r1
=
_mm_loadu_ps
(
src
+
20
);
if
(
scn
==
3
)
{
_mm_deinterleave_ps
(
v_b0
,
v_b1
,
v_g0
,
v_g1
,
v_r0
,
v_r1
);
}
else
{
__m128
v_a0
=
_mm_loadu_ps
(
src
+
24
);
__m128
v_a1
=
_mm_loadu_ps
(
src
+
28
);
_mm_deinterleave_ps
(
v_b0
,
v_b1
,
v_g0
,
v_g1
,
v_r0
,
v_r1
,
v_a0
,
v_a1
);
v_float32x4
v_hscale
=
v_setall_f32
(
hscale
);
if
(
scn
==
3
)
{
if
(
bidx
)
{
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
scn
*
4
)
{
v_float32x4
v_r
;
v_float32x4
v_g
;
v_float32x4
v_b
;
v_load_deinterleave
(
src
,
v_r
,
v_g
,
v_b
);
process
(
v_r
,
v_g
,
v_b
,
v_hscale
);
v_store_interleave
(
dst
+
i
,
v_r
,
v_g
,
v_b
);
}
}
else
{
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
scn
*
4
)
{
v_float32x4
v_r
;
v_float32x4
v_g
;
v_float32x4
v_b
;
v_load_deinterleave
(
src
,
v_r
,
v_g
,
v_b
);
process
(
v_b
,
v_g
,
v_r
,
v_hscale
);
v_store_interleave
(
dst
+
i
,
v_b
,
v_g
,
v_r
);
}
}
if
(
bidx
)
{
__m128
v_tmp0
=
v_b0
;
__m128
v_tmp1
=
v_b1
;
v_b0
=
v_r0
;
v_b1
=
v_r1
;
v_r0
=
v_tmp0
;
v_r1
=
v_tmp1
;
}
else
{
// scn == 4
if
(
bidx
)
{
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
scn
*
4
)
{
v_float32x4
v_r
;
v_float32x4
v_g
;
v_float32x4
v_b
;
v_float32x4
v_a
;
v_load_deinterleave
(
src
,
v_r
,
v_g
,
v_b
,
v_a
);
process
(
v_r
,
v_g
,
v_b
,
v_hscale
);
v_store_interleave
(
dst
+
i
,
v_r
,
v_g
,
v_b
);
}
}
else
{
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
scn
*
4
)
{
v_float32x4
v_r
;
v_float32x4
v_g
;
v_float32x4
v_b
;
v_float32x4
v_a
;
v_load_deinterleave
(
src
,
v_r
,
v_g
,
v_b
,
v_a
);
process
(
v_b
,
v_g
,
v_r
,
v_hscale
);
v_store_interleave
(
dst
+
i
,
v_b
,
v_g
,
v_r
);
}
}
process
(
v_b0
,
v_b1
,
v_g0
,
v_g1
,
v_r0
,
v_r1
);
_mm_interleave_ps
(
v_b0
,
v_b1
,
v_g0
,
v_g1
,
v_r0
,
v_r1
);
_mm_storeu_ps
(
dst
+
i
+
0
,
v_b0
);
_mm_storeu_ps
(
dst
+
i
+
4
,
v_b1
);
_mm_storeu_ps
(
dst
+
i
+
8
,
v_g0
);
_mm_storeu_ps
(
dst
+
i
+
12
,
v_g1
);
_mm_storeu_ps
(
dst
+
i
+
16
,
v_r0
);
_mm_storeu_ps
(
dst
+
i
+
20
,
v_r1
);
}
}
#endif
...
...
@@ -672,8 +655,8 @@ struct RGB2HLS_f
int
srccn
,
blueIdx
;
float
hscale
;
#if CV_S
SE2
bool
ha
ve
SIMD
;
#if CV_S
IMD128
bool
ha
s
SIMD
;
#endif
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment