Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
992795d4
Commit
992795d4
authored
Oct 22, 2016
by
k-shinotsuka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add SSE code for RGB2Luv_f.
parent
dd379ec9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
165 additions
and
7 deletions
+165
-7
color.cpp
modules/imgproc/src/color.cpp
+165
-7
No files found.
modules/imgproc/src/color.cpp
View file @
992795d4
...
@@ -141,6 +141,39 @@ template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab
...
@@ -141,6 +141,39 @@ template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab
return
((
tab
[
3
]
*
x
+
tab
[
2
])
*
x
+
tab
[
1
])
*
x
+
tab
[
0
];
return
((
tab
[
3
]
*
x
+
tab
[
2
])
*
x
+
tab
[
1
])
*
x
+
tab
[
0
];
}
}
#if CV_SSE2
// SSE2 overload of splineInterpolate(): evaluates a piecewise-cubic table for
// four lanes at once and writes the result back into v_x.
//
//   v_x - in:  four positions, already scaled to table-segment units;
//         out: the four interpolated values.
//   tab - coefficient table, four consecutive coefficients per segment
//         (constant, linear, quadratic, cubic). It is read through
//         _mm_loadu_ps, so _Tp must be float for this overload to compile.
//   n   - number of segments; indices are clamped to [0, n - 1].
template<typename _Tp> static inline void splineInterpolate(__m128& v_x, const _Tp* tab, int n)
{
    // Clamp the position to [0, n - 1] and convert it to a segment index.
    // The conversion must truncate (cvtt), not round to nearest (cvt): with
    // rounding, a lane whose fractional part exceeds 0.5 selects the *next*
    // segment and is evaluated at a negative local offset, i.e. outside the
    // interval that segment's polynomial was fitted for.
    // NOTE(review): the scalar overload's index computation is outside this
    // view; it presumably truncates as well - confirm that both paths agree.
    __m128i v_ix = _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(v_x, _mm_setzero_ps()), _mm_set1_ps(float(n - 1))));

    // Local coordinate inside the selected segment.
    v_x = _mm_sub_ps(v_x, _mm_cvtepi32_ps(v_ix));

    // Four coefficients per segment: turn the segment index into an element offset.
    v_ix = _mm_slli_epi32(v_ix, 2);

    // SSE2 has no gather instruction, so spill the offsets and load each
    // lane's coefficient quadruple separately.
    int CV_DECL_ALIGNED(16) ix[4];
    _mm_store_si128((__m128i*)ix, v_ix);

    __m128 v_tab0 = _mm_loadu_ps(tab + ix[0]);
    __m128 v_tab1 = _mm_loadu_ps(tab + ix[1]);
    __m128 v_tab2 = _mm_loadu_ps(tab + ix[2]);
    __m128 v_tab3 = _mm_loadu_ps(tab + ix[3]);

    // 4x4 transpose: afterwards v_tabK holds coefficient K of all four lanes.
    __m128 v_tmp0 = _mm_unpacklo_ps(v_tab0, v_tab1);
    __m128 v_tmp1 = _mm_unpacklo_ps(v_tab2, v_tab3);
    __m128 v_tmp2 = _mm_unpackhi_ps(v_tab0, v_tab1);
    __m128 v_tmp3 = _mm_unpackhi_ps(v_tab2, v_tab3);
    v_tab0 = _mm_shuffle_ps(v_tmp0, v_tmp1, 0x44);
    v_tab2 = _mm_shuffle_ps(v_tmp2, v_tmp3, 0x44);
    v_tab1 = _mm_shuffle_ps(v_tmp0, v_tmp1, 0xee);
    v_tab3 = _mm_shuffle_ps(v_tmp2, v_tmp3, 0xee);

    // Horner evaluation: ((c3 * x + c2) * x + c1) * x + c0.
    __m128 v_l = _mm_mul_ps(v_x, v_tab3);
    v_l = _mm_add_ps(v_l, v_tab2);
    v_l = _mm_mul_ps(v_l, v_x);
    v_l = _mm_add_ps(v_l, v_tab1);
    v_l = _mm_mul_ps(v_l, v_x);
    v_x = _mm_add_ps(v_l, v_tab0);
}
#endif
template
<
typename
_Tp
>
struct
ColorChannel
template
<
typename
_Tp
>
struct
ColorChannel
{
{
...
@@ -5766,24 +5799,146 @@ struct RGB2Luv_f
...
@@ -5766,24 +5799,146 @@ struct RGB2Luv_f
}
}
float
d
=
1.
f
/
(
whitept
[
0
]
+
whitept
[
1
]
*
15
+
whitept
[
2
]
*
3
);
float
d
=
1.
f
/
(
whitept
[
0
]
+
whitept
[
1
]
*
15
+
whitept
[
2
]
*
3
);
un
=
4
*
whitept
[
0
]
*
d
;
un
=
4
*
whitept
[
0
]
*
d
*
13
;
vn
=
9
*
whitept
[
1
]
*
d
;
vn
=
9
*
whitept
[
1
]
*
d
*
13
;
#if CV_SSE2
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
CV_Assert
(
whitept
[
1
]
==
1.
f
);
CV_Assert
(
whitept
[
1
]
==
1.
f
);
}
}
#if CV_SSE2
    // Converts eight pixels, supplied as planar (already linear) R, G and B
    // in two groups of four lanes, in place:
    //   v_r0 / v_r1 are overwritten with L,
    //   v_g0 / v_g1 are overwritten with u,
    //   v_b0 / v_b1 are overwritten with v.
    // The arithmetic follows the scalar loop of operator(); un and vn are
    // used as stored in the members, with no further scaling applied here.
    void process(__m128& v_r0, __m128& v_r1, __m128& v_g0,
                 __m128& v_g1, __m128& v_b0, __m128& v_b1) const
    {
        // Broadcast the 3x3 matrix that maps RGB to X, Y, Z.
        const __m128 v_c0 = _mm_set1_ps(coeffs[0]);
        const __m128 v_c1 = _mm_set1_ps(coeffs[1]);
        const __m128 v_c2 = _mm_set1_ps(coeffs[2]);
        const __m128 v_c3 = _mm_set1_ps(coeffs[3]);
        const __m128 v_c4 = _mm_set1_ps(coeffs[4]);
        const __m128 v_c5 = _mm_set1_ps(coeffs[5]);
        const __m128 v_c6 = _mm_set1_ps(coeffs[6]);
        const __m128 v_c7 = _mm_set1_ps(coeffs[7]);
        const __m128 v_c8 = _mm_set1_ps(coeffs[8]);

        // X, Y, Z = (R * c + G * c) + B * c, accumulated in that order.
        const __m128 v_X0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_r0, v_c0), _mm_mul_ps(v_g0, v_c1)), _mm_mul_ps(v_b0, v_c2));
        const __m128 v_X1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_r1, v_c0), _mm_mul_ps(v_g1, v_c1)), _mm_mul_ps(v_b1, v_c2));
        const __m128 v_Y0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_r0, v_c3), _mm_mul_ps(v_g0, v_c4)), _mm_mul_ps(v_b0, v_c5));
        const __m128 v_Y1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_r1, v_c3), _mm_mul_ps(v_g1, v_c4)), _mm_mul_ps(v_b1, v_c5));
        const __m128 v_Z0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_r0, v_c6), _mm_mul_ps(v_g0, v_c7)), _mm_mul_ps(v_b0, v_c8));
        const __m128 v_Z1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_r1, v_c6), _mm_mul_ps(v_g1, v_c7)), _mm_mul_ps(v_b1, v_c8));

        // L = 116 * spline(Y * LabCbrtTabScale) - 16
        const __m128 v_tabScale = _mm_set1_ps(LabCbrtTabScale);
        __m128 v_L0 = _mm_mul_ps(v_Y0, v_tabScale);
        __m128 v_L1 = _mm_mul_ps(v_Y1, v_tabScale);
        splineInterpolate(v_L0, LabCbrtTab, LAB_CBRT_TAB_SIZE);
        splineInterpolate(v_L1, LabCbrtTab, LAB_CBRT_TAB_SIZE);

        const __m128 v_116 = _mm_set1_ps(116.0f);
        const __m128 v_16 = _mm_set1_ps(16.0f);
        v_L0 = _mm_sub_ps(_mm_mul_ps(v_L0, v_116), v_16);
        v_L1 = _mm_sub_ps(_mm_mul_ps(v_L1, v_116), v_16);

        // d = 52 / max(3 * Z + X + 15 * Y, FLT_EPSILON); 52 is the scalar path's 4 * 13.
        const __m128 v_3 = _mm_set1_ps(3.0f);
        const __m128 v_15 = _mm_set1_ps(15.0f);
        const __m128 v_eps = _mm_set1_ps(FLT_EPSILON);
        const __m128 v_52 = _mm_set1_ps(52.0f);
        __m128 v_den0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_Z0, v_3), v_X0), _mm_mul_ps(v_Y0, v_15));
        __m128 v_den1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_Z1, v_3), v_X1), _mm_mul_ps(v_Y1, v_15));
        v_den0 = _mm_max_ps(v_den0, v_eps);
        v_den1 = _mm_max_ps(v_den1, v_eps);
        const __m128 v_d0 = _mm_div_ps(v_52, v_den0);
        const __m128 v_d1 = _mm_div_ps(v_52, v_den1);

        // u = (X * d - un) * L
        const __m128 v_un = _mm_set1_ps(un);
        const __m128 v_u0 = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(v_X0, v_d0), v_un), v_L0);
        const __m128 v_u1 = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(v_X1, v_d1), v_un), v_L1);

        // v = (Y * d * 2.25 - vn) * L; 2.25 is the scalar path's 9 * 0.25.
        const __m128 v_2_25 = _mm_set1_ps(2.25f);
        const __m128 v_vn = _mm_set1_ps(vn);
        const __m128 v_v0 = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_mul_ps(v_Y0, v_d0), v_2_25), v_vn), v_L0);
        const __m128 v_v1 = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_mul_ps(v_Y1, v_d1), v_2_25), v_vn), v_L1);

        // Hand the results back through the channel registers.
        v_r0 = v_L0;
        v_r1 = v_L1;
        v_g0 = v_u0;
        v_g1 = v_u1;
        v_b0 = v_v0;
        v_b1 = v_v1;
    }
#endif
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
{
int
i
,
scn
=
srccn
;
int
i
=
0
,
scn
=
srccn
;
float
gscale
=
GammaTabScale
;
float
gscale
=
GammaTabScale
;
const
float
*
gammaTab
=
srgb
?
sRGBGammaTab
:
0
;
const
float
*
gammaTab
=
srgb
?
sRGBGammaTab
:
0
;
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
],
C4
=
coeffs
[
4
],
C5
=
coeffs
[
5
],
C3
=
coeffs
[
3
],
C4
=
coeffs
[
4
],
C5
=
coeffs
[
5
],
C6
=
coeffs
[
6
],
C7
=
coeffs
[
7
],
C8
=
coeffs
[
8
];
C6
=
coeffs
[
6
],
C7
=
coeffs
[
7
],
C8
=
coeffs
[
8
];
float
_un
=
13
*
un
,
_vn
=
13
*
vn
;
n
*=
3
;
n
*=
3
;
for
(
i
=
0
;
i
<
n
;
i
+=
3
,
src
+=
scn
)
#if CV_SSE2
if
(
haveSIMD
)
{
for
(
;
i
<=
n
-
24
;
i
+=
24
,
src
+=
scn
*
8
)
{
__m128
v_r0
=
_mm_loadu_ps
(
src
+
0
);
__m128
v_r1
=
_mm_loadu_ps
(
src
+
4
);
__m128
v_g0
=
_mm_loadu_ps
(
src
+
8
);
__m128
v_g1
=
_mm_loadu_ps
(
src
+
12
);
__m128
v_b0
=
_mm_loadu_ps
(
src
+
16
);
__m128
v_b1
=
_mm_loadu_ps
(
src
+
20
);
if
(
scn
==
3
)
{
_mm_deinterleave_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
}
else
{
__m128
v_a0
=
_mm_loadu_ps
(
src
+
24
);
__m128
v_a1
=
_mm_loadu_ps
(
src
+
28
);
_mm_deinterleave_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
,
v_a0
,
v_a1
);
}
if
(
gammaTab
)
{
__m128
v_gscale
=
_mm_set1_ps
(
gscale
);
v_r0
=
_mm_mul_ps
(
v_r0
,
v_gscale
);
v_r1
=
_mm_mul_ps
(
v_r1
,
v_gscale
);
v_g0
=
_mm_mul_ps
(
v_g0
,
v_gscale
);
v_g1
=
_mm_mul_ps
(
v_g1
,
v_gscale
);
v_b0
=
_mm_mul_ps
(
v_b0
,
v_gscale
);
v_b1
=
_mm_mul_ps
(
v_b1
,
v_gscale
);
splineInterpolate
(
v_r0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_r1
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_g0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_g1
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_b0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_b1
,
gammaTab
,
GAMMA_TAB_SIZE
);
}
process
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_interleave_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_storeu_ps
(
dst
+
i
+
0
,
v_r0
);
_mm_storeu_ps
(
dst
+
i
+
4
,
v_r1
);
_mm_storeu_ps
(
dst
+
i
+
8
,
v_g0
);
_mm_storeu_ps
(
dst
+
i
+
12
,
v_g1
);
_mm_storeu_ps
(
dst
+
i
+
16
,
v_b0
);
_mm_storeu_ps
(
dst
+
i
+
20
,
v_b1
);
}
}
#endif
for
(
;
i
<
n
;
i
+=
3
,
src
+=
scn
)
{
{
float
R
=
src
[
0
],
G
=
src
[
1
],
B
=
src
[
2
];
float
R
=
src
[
0
],
G
=
src
[
1
],
B
=
src
[
2
];
if
(
gammaTab
)
if
(
gammaTab
)
...
@@ -5801,8 +5956,8 @@ struct RGB2Luv_f
...
@@ -5801,8 +5956,8 @@ struct RGB2Luv_f
L
=
116.
f
*
L
-
16.
f
;
L
=
116.
f
*
L
-
16.
f
;
float
d
=
(
4
*
13
)
/
std
::
max
(
X
+
15
*
Y
+
3
*
Z
,
FLT_EPSILON
);
float
d
=
(
4
*
13
)
/
std
::
max
(
X
+
15
*
Y
+
3
*
Z
,
FLT_EPSILON
);
float
u
=
L
*
(
X
*
d
-
_
un
);
float
u
=
L
*
(
X
*
d
-
un
);
float
v
=
L
*
((
9
*
0.25
f
)
*
Y
*
d
-
_
vn
);
float
v
=
L
*
((
9
*
0.25
f
)
*
Y
*
d
-
vn
);
dst
[
i
]
=
L
;
dst
[
i
+
1
]
=
u
;
dst
[
i
+
2
]
=
v
;
dst
[
i
]
=
L
;
dst
[
i
+
1
]
=
u
;
dst
[
i
+
2
]
=
v
;
}
}
...
@@ -5811,6 +5966,9 @@ struct RGB2Luv_f
...
@@ -5811,6 +5966,9 @@ struct RGB2Luv_f
int
srccn
;
int
srccn
;
float
coeffs
[
9
],
un
,
vn
;
float
coeffs
[
9
],
un
,
vn
;
bool
srgb
;
bool
srgb
;
#if CV_SSE2
bool
haveSIMD
;
#endif
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment