Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a7db950b
Commit
a7db950b
authored
Oct 30, 2016
by
k-shinotsuka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add SSE code for Lab2RGB_f.
parent
7fe0fb6b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
166 additions
and
5 deletions
+166
-5
color.cpp
modules/imgproc/src/color.cpp
+166
-5
No files found.
modules/imgproc/src/color.cpp
View file @
a7db950b
...
...
@@ -144,7 +144,7 @@ template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab
#if CV_SSE2
template
<
typename
_Tp
>
static
inline
void
splineInterpolate
(
__m128
&
v_x
,
const
_Tp
*
tab
,
int
n
)
{
__m128i
v_ix
=
_mm_cvtps_epi32
(
_mm_min_ps
(
_mm_max_ps
(
v_x
,
_mm_setzero_ps
()),
_mm_set1_ps
(
float
(
n
-
1
))));
__m128i
v_ix
=
_mm_cvt
t
ps_epi32
(
_mm_min_ps
(
_mm_max_ps
(
v_x
,
_mm_setzero_ps
()),
_mm_set1_ps
(
float
(
n
-
1
))));
v_x
=
_mm_sub_ps
(
v_x
,
_mm_cvtepi32_ps
(
v_ix
));
v_ix
=
_mm_slli_epi32
(
v_ix
,
2
);
...
...
@@ -5474,11 +5474,106 @@ struct Lab2RGB_f
coeffs
[
i
+
3
]
=
_coeffs
[
i
+
3
]
*
_whitept
[
i
];
coeffs
[
i
+
blueIdx
*
3
]
=
_coeffs
[
i
+
6
]
*
_whitept
[
i
];
}
lThresh
=
0.008856
f
*
903.3
f
;
fThresh
=
7.787
f
*
0.008856
f
+
16.0
f
/
116.0
f
;
#if CV_SSE2
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
}
#if CV_SSE2
// SSE2 kernel for eight pixels at once (two __m128 registers per channel).
//
// On entry the three register pairs hold the values the caller loaded and
// deinterleaved: v_li* is compared against lThresh, v_ai* is scaled by 0.002
// and v_bi* by 0.005.  On return the same six registers are overwritten with
// the three linear combinations built from coeffs[0..2], coeffs[3..5] and
// coeffs[6..8] respectively, each clamped to [0, 1].
//
// Every input is read before any output is written.
void process(__m128& v_li0, __m128& v_li1, __m128& v_ai0,
             __m128& v_ai1, __m128& v_bi0, __m128& v_bi1) const
{
    // Broadcast constants shared by both halves.
    const __m128 v_zero     = _mm_setzero_ps();
    const __m128 v_one      = _mm_set1_ps(1.0f);
    const __m128 v_sixteen  = _mm_set1_ps(16.0f);
    const __m128 v_offset   = _mm_set1_ps(16.0f / 116.0f);
    const __m128 v_slope    = _mm_set1_ps(7.787f);
    const __m128 v_invSlope = _mm_set1_ps(1.0f / 7.787f);
    const __m128 v_inv116   = _mm_set1_ps(1.0f / 116.0f);
    const __m128 v_inv903   = _mm_set1_ps(1.0f / 903.3f);
    const __m128 v_aScale   = _mm_set1_ps(0.002f);
    const __m128 v_bScale   = _mm_set1_ps(0.005f);
    const __m128 v_lLimit   = _mm_set1_ps(lThresh);
    const __m128 v_fLimit   = _mm_set1_ps(fThresh);

    // ---- Stage 1: y and fy from the first channel -------------------------
    // Both candidate formulas are evaluated for every lane; the comparison
    // mask then picks one per lane (branch-free select via and/andnot/or).
    const __m128 v_yLin0  = _mm_mul_ps(v_li0, v_inv903);
    const __m128 v_yLin1  = _mm_mul_ps(v_li1, v_inv903);
    const __m128 v_fyLin0 = _mm_add_ps(_mm_mul_ps(v_slope, v_yLin0), v_offset);
    const __m128 v_fyLin1 = _mm_add_ps(_mm_mul_ps(v_slope, v_yLin1), v_offset);

    const __m128 v_fyCub0 = _mm_mul_ps(_mm_add_ps(v_li0, v_sixteen), v_inv116);
    const __m128 v_fyCub1 = _mm_mul_ps(_mm_add_ps(v_li1, v_sixteen), v_inv116);
    const __m128 v_yCub0  = _mm_mul_ps(_mm_mul_ps(v_fyCub0, v_fyCub0), v_fyCub0);
    const __m128 v_yCub1  = _mm_mul_ps(_mm_mul_ps(v_fyCub1, v_fyCub1), v_fyCub1);

    // Lanes where the first channel is <= lThresh take the linear variant.
    const __m128 v_lMask0 = _mm_cmple_ps(v_li0, v_lLimit);
    const __m128 v_lMask1 = _mm_cmple_ps(v_li1, v_lLimit);

    const __m128 v_y0  = _mm_or_ps(_mm_and_ps(v_lMask0, v_yLin0),
                                   _mm_andnot_ps(v_lMask0, v_yCub0));
    const __m128 v_y1  = _mm_or_ps(_mm_and_ps(v_lMask1, v_yLin1),
                                   _mm_andnot_ps(v_lMask1, v_yCub1));
    const __m128 v_fy0 = _mm_or_ps(_mm_and_ps(v_lMask0, v_fyLin0),
                                   _mm_andnot_ps(v_lMask0, v_fyCub0));
    const __m128 v_fy1 = _mm_or_ps(_mm_and_ps(v_lMask1, v_fyLin1),
                                   _mm_andnot_ps(v_lMask1, v_fyCub1));

    // ---- Stage 2: fx = fy + a * 0.002, fz = fy - b * 0.005 ---------------
    const __m128 v_fx0 = _mm_add_ps(v_fy0, _mm_mul_ps(v_ai0, v_aScale));
    const __m128 v_fx1 = _mm_add_ps(v_fy1, _mm_mul_ps(v_ai1, v_aScale));
    const __m128 v_fz0 = _mm_sub_ps(v_fy0, _mm_mul_ps(v_bi0, v_bScale));
    const __m128 v_fz1 = _mm_sub_ps(v_fy1, _mm_mul_ps(v_bi1, v_bScale));

    // ---- Stage 3: x and z from fx and fz ----------------------------------
    // Linear variant: (f - 16/116) / 7.787; cubic variant: f^3.
    const __m128 v_xLin0 = _mm_mul_ps(_mm_sub_ps(v_fx0, v_offset), v_invSlope);
    const __m128 v_xLin1 = _mm_mul_ps(_mm_sub_ps(v_fx1, v_offset), v_invSlope);
    const __m128 v_zLin0 = _mm_mul_ps(_mm_sub_ps(v_fz0, v_offset), v_invSlope);
    const __m128 v_zLin1 = _mm_mul_ps(_mm_sub_ps(v_fz1, v_offset), v_invSlope);

    const __m128 v_xCub0 = _mm_mul_ps(_mm_mul_ps(v_fx0, v_fx0), v_fx0);
    const __m128 v_xCub1 = _mm_mul_ps(_mm_mul_ps(v_fx1, v_fx1), v_fx1);
    const __m128 v_zCub0 = _mm_mul_ps(_mm_mul_ps(v_fz0, v_fz0), v_fz0);
    const __m128 v_zCub1 = _mm_mul_ps(_mm_mul_ps(v_fz1, v_fz1), v_fz1);

    // Lanes where f <= fThresh take the linear variant.
    const __m128 v_xMask0 = _mm_cmple_ps(v_fx0, v_fLimit);
    const __m128 v_xMask1 = _mm_cmple_ps(v_fx1, v_fLimit);
    const __m128 v_zMask0 = _mm_cmple_ps(v_fz0, v_fLimit);
    const __m128 v_zMask1 = _mm_cmple_ps(v_fz1, v_fLimit);

    const __m128 v_x0 = _mm_or_ps(_mm_and_ps(v_xMask0, v_xLin0),
                                  _mm_andnot_ps(v_xMask0, v_xCub0));
    const __m128 v_x1 = _mm_or_ps(_mm_and_ps(v_xMask1, v_xLin1),
                                  _mm_andnot_ps(v_xMask1, v_xCub1));
    const __m128 v_z0 = _mm_or_ps(_mm_and_ps(v_zMask0, v_zLin0),
                                  _mm_andnot_ps(v_zMask0, v_zCub0));
    const __m128 v_z1 = _mm_or_ps(_mm_and_ps(v_zMask1, v_zLin1),
                                  _mm_andnot_ps(v_zMask1, v_zCub1));

    // ---- Stage 4: 3x3 matrix product with coeffs --------------------------
    // Each output row is accumulated as (c[0]*x + c[1]*y) + c[2]*z, in that
    // order, so the floating-point rounding sequence is fixed.
    const __m128 v_c0 = _mm_set1_ps(coeffs[0]);
    const __m128 v_c1 = _mm_set1_ps(coeffs[1]);
    const __m128 v_c2 = _mm_set1_ps(coeffs[2]);
    const __m128 v_c3 = _mm_set1_ps(coeffs[3]);
    const __m128 v_c4 = _mm_set1_ps(coeffs[4]);
    const __m128 v_c5 = _mm_set1_ps(coeffs[5]);
    const __m128 v_c6 = _mm_set1_ps(coeffs[6]);
    const __m128 v_c7 = _mm_set1_ps(coeffs[7]);
    const __m128 v_c8 = _mm_set1_ps(coeffs[8]);

    const __m128 v_row0_0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c0, v_x0),
                                                  _mm_mul_ps(v_c1, v_y0)),
                                       _mm_mul_ps(v_c2, v_z0));
    const __m128 v_row0_1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c0, v_x1),
                                                  _mm_mul_ps(v_c1, v_y1)),
                                       _mm_mul_ps(v_c2, v_z1));
    const __m128 v_row1_0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c3, v_x0),
                                                  _mm_mul_ps(v_c4, v_y0)),
                                       _mm_mul_ps(v_c5, v_z0));
    const __m128 v_row1_1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c3, v_x1),
                                                  _mm_mul_ps(v_c4, v_y1)),
                                       _mm_mul_ps(v_c5, v_z1));
    const __m128 v_row2_0 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c6, v_x0),
                                                  _mm_mul_ps(v_c7, v_y0)),
                                       _mm_mul_ps(v_c8, v_z0));
    const __m128 v_row2_1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c6, v_x1),
                                                  _mm_mul_ps(v_c7, v_y1)),
                                       _mm_mul_ps(v_c8, v_z1));

    // ---- Stage 5: clamp to [0, 1] and hand the results back ---------------
    v_li0 = _mm_min_ps(_mm_max_ps(v_row0_0, v_zero), v_one);
    v_li1 = _mm_min_ps(_mm_max_ps(v_row0_1, v_zero), v_one);
    v_ai0 = _mm_min_ps(_mm_max_ps(v_row1_0, v_zero), v_one);
    v_ai1 = _mm_min_ps(_mm_max_ps(v_row1_1, v_zero), v_one);
    v_bi0 = _mm_min_ps(_mm_max_ps(v_row2_0, v_zero), v_one);
    v_bi1 = _mm_min_ps(_mm_max_ps(v_row2_1, v_zero), v_one);
}
#endif
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
i
,
dcn
=
dstcn
;
int
i
=
0
,
dcn
=
dstcn
;
const
float
*
gammaTab
=
srgb
?
sRGBInvGammaTab
:
0
;
float
gscale
=
GammaTabScale
;
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
...
...
@@ -5487,9 +5582,70 @@ struct Lab2RGB_f
float
alpha
=
ColorChannel
<
float
>::
max
();
n
*=
3
;
static
const
float
lThresh
=
0.008856
f
*
903.3
f
;
static
const
float
fThresh
=
7.787
f
*
0.008856
f
+
16.0
f
/
116.0
f
;
for
(
i
=
0
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
#if CV_SSE2
if
(
haveSIMD
)
{
for
(;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128
v_li0
=
_mm_loadu_ps
(
src
+
i
+
0
);
__m128
v_li1
=
_mm_loadu_ps
(
src
+
i
+
4
);
__m128
v_ai0
=
_mm_loadu_ps
(
src
+
i
+
8
);
__m128
v_ai1
=
_mm_loadu_ps
(
src
+
i
+
12
);
__m128
v_bi0
=
_mm_loadu_ps
(
src
+
i
+
16
);
__m128
v_bi1
=
_mm_loadu_ps
(
src
+
i
+
20
);
_mm_deinterleave_ps
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
);
process
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
);
if
(
gammaTab
)
{
__m128
v_gscale
=
_mm_set1_ps
(
gscale
);
v_li0
=
_mm_mul_ps
(
v_li0
,
v_gscale
);
v_li1
=
_mm_mul_ps
(
v_li1
,
v_gscale
);
v_ai0
=
_mm_mul_ps
(
v_ai0
,
v_gscale
);
v_ai1
=
_mm_mul_ps
(
v_ai1
,
v_gscale
);
v_bi0
=
_mm_mul_ps
(
v_bi0
,
v_gscale
);
v_bi1
=
_mm_mul_ps
(
v_bi1
,
v_gscale
);
splineInterpolate
(
v_li0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_li1
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_ai0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_ai1
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_bi0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_bi1
,
gammaTab
,
GAMMA_TAB_SIZE
);
}
if
(
dcn
==
4
)
{
__m128
v_a0
=
_mm_set1_ps
(
alpha
);
__m128
v_a1
=
_mm_set1_ps
(
alpha
);
_mm_interleave_ps
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
,
v_a0
,
v_a1
);
_mm_storeu_ps
(
dst
+
0
,
v_li0
);
_mm_storeu_ps
(
dst
+
4
,
v_li1
);
_mm_storeu_ps
(
dst
+
8
,
v_ai0
);
_mm_storeu_ps
(
dst
+
12
,
v_ai1
);
_mm_storeu_ps
(
dst
+
16
,
v_bi0
);
_mm_storeu_ps
(
dst
+
20
,
v_bi1
);
_mm_storeu_ps
(
dst
+
24
,
v_a0
);
_mm_storeu_ps
(
dst
+
28
,
v_a1
);
}
else
{
_mm_interleave_ps
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
);
_mm_storeu_ps
(
dst
+
0
,
v_li0
);
_mm_storeu_ps
(
dst
+
4
,
v_li1
);
_mm_storeu_ps
(
dst
+
8
,
v_ai0
);
_mm_storeu_ps
(
dst
+
12
,
v_ai1
);
_mm_storeu_ps
(
dst
+
16
,
v_bi0
);
_mm_storeu_ps
(
dst
+
20
,
v_bi1
);
}
}
}
#endif
for
(;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
float
li
=
src
[
i
];
float
ai
=
src
[
i
+
1
];
...
...
@@ -5540,6 +5696,11 @@ struct Lab2RGB_f
int
dstcn
;
float
coeffs
[
9
];
bool
srgb
;
float
lThresh
;
float
fThresh
;
#if CV_SSE2
bool
haveSIMD
;
#endif
};
#undef clip
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment