Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
01f355df
Commit
01f355df
authored
Nov 02, 2016
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7583 from K-Shinotsuka:issue33
parents
7dd99258
a7db950b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
166 additions
and
5 deletions
+166
-5
color.cpp
modules/imgproc/src/color.cpp
+166
-5
No files found.
modules/imgproc/src/color.cpp
View file @
01f355df
...
...
@@ -144,7 +144,7 @@ template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab
#if CV_SSE2
template
<
typename
_Tp
>
static
inline
void
splineInterpolate
(
__m128
&
v_x
,
const
_Tp
*
tab
,
int
n
)
{
__m128i
v_ix
=
_mm_cvtps_epi32
(
_mm_min_ps
(
_mm_max_ps
(
v_x
,
_mm_setzero_ps
()),
_mm_set1_ps
(
float
(
n
-
1
))));
__m128i
v_ix
=
_mm_cvt
t
ps_epi32
(
_mm_min_ps
(
_mm_max_ps
(
v_x
,
_mm_setzero_ps
()),
_mm_set1_ps
(
float
(
n
-
1
))));
v_x
=
_mm_sub_ps
(
v_x
,
_mm_cvtepi32_ps
(
v_ix
));
v_ix
=
_mm_slli_epi32
(
v_ix
,
2
);
...
...
@@ -5474,11 +5474,106 @@ struct Lab2RGB_f
coeffs
[
i
+
3
]
=
_coeffs
[
i
+
3
]
*
_whitept
[
i
];
coeffs
[
i
+
blueIdx
*
3
]
=
_coeffs
[
i
+
6
]
*
_whitept
[
i
];
}
lThresh
=
0.008856
f
*
903.3
f
;
fThresh
=
7.787
f
*
0.008856
f
+
16.0
f
/
116.0
f
;
#if CV_SSE2
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
#endif
}
#if CV_SSE2
// SSE2 kernel used by the vectorized path of Lab2RGB_f::operator().
//
// Each channel arrives as a pair of 4-lane registers (eight pixels in total):
//   v_li0/v_li1, v_ai0/v_ai1, v_bi0/v_bi1  -- the three planar input channels.
// All six arguments are in/out. On return they hold, respectively, the three
// output channels produced by coeffs rows [0..2], [3..5] and [6..8], each
// clamped to [0, 1].
//
// The arithmetic per lane is:
//   li <= lThresh ?  y = li / 903.3,          fy = 7.787 * y + 16/116
//                 :  fy = (li + 16) / 116,    y  = fy^3
//   fx = fy + ai * 0.002,   fz = fy - bi * 0.005
//   t  <= fThresh ?  (t - 16/116) / 7.787  :  t^3      (for t = fx and t = fz)
//   out_k = clamp(coeffs[3k] * x + coeffs[3k+1] * y + coeffs[3k+2] * z, 0, 1)
//
// Branches are implemented as mask selects: both alternatives are computed and
// then merged with and / andnot / or. Note that _mm_andnot_ps(m, v) is (~m) & v.
void process(__m128& v_li0, __m128& v_li1, __m128& v_ai0,
             __m128& v_ai1, __m128& v_bi0, __m128& v_bi1) const
{
    // Broadcast constants shared by both register halves.
    const __m128 v_zero       = _mm_setzero_ps();
    const __m128 v_one        = _mm_set1_ps(1.0f);
    const __m128 v_sixteen    = _mm_set1_ps(16.0f);
    const __m128 v_offset     = _mm_set1_ps(16.0f / 116.0f);
    const __m128 v_slope      = _mm_set1_ps(7.787f);
    const __m128 v_invSlope   = _mm_set1_ps(1.0f / 7.787f);
    const __m128 v_inv903     = _mm_set1_ps(1.0f / 903.3f);
    const __m128 v_inv116     = _mm_set1_ps(1.0f / 116.0f);
    const __m128 v_aScale     = _mm_set1_ps(0.002f);
    const __m128 v_bScale     = _mm_set1_ps(0.005f);
    const __m128 v_lThreshold = _mm_set1_ps(lThresh);
    const __m128 v_fThreshold = _mm_set1_ps(fThresh);

    // ---- Stage 1: y and fy from the first channel -------------------------
    // Linear branch (taken where li <= lThresh).
    const __m128 v_yLin0  = _mm_mul_ps(v_li0, v_inv903);
    const __m128 v_yLin1  = _mm_mul_ps(v_li1, v_inv903);
    const __m128 v_fyLin0 = _mm_add_ps(_mm_mul_ps(v_slope, v_yLin0), v_offset);
    const __m128 v_fyLin1 = _mm_add_ps(_mm_mul_ps(v_slope, v_yLin1), v_offset);

    // Cubic branch (taken where li > lThresh).
    const __m128 v_fyCub0 = _mm_mul_ps(_mm_add_ps(v_li0, v_sixteen), v_inv116);
    const __m128 v_fyCub1 = _mm_mul_ps(_mm_add_ps(v_li1, v_sixteen), v_inv116);
    const __m128 v_yCub0  = _mm_mul_ps(_mm_mul_ps(v_fyCub0, v_fyCub0), v_fyCub0);
    const __m128 v_yCub1  = _mm_mul_ps(_mm_mul_ps(v_fyCub1, v_fyCub1), v_fyCub1);

    // Per-lane selection between the two branches.
    const __m128 v_lowL0 = _mm_cmple_ps(v_li0, v_lThreshold);
    const __m128 v_lowL1 = _mm_cmple_ps(v_li1, v_lThreshold);

    const __m128 v_y0  = _mm_or_ps(_mm_and_ps(v_lowL0, v_yLin0),
                                   _mm_andnot_ps(v_lowL0, v_yCub0));
    const __m128 v_y1  = _mm_or_ps(_mm_and_ps(v_lowL1, v_yLin1),
                                   _mm_andnot_ps(v_lowL1, v_yCub1));
    const __m128 v_fy0 = _mm_or_ps(_mm_and_ps(v_lowL0, v_fyLin0),
                                   _mm_andnot_ps(v_lowL0, v_fyCub0));
    const __m128 v_fy1 = _mm_or_ps(_mm_and_ps(v_lowL1, v_fyLin1),
                                   _mm_andnot_ps(v_lowL1, v_fyCub1));

    // ---- Stage 2: fx / fz from fy and the second / third channels ---------
    const __m128 v_fx0 = _mm_add_ps(v_fy0, _mm_mul_ps(v_ai0, v_aScale));
    const __m128 v_fx1 = _mm_add_ps(v_fy1, _mm_mul_ps(v_ai1, v_aScale));
    const __m128 v_fz0 = _mm_sub_ps(v_fy0, _mm_mul_ps(v_bi0, v_bScale));
    const __m128 v_fz1 = _mm_sub_ps(v_fy1, _mm_mul_ps(v_bi1, v_bScale));

    // Linear branch (taken where t <= fThresh).
    const __m128 v_xLin0 = _mm_mul_ps(_mm_sub_ps(v_fx0, v_offset), v_invSlope);
    const __m128 v_xLin1 = _mm_mul_ps(_mm_sub_ps(v_fx1, v_offset), v_invSlope);
    const __m128 v_zLin0 = _mm_mul_ps(_mm_sub_ps(v_fz0, v_offset), v_invSlope);
    const __m128 v_zLin1 = _mm_mul_ps(_mm_sub_ps(v_fz1, v_offset), v_invSlope);

    // Cubic branch (taken where t > fThresh).
    const __m128 v_xCub0 = _mm_mul_ps(_mm_mul_ps(v_fx0, v_fx0), v_fx0);
    const __m128 v_xCub1 = _mm_mul_ps(_mm_mul_ps(v_fx1, v_fx1), v_fx1);
    const __m128 v_zCub0 = _mm_mul_ps(_mm_mul_ps(v_fz0, v_fz0), v_fz0);
    const __m128 v_zCub1 = _mm_mul_ps(_mm_mul_ps(v_fz1, v_fz1), v_fz1);

    const __m128 v_lowFx0 = _mm_cmple_ps(v_fx0, v_fThreshold);
    const __m128 v_lowFx1 = _mm_cmple_ps(v_fx1, v_fThreshold);
    const __m128 v_lowFz0 = _mm_cmple_ps(v_fz0, v_fThreshold);
    const __m128 v_lowFz1 = _mm_cmple_ps(v_fz1, v_fThreshold);

    const __m128 v_x0 = _mm_or_ps(_mm_and_ps(v_lowFx0, v_xLin0),
                                  _mm_andnot_ps(v_lowFx0, v_xCub0));
    const __m128 v_x1 = _mm_or_ps(_mm_and_ps(v_lowFx1, v_xLin1),
                                  _mm_andnot_ps(v_lowFx1, v_xCub1));
    const __m128 v_z0 = _mm_or_ps(_mm_and_ps(v_lowFz0, v_zLin0),
                                  _mm_andnot_ps(v_lowFz0, v_zCub0));
    const __m128 v_z1 = _mm_or_ps(_mm_and_ps(v_lowFz1, v_zLin1),
                                  _mm_andnot_ps(v_lowFz1, v_zCub1));

    // ---- Stage 3: 3x3 matrix product with coeffs ---------------------------
    // Summation order is (c_x * x + c_y * y) + c_z * z for every row.
    const __m128 v_c0 = _mm_set1_ps(coeffs[0]);
    const __m128 v_c1 = _mm_set1_ps(coeffs[1]);
    const __m128 v_c2 = _mm_set1_ps(coeffs[2]);
    const __m128 v_c3 = _mm_set1_ps(coeffs[3]);
    const __m128 v_c4 = _mm_set1_ps(coeffs[4]);
    const __m128 v_c5 = _mm_set1_ps(coeffs[5]);
    const __m128 v_c6 = _mm_set1_ps(coeffs[6]);
    const __m128 v_c7 = _mm_set1_ps(coeffs[7]);
    const __m128 v_c8 = _mm_set1_ps(coeffs[8]);

    const __m128 v_row0lo = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c0, v_x0),
                                                  _mm_mul_ps(v_c1, v_y0)),
                                       _mm_mul_ps(v_c2, v_z0));
    const __m128 v_row0hi = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c0, v_x1),
                                                  _mm_mul_ps(v_c1, v_y1)),
                                       _mm_mul_ps(v_c2, v_z1));
    const __m128 v_row1lo = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c3, v_x0),
                                                  _mm_mul_ps(v_c4, v_y0)),
                                       _mm_mul_ps(v_c5, v_z0));
    const __m128 v_row1hi = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c3, v_x1),
                                                  _mm_mul_ps(v_c4, v_y1)),
                                       _mm_mul_ps(v_c5, v_z1));
    const __m128 v_row2lo = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c6, v_x0),
                                                  _mm_mul_ps(v_c7, v_y0)),
                                       _mm_mul_ps(v_c8, v_z0));
    const __m128 v_row2hi = _mm_add_ps(_mm_add_ps(_mm_mul_ps(v_c6, v_x1),
                                                  _mm_mul_ps(v_c7, v_y1)),
                                       _mm_mul_ps(v_c8, v_z1));

    // ---- Stage 4: clamp to [0, 1] and hand results back to the caller ------
    v_li0 = _mm_min_ps(_mm_max_ps(v_row0lo, v_zero), v_one);
    v_li1 = _mm_min_ps(_mm_max_ps(v_row0hi, v_zero), v_one);
    v_ai0 = _mm_min_ps(_mm_max_ps(v_row1lo, v_zero), v_one);
    v_ai1 = _mm_min_ps(_mm_max_ps(v_row1hi, v_zero), v_one);
    v_bi0 = _mm_min_ps(_mm_max_ps(v_row2lo, v_zero), v_one);
    v_bi1 = _mm_min_ps(_mm_max_ps(v_row2hi, v_zero), v_one);
}
#endif
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
i
,
dcn
=
dstcn
;
int
i
=
0
,
dcn
=
dstcn
;
const
float
*
gammaTab
=
srgb
?
sRGBInvGammaTab
:
0
;
float
gscale
=
GammaTabScale
;
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
...
...
@@ -5487,9 +5582,70 @@ struct Lab2RGB_f
float
alpha
=
ColorChannel
<
float
>::
max
();
n
*=
3
;
static
const
float
lThresh
=
0.008856
f
*
903.3
f
;
static
const
float
fThresh
=
7.787
f
*
0.008856
f
+
16.0
f
/
116.0
f
;
for
(
i
=
0
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
#if CV_SSE2
if
(
haveSIMD
)
{
for
(;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128
v_li0
=
_mm_loadu_ps
(
src
+
i
+
0
);
__m128
v_li1
=
_mm_loadu_ps
(
src
+
i
+
4
);
__m128
v_ai0
=
_mm_loadu_ps
(
src
+
i
+
8
);
__m128
v_ai1
=
_mm_loadu_ps
(
src
+
i
+
12
);
__m128
v_bi0
=
_mm_loadu_ps
(
src
+
i
+
16
);
__m128
v_bi1
=
_mm_loadu_ps
(
src
+
i
+
20
);
_mm_deinterleave_ps
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
);
process
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
);
if
(
gammaTab
)
{
__m128
v_gscale
=
_mm_set1_ps
(
gscale
);
v_li0
=
_mm_mul_ps
(
v_li0
,
v_gscale
);
v_li1
=
_mm_mul_ps
(
v_li1
,
v_gscale
);
v_ai0
=
_mm_mul_ps
(
v_ai0
,
v_gscale
);
v_ai1
=
_mm_mul_ps
(
v_ai1
,
v_gscale
);
v_bi0
=
_mm_mul_ps
(
v_bi0
,
v_gscale
);
v_bi1
=
_mm_mul_ps
(
v_bi1
,
v_gscale
);
splineInterpolate
(
v_li0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_li1
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_ai0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_ai1
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_bi0
,
gammaTab
,
GAMMA_TAB_SIZE
);
splineInterpolate
(
v_bi1
,
gammaTab
,
GAMMA_TAB_SIZE
);
}
if
(
dcn
==
4
)
{
__m128
v_a0
=
_mm_set1_ps
(
alpha
);
__m128
v_a1
=
_mm_set1_ps
(
alpha
);
_mm_interleave_ps
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
,
v_a0
,
v_a1
);
_mm_storeu_ps
(
dst
+
0
,
v_li0
);
_mm_storeu_ps
(
dst
+
4
,
v_li1
);
_mm_storeu_ps
(
dst
+
8
,
v_ai0
);
_mm_storeu_ps
(
dst
+
12
,
v_ai1
);
_mm_storeu_ps
(
dst
+
16
,
v_bi0
);
_mm_storeu_ps
(
dst
+
20
,
v_bi1
);
_mm_storeu_ps
(
dst
+
24
,
v_a0
);
_mm_storeu_ps
(
dst
+
28
,
v_a1
);
}
else
{
_mm_interleave_ps
(
v_li0
,
v_li1
,
v_ai0
,
v_ai1
,
v_bi0
,
v_bi1
);
_mm_storeu_ps
(
dst
+
0
,
v_li0
);
_mm_storeu_ps
(
dst
+
4
,
v_li1
);
_mm_storeu_ps
(
dst
+
8
,
v_ai0
);
_mm_storeu_ps
(
dst
+
12
,
v_ai1
);
_mm_storeu_ps
(
dst
+
16
,
v_bi0
);
_mm_storeu_ps
(
dst
+
20
,
v_bi1
);
}
}
}
#endif
for
(;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
float
li
=
src
[
i
];
float
ai
=
src
[
i
+
1
];
...
...
@@ -5540,6 +5696,11 @@ struct Lab2RGB_f
int
dstcn
;
float
coeffs
[
9
];
bool
srgb
;
float
lThresh
;
float
fThresh
;
#if CV_SSE2
bool
haveSIMD
;
#endif
};
#undef clip
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment