Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
09490188
Commit
09490188
authored
Mar 28, 2012
by
Andrey Kamaev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#1365 Fixed numerous bugs in Bayer2RGB_VNG_8u SSE optimization, added simple regression test
parent
32b9bc1d
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
191 additions
and
153 deletions
+191
-153
color.cpp
modules/imgproc/src/color.cpp
+169
-153
test_color.cpp
modules/imgproc/test/test_color.cpp
+22
-0
No files found.
modules/imgproc/src/color.cpp
View file @
09490188
...
...
@@ -2234,7 +2234,7 @@ static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
bool
greenCell
=
greenCell0
;
i
=
2
;
#if CV_SSE2
#if CV_SSE2
int
limit
=
!
haveSSE
?
N
-
2
:
greenCell
?
std
::
min
(
3
,
N
-
2
)
:
2
;
#else
int
limit
=
N
-
2
;
...
...
@@ -2401,202 +2401,218 @@ static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
dstrow
[
blueIdx
^
2
]
=
CV_CAST_8U
(
R
);
greenCell
=
!
greenCell
;
}
#if CV_SSE2
if
(
!
haveSSE
)
break
;
__m128i
emask
=
_mm_set1_epi32
(
0x0000ffff
),
omask
=
_mm_set1_epi32
(
0xffff0000
),
all_ones
=
_mm_set1_epi16
(
1
),
z
=
_mm_setzero_si128
();
__m128
_0_5
=
_mm_set1_ps
(
0.5
f
);
__m128i
emask
=
_mm_set1_epi32
(
0x0000ffff
),
omask
=
_mm_set1_epi32
(
0xffff0000
),
z
=
_mm_setzero_si128
();
__m128
_0_5
=
_mm_set1_ps
(
0.5
f
);
#define _mm_merge_epi16(a, b)
\
_mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)
)
#define _mm_cvt
loepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)
)
#define _mm_
cvthiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16))
#define _mm_merge_epi16(a, b)
_mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA)
#define _mm_cvtloepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f
)
#define _mm_cvt
hiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f
)
#define _mm_
loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts
// process 8 pixels at once
for
(
;
i
<=
N
-
10
;
i
+=
8
,
srow
+=
8
,
brow0
+=
8
,
brow1
+=
8
,
brow2
+=
8
)
{
__m128i
gradN
,
gradS
,
gradW
,
gradE
,
gradNE
,
gradSW
,
gradNW
,
gradSE
;
gradN
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)
brow0
),
_mm_loadu_si128
((
__m128i
*
)
brow1
));
gradS
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)
brow1
),
_mm_loadu_si128
((
__m128i
*
)
brow2
));
gradW
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
-
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
)));
gradE
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
+
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
)));
//int gradN = brow0[0] + brow1[0];
__m128i
gradN
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)
brow0
),
_mm_loadu_si128
((
__m128i
*
)
brow1
));
//int gradS = brow1[0] + brow2[0];
__m128i
gradS
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)
brow1
),
_mm_loadu_si128
((
__m128i
*
)
brow2
));
//int gradW = brow1[N-1] + brow1[N];
__m128i
gradW
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
-
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
)));
//int gradE = brow1[N+1] + brow1[N];
__m128i
gradE
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
+
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N
)));
__m128i
minGrad
,
maxGrad
,
T
;
minGrad
=
_mm_min_epi16
(
_mm_min_epi16
(
_mm_min_epi16
(
gradN
,
gradS
),
gradW
),
gradE
);
maxGrad
=
_mm_max_epi16
(
_mm_max_epi16
(
_mm_max_epi16
(
gradN
,
gradS
),
gradW
),
gradE
);
//int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE);
//int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE);
__m128i
minGrad
=
_mm_min_epi16
(
_mm_min_epi16
(
gradN
,
gradS
),
_mm_min_epi16
(
gradW
,
gradE
));
__m128i
maxGrad
=
_mm_max_epi16
(
_mm_max_epi16
(
gradN
,
gradS
),
_mm_max_epi16
(
gradW
,
gradE
));
__m128i
grad0
,
grad1
;
grad0
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N4
+
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N4
)));
grad1
=
_mm_adds_epu16
(
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N2
+
1
))),
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
+
1
))));
gradNE
=
_mm_srli_epi16
(
_mm_merge_epi16
(
grad0
,
grad1
),
1
);
//int gradNE = brow0[N4+1] + brow1[N4];
//int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1];
grad0
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N4
+
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N4
)));
grad1
=
_mm_adds_epi16
(
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N2
+
1
))),
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
+
1
))));
__m128i
gradNE
=
_mm_merge_epi16
(
grad0
,
grad1
);
grad0
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N4
-
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N4
)));
grad1
=
_mm_adds_epu16
(
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N2
-
1
))),
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
-
1
))));
gradSW
=
_mm_srli_epi16
(
_mm_merge_epi16
(
grad0
,
grad1
),
1
);
//int gradSW = brow1[N4] + brow2[N4-1];
//int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1];
grad0
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N4
-
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N4
)));
grad1
=
_mm_adds_epi16
(
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N2
-
1
))),
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N2
-
1
))));
__m128i
gradSW
=
_mm_merge_epi16
(
grad0
,
grad1
);
minGrad
=
_mm_min_epi16
(
_mm_min_epi16
(
minGrad
,
gradNE
),
gradSW
);
maxGrad
=
_mm_max_epi16
(
_mm_max_epi16
(
maxGrad
,
gradNE
),
gradSW
);
//int gradNW = brow0[N5-1] + brow1[N5];
//int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1];
grad0
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N5
-
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N5
)));
grad1
=
_mm_adds_epi16
(
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N3
-
1
))),
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
-
1
))));
__m128i
gradNW
=
_mm_merge_epi16
(
grad0
,
grad1
);
grad0
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N5
-
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N5
)));
grad1
=
_mm_adds_epu16
(
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N3
-
1
))),
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
-
1
))));
gradNW
=
_mm_srli_epi16
(
_mm_merge_epi16
(
grad0
,
grad1
),
1
);
grad0
=
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N5
+
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N5
)));
grad1
=
_mm_adds_epu16
(
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N3
+
1
))),
_mm_adds_epu16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
+
1
))));
gradSE
=
_mm_srli_epi16
(
_mm_merge_epi16
(
grad0
,
grad1
),
1
);
//int gradSE = brow1[N5] + brow2[N5+1];
//int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1];
grad0
=
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N5
+
1
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N5
)));
grad1
=
_mm_adds_epi16
(
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N3
+
1
))),
_mm_adds_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
)),
_mm_loadu_si128
((
__m128i
*
)(
brow1
+
N3
+
1
))));
__m128i
gradSE
=
_mm_merge_epi16
(
grad0
,
grad1
);
minGrad
=
_mm_min_epi16
(
_mm_min_epi16
(
minGrad
,
gradNW
),
gradSE
);
maxGrad
=
_mm_max_epi16
(
_mm_max_epi16
(
maxGrad
,
gradNW
),
gradSE
);
T
=
_mm_add_epi16
(
_mm_srli_epi16
(
maxGrad
,
1
),
minGrad
);
__m128i
RGs
=
z
,
GRs
=
z
,
Bs
=
z
,
ng
=
z
,
mask
;
__m128i
t0
,
t1
,
x0
,
x1
,
x2
,
x3
,
x4
,
x5
,
x6
,
x7
,
x8
,
x9
,
x10
,
x11
,
x12
,
x13
,
x14
,
x15
,
x16
;
x0
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)
srow
),
z
);
x1
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
-
1
)),
z
);
x2
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
*
2
-
1
)),
z
);
x3
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
)),
z
);
x4
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
*
2
+
1
)),
z
);
x5
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
+
1
)),
z
);
x6
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
+
2
)),
z
);
x7
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
1
)),
z
);
x8
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
+
2
)),
z
);
x9
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
+
1
)),
z
);
x10
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
*
2
+
1
)),
z
);
x11
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
)),
z
);
x12
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
*
2
-
1
)),
z
);
x13
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
-
1
)),
z
);
x14
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
-
2
)),
z
);
x15
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
1
)),
z
);
x16
=
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
-
2
)),
z
);
// gradN
mask
=
_mm_cmpgt_epi16
(
T
,
gradN
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
t0
=
_mm_slli_epi16
(
x3
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
*
2
)),
z
),
x0
);
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epu16
(
x2
,
x4
)),
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epu16
(
x1
,
x5
),
t0
),
mask
));
// gradNE
mask
=
_mm_cmpgt_epi16
(
T
,
gradNE
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
t0
=
_mm_slli_epi16
(
x5
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
*
2
+
2
)),
z
),
x0
);
//int T = minGrad + maxGrad/2;
__m128i
T
=
_mm_adds_epi16
(
_mm_srli_epi16
(
maxGrad
,
1
),
minGrad
);
__m128i
RGs
=
z
,
GRs
=
z
,
Bs
=
z
,
ng
=
z
;
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N6
+
1
)),
_mm_adds_epu16
(
x4
,
x7
)),
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epu16
(
x3
,
x6
)),
mask
));
__m128i
x0
=
_mm_loadl_u8_s16
(
srow
,
+
0
);
__m128i
x1
=
_mm_loadl_u8_s16
(
srow
,
-
1
-
bstep
);
__m128i
x2
=
_mm_loadl_u8_s16
(
srow
,
-
1
-
bstep
*
2
);
__m128i
x3
=
_mm_loadl_u8_s16
(
srow
,
-
bstep
);
__m128i
x4
=
_mm_loadl_u8_s16
(
srow
,
+
1
-
bstep
*
2
);
__m128i
x5
=
_mm_loadl_u8_s16
(
srow
,
+
1
-
bstep
);
__m128i
x6
=
_mm_loadl_u8_s16
(
srow
,
+
2
-
bstep
);
__m128i
x7
=
_mm_loadl_u8_s16
(
srow
,
+
1
);
__m128i
x8
=
_mm_loadl_u8_s16
(
srow
,
+
2
+
bstep
);
__m128i
x9
=
_mm_loadl_u8_s16
(
srow
,
+
1
+
bstep
);
__m128i
x10
=
_mm_loadl_u8_s16
(
srow
,
+
1
+
bstep
*
2
);
__m128i
x11
=
_mm_loadl_u8_s16
(
srow
,
+
bstep
);
__m128i
x12
=
_mm_loadl_u8_s16
(
srow
,
-
1
+
bstep
*
2
);
__m128i
x13
=
_mm_loadl_u8_s16
(
srow
,
-
1
+
bstep
);
__m128i
x14
=
_mm_loadl_u8_s16
(
srow
,
-
2
+
bstep
);
__m128i
x15
=
_mm_loadl_u8_s16
(
srow
,
-
1
);
__m128i
x16
=
_mm_loadl_u8_s16
(
srow
,
-
2
-
bstep
);
__m128i
t0
,
t1
,
mask
;
// grad
E
mask
=
_mm_cmpgt_epi16
(
T
,
grad
E
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// grad
N ***********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
grad
N
);
// mask = T>gradN
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradN)
t0
=
_mm_slli_epi16
(
x
7
,
1
);
t1
=
_mm_adds_ep
u16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
2
)),
z
),
x0
);
t0
=
_mm_slli_epi16
(
x
3
,
1
);
// srow[-bstep]*2
t1
=
_mm_adds_ep
i16
(
_mm_loadl_u8_s16
(
srow
,
-
bstep
*
2
),
x0
);
// srow[-bstep*2] + srow[0]
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
t0
,
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epu16
(
x5
,
x9
),
_mm_adds_epu16
(
x6
,
x8
)),
mask
));
// RGs += (srow[-bstep*2] + srow[0]) * (T>gradN)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
// GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epi16
(
x2
,
x4
)),
mask
));
// Bs += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epi16
(
x1
,
x5
),
t0
),
mask
));
// gradSE
mask
=
_mm_cmpgt_epi16
(
T
,
gradSE
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// gradNE **********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradNE
);
// mask = T>gradNE
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradNE)
t0
=
_mm_slli_epi16
(
x5
,
1
);
// srow[-bstep+1]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
-
bstep
*
2
+
2
),
x0
);
// srow[-bstep*2+2] + srow[0]
t0
=
_mm_slli_epi16
(
x9
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
*
2
+
2
)),
z
),
x0
);
// RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
// GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N6
+
1
)),
_mm_adds_epi16
(
x4
,
x7
)),
mask
));
// Bs += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epi16
(
x3
,
x6
)),
mask
));
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N6
+
1
)),
_mm_adds_epu16
(
x7
,
x10
)),
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epu16
(
x8
,
x11
)),
mask
));
// gradE ***********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradE
);
// mask = T>gradE
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradE)
// gradS
mask
=
_mm_cmpgt_epi16
(
T
,
gradS
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
t0
=
_mm_slli_epi16
(
x7
,
1
);
// srow[1]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
2
),
x0
);
// srow[2] + srow[0]
// RGs += (srow[2] + srow[0]) * (T>gradE)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
// GRs += (srow[1]*2) * (T>gradE)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
t0
,
mask
));
// Bs += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epi16
(
x5
,
x9
),
_mm_adds_epi16
(
x6
,
x8
)),
mask
));
t0
=
_mm_slli_epi16
(
x11
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
*
2
)),
z
),
x0
);
// gradSE **********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradSE
);
// mask = T>gradSE
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradSE)
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epu16
(
x10
,
x12
)),
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epu16
(
x9
,
x13
),
t0
),
mask
));
t0
=
_mm_slli_epi16
(
x9
,
1
);
// srow[bstep+1]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
bstep
*
2
+
2
),
x0
);
// srow[bstep*2+2] + srow[0]
// RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
// GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N6
+
1
)),
_mm_adds_epi16
(
x7
,
x10
)),
mask
));
// Bs += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_slli_epi16
(
x5
,
1
),
_mm_adds_epi16
(
x8
,
x11
)),
mask
));
// gradS
W
mask
=
_mm_cmpgt_epi16
(
T
,
gradS
W
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// gradS
***********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradS
);
// mask = T>gradS
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradS)
t0
=
_mm_slli_epi16
(
x13
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
+
bstep
*
2
-
2
)),
z
),
x0
);
t0
=
_mm_slli_epi16
(
x11
,
1
);
// srow[bstep]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
bstep
*
2
),
x0
);
// srow[bstep*2]+srow[0]
// RGs += (srow[bstep*2]+srow[0]) * (T>gradS)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
// GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epi16
(
x10
,
x12
)),
mask
));
// Bs += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epi16
(
x9
,
x13
),
t0
),
mask
));
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N6
-
1
)),
_mm_adds_epu16
(
x12
,
x15
)),
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epu16
(
x11
,
x14
)),
mask
));
// gradSW **********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradSW
);
// mask = T>gradSW
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradSW)
// gradW
mask
=
_mm_cmpgt_epi16
(
T
,
gradW
);
ng
=
_mm_sub_epi16
(
ng
,
mask
);
t0
=
_mm_slli_epi16
(
x13
,
1
);
// srow[bstep-1]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
bstep
*
2
-
2
),
x0
);
// srow[bstep*2-2]+srow[0]
// RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
// GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow2
+
N6
-
1
)),
_mm_adds_epi16
(
x12
,
x15
)),
mask
));
// Bs += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epi16
(
x11
,
x14
)),
mask
));
t0
=
_mm_slli_epi16
(
x15
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
2
)),
z
),
x0
);
// gradW ***********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradW
);
// mask = T>gradW
ng
=
_mm_sub_epi16
(
ng
,
mask
);
// ng += (T>gradW)
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
t0
,
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epu16
(
x1
,
x13
),
_mm_adds_epu16
(
x14
,
x16
)),
mask
));
t0
=
_mm_slli_epi16
(
x15
,
1
);
// srow[-1]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
-
2
),
x0
);
// srow[-2]+srow[0]
// RGs += (srow[-2]+srow[0]) * (T>gradW)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
t1
,
mask
));
// GRs += (srow[-1]*2) * (T>gradW)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
t0
,
mask
));
// Bs += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_adds_epi16
(
x1
,
x13
),
_mm_adds_epi16
(
x14
,
x16
)),
mask
));
// gradNW
mask
=
_mm_cmpgt_epi16
(
T
,
gradNW
);
ng
=
_mm_
max_epi16
(
_mm_sub_epi16
(
ng
,
mask
),
all_ones
);
// gradNW
**********************************************
mask
=
_mm_cmpgt_epi16
(
T
,
gradNW
);
// mask = T>gradNW
ng
=
_mm_
sub_epi16
(
ng
,
mask
);
// ng += (T>gradNW)
t0
=
_mm_slli_epi16
(
x1
,
1
);
// srow[-bstep-1]*2
t1
=
_mm_adds_epi16
(
_mm_loadl_u8_s16
(
srow
,
-
bstep
*
2
-
2
),
x0
);
// srow[-bstep*2-2]+srow[0]
// RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW)
RGs
=
_mm_adds_epi16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
// GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW)
GRs
=
_mm_adds_epi16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N6
-
1
)),
_mm_adds_epi16
(
x2
,
x15
)),
mask
));
// Bs += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW)
Bs
=
_mm_adds_epi16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_slli_epi16
(
x5
,
1
),
_mm_adds_epi16
(
x3
,
x16
)),
mask
));
__m128
ngf0
,
ngf1
;
ngf0
=
_mm_div_ps
(
_0_5
,
_mm_cvtloepi16_ps
(
ng
));
ngf1
=
_mm_div_ps
(
_0_5
,
_mm_cvthiepi16_ps
(
ng
));
t0
=
_mm_slli_epi16
(
x1
,
1
);
t1
=
_mm_adds_epu16
(
_mm_unpacklo_epi8
(
_mm_loadl_epi64
((
__m128i
*
)(
srow
-
bstep
*
2
-
2
)),
z
),
x0
);
RGs
=
_mm_adds_epu16
(
RGs
,
_mm_and_si128
(
_mm_merge_epi16
(
t1
,
t0
),
mask
));
GRs
=
_mm_adds_epu16
(
GRs
,
_mm_and_si128
(
_mm_merge_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
brow0
+
N6
-
1
)),
_mm_adds_epu16
(
x2
,
x15
)),
mask
));
Bs
=
_mm_adds_epu16
(
Bs
,
_mm_and_si128
(
_mm_merge_epi16
(
t0
,
_mm_adds_epu16
(
x3
,
x16
)),
mask
));
// now interpolate r, g & b
t0
=
_mm_sub_epi16
(
GRs
,
RGs
);
t1
=
_mm_sub_epi16
(
Bs
,
RGs
);
...
...
modules/imgproc/test/test_color.cpp
View file @
09490188
...
...
@@ -1658,6 +1658,7 @@ void CV_ColorBayerTest::prepare_to_validation( int /*test_case_idx*/ )
CV_Error
(
CV_StsUnsupportedFormat
,
""
);
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Accuracy test for gray-scale color conversion: instantiates
// CV_ColorGrayTest and invokes safe_run() on it.
TEST(Imgproc_ColorGray, accuracy)
{
    CV_ColorGrayTest grayTest;
    grayTest.safe_run();
}
...
...
@@ -1669,3 +1670,24 @@ TEST(Imgproc_ColorLab, accuracy) { CV_ColorLabTest test; test.safe_run(); }
// Accuracy test for Luv color conversion: instantiates
// CV_ColorLuvTest and invokes safe_run() on it.
TEST(Imgproc_ColorLuv, accuracy)
{
    CV_ColorLuvTest luvTest;
    luvTest.safe_run();
}
// Accuracy test for RGB color conversion: instantiates
// CV_ColorRGBTest and invokes safe_run() on it.
TEST(Imgproc_ColorRGB, accuracy)
{
    CV_ColorRGBTest rgbTest;
    rgbTest.safe_run();
}
// Accuracy test for Bayer color conversion: instantiates
// CV_ColorBayerTest and invokes safe_run() on it.
TEST(Imgproc_ColorBayer, accuracy)
{
    CV_ColorBayerTest bayerTest;
    bayerTest.safe_run();
}
// Regression test for the VNG Bayer demosaicing path of cvtColor
// (CV_BayerBG2BGR_VNG): converts a stored Bayer input image and compares the
// result against a stored reference image, tolerating a per-channel
// difference of at most 1.
TEST(Imgproc_ColorBayerVNG, accuracy)
{
    cvtest::TS& ts = *cvtest::TS::ptr();

    Mat given = imread(ts.get_data_path() + "/cvtcolor/bayerVNG_input.png", CV_LOAD_IMAGE_GRAYSCALE);
    Mat gold = imread(ts.get_data_path() + "/cvtcolor/bayerVNG_gold.png", CV_LOAD_IMAGE_UNCHANGED);

    // imread yields an empty matrix when a file cannot be read; stop here with
    // a clear message instead of failing later inside cvtColor.
    ASSERT_FALSE(given.empty()) << "Can't load cvtcolor/bayerVNG_input.png";
    ASSERT_FALSE(gold.empty()) << "Can't load cvtcolor/bayerVNG_gold.png";

    Mat result;
    cvtColor(given, result, CV_BayerBG2BGR_VNG, 3);

    // These checks are fatal: absdiff below needs both matrices to agree in
    // type and size, so continuing after a mismatch would only hide the cause.
    ASSERT_EQ(gold.type(), result.type());
    ASSERT_EQ(gold.cols, result.cols);
    ASSERT_EQ(gold.rows, result.rows);

    Mat diff;
    absdiff(gold, result, diff);

    // View the difference as a single-channel matrix and count the values
    // that deviate from the reference by more than 1.
    EXPECT_EQ(0, countNonZero(diff.reshape(1) > 1));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment