submodule / opencv / Commits / c46f119e

Commit c46f119e authored Oct 23, 2019 by ChipKerchner

    Convert demosaic functions to HAL

parent d513fb4c

Showing 1 changed file with 318 additions and 273 deletions:

    modules/imgproc/src/demosaicing.cpp  (+318, -273)
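For context, "HAL" here means OpenCV's universal intrinsics from opencv2/core/hal/intrin.hpp: the raw SSE2 __m128i / _mm_* kernels below are deleted and the same arithmetic is re-expressed with v_uint16x8 / v_* wrappers, so a single source path compiles to SSE2, NEON, VSX and other 128-bit backends. A minimal sketch of the conversion pattern (the helper name and setting are hypothetical, not part of this commit):

    #include "opencv2/core/hal/intrin.hpp"

    // Illustration only: average two rows of 16-bit pixels, eight lanes at a time.
    static void avg_rows(const ushort* row0, const ushort* row1, ushort* dst)
    {
    #if CV_SIMD128
        // Universal intrinsics: the same source maps to SSE2, NEON, VSX, ...
        cv::v_uint16x8 a = cv::v_load(row0);
        cv::v_uint16x8 b = cv::v_load(row1);
        cv::v_store(dst, (a + b) >> 1);          // per-lane (a + b) / 2
    #else
        for (int i = 0; i < 8; i++)              // scalar fallback
            dst[i] = (ushort)((row0[i] + row1[i]) >> 1);
    #endif
    }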
...
...
@@ -86,6 +86,7 @@
 #include "precomp.hpp"
+#include "opencv2/core/hal/intrin.hpp"
 #include <limits>
...
...
@@ -111,7 +112,7 @@ public:
         return 0;
     }

-    int bayer2RGBA(const T*, int, T*, int, int) const
+    int bayer2RGBA(const T*, int, T*, int, int, const T) const
     {
         return 0;
     }
...
...
@@ -122,279 +123,14 @@ public:
     }
 };

-#if CV_SSE2
+#if CV_SIMD128
 class SIMDBayerInterpolator_8u
 {
 public:
     SIMDBayerInterpolator_8u()
     {
-        use_simd = checkHardwareSupport(CV_CPU_SSE2);
     }

-    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst, int width,
-                   int bcoeff, int gcoeff, int rcoeff) const
-    {
-        if( !use_simd )
-            return 0;
-
-        __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
-        __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
-        __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
-        const uchar* bayer_end = bayer + width;
-
-        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
-        {
-            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
-            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer + bayer_step));
-            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer + bayer_step*2));
-
-            __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
-                                       _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
-            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
-            b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);
-
-            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
-            __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
-            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
-            g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);
-
-            r0 = _mm_srli_epi16(r1, 8);
-            r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
-            r0 = _mm_slli_epi16(r0, 3);
-
-            g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
-            g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
-            g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
-            g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
-            g0 = _mm_srli_epi16(g0, 2);
-            g1 = _mm_srli_epi16(g1, 2);
-            g0 = _mm_packus_epi16(g0, g0);
-            g1 = _mm_packus_epi16(g1, g1);
-            g0 = _mm_unpacklo_epi8(g0, g1);
-            _mm_storeu_si128((__m128i*)dst, g0);
-        }
-
-        return (int)(bayer - (bayer_end - width));
-    }
-
-    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
-    {
-        if( !use_simd )
-            return 0;
-        /*
-         B G B G | B G B G | B G B G | B G B G
-         G R G R | G R G R | G R G R | G R G R
-         B G B G | B G B G | B G B G | B G B G
-         */
-        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
-        __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
-        __m128i masklo = _mm_set1_epi16(0x00ff);
-        const uchar* bayer_end = bayer + width;
-
-        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
-        {
-            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
-            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer + bayer_step));
-            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer + bayer_step*2));
-
-            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
-            __m128i nextb1 = _mm_srli_si128(b1, 2);
-            __m128i b0 = _mm_add_epi16(b1, nextb1);
-            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
-            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
-            // b0 b2 ... b14 b1 b3 ... b15
-            b0 = _mm_packus_epi16(b0, b1);
-
-            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
-            __m128i g1 = _mm_and_si128(r1, masklo);
-            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
-            g1 = _mm_srli_si128(g1, 2);
-            g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
-            // g0 g2 ... g14 g1 g3 ... g15
-            g0 = _mm_packus_epi16(g0, g1);
-
-            r0 = _mm_srli_epi16(r1, 8);
-            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
-            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
-            // r0 r2 ... r14 r1 r3 ... r15
-            r0 = _mm_packus_epi16(r0, r1);
-
-            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
-            b0 = _mm_xor_si128(b0, b1);
-            r0 = _mm_xor_si128(r0, b1);
-
-            // b1 g1 b3 g3 b5 g5...
-            b1 = _mm_unpackhi_epi8(b0, g0);
-            // b0 g0 b2 g2 b4 g4 ....
-            b0 = _mm_unpacklo_epi8(b0, g0);
-
-            // r1 0 r3 0 r5 0 ...
-            r1 = _mm_unpackhi_epi8(r0, z);
-            // r0 0 r2 0 r4 0 ...
-            r0 = _mm_unpacklo_epi8(r0, z);
-
-            // 0 b0 g0 r0 0 b2 g2 r2 ...
-            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
-            // 0 b8 g8 r8 0 b10 g10 r10 ...
-            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
-
-            // b1 g1 r1 0 b3 g3 r3 0 ...
-            r0 = _mm_unpacklo_epi16(b1, r1);
-            // b9 g9 r9 0 b11 g11 r11 0 ...
-            r1 = _mm_unpackhi_epi16(b1, r1);
-
-            // 0 b0 g0 r0 b1 g1 r1 0 ...
-            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
-            // 0 b4 g4 r4 b5 g5 r5 0 ...
-            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
-
-            _mm_storel_epi64((__m128i*)(dst - 1 + 0), b0);
-            _mm_storel_epi64((__m128i*)(dst - 1 + 6*1), _mm_srli_si128(b0, 8));
-            _mm_storel_epi64((__m128i*)(dst - 1 + 6*2), b1);
-            _mm_storel_epi64((__m128i*)(dst - 1 + 6*3), _mm_srli_si128(b1, 8));
-
-            // 0 b8 g8 r8 b9 g9 r9 0 ...
-            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
-            // 0 b12 g12 r12 b13 g13 r13 0 ...
-            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
-
-            _mm_storel_epi64((__m128i*)(dst - 1 + 6*4), g0);
-            _mm_storel_epi64((__m128i*)(dst - 1 + 6*5), _mm_srli_si128(g0, 8));
-
-            _mm_storel_epi64((__m128i*)(dst - 1 + 6*6), g1);
-        }
-
-        return (int)(bayer - (bayer_end - width));
-    }
-
-    int bayer2RGBA(const uchar*, int, uchar*, int, int) const
-    {
-        return 0;
-    }
-
-    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
-    {
-        if( !use_simd )
-            return 0;
-
-        const uchar* bayer_end = bayer + width;
-        __m128i masklow = _mm_set1_epi16(0x00ff);
-        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
-        __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
-        __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);
-
-        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
-        {
-            /*
-             B G B G | B G B G | B G B G | B G B G
-             G R G R | G R G R | G R G R | G R G R
-             B G B G | B G B G | B G B G | B G B G
-             */
-
-            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
-            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer + bayer_step));
-            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer + bayer_step*2));
-
-            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
-            __m128i nextb1 = _mm_srli_si128(b1, 2);
-            __m128i b0 = _mm_add_epi16(b1, nextb1);
-            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
-            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
-            // b0 b2 ... b14 b1 b3 ... b15
-            b0 = _mm_packus_epi16(b0, b1);
-
-            // vertical sum
-            __m128i r0g = _mm_srli_epi16(r0, 8);
-            __m128i r2g = _mm_srli_epi16(r2, 8);
-            __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
-            // gorizontal sum
-            __m128i g1 = _mm_and_si128(masklow, r1);
-            __m128i nextg1 = _mm_srli_si128(g1, 2);
-            __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);
-
-            // gradients
-            __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
-            __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
-            __m128i gmask = _mm_cmpgt_epi16(gradg, gradv);
-            __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
-            // g0 g2 ... g14 g1 g3 ...
-            g0 = _mm_packus_epi16(g0, nextg1);
-
-            r0 = _mm_srli_epi16(r1, 8);
-            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
-            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
-            // r0 r2 ... r14 r1 r3 ... r15
-            r0 = _mm_packus_epi16(r0, r1);
-
-            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
-            b0 = _mm_xor_si128(b0, b1);
-            r0 = _mm_xor_si128(r0, b1);
-
-            // b1 g1 b3 g3 b5 g5...
-            b1 = _mm_unpackhi_epi8(b0, g0);
-            // b0 g0 b2 g2 b4 g4 ....
-            b0 = _mm_unpacklo_epi8(b0, g0);
-
-            // r1 0 r3 0 r5 0 ...
-            r1 = _mm_unpackhi_epi8(r0, z);
-            // r0 0 r2 0 r4 0 ...
-            r0 = _mm_unpacklo_epi8(r0, z);
-
-            // 0 b0 g0 r0 0 b2 g2 r2 ...
-            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
-            // 0 b8 g8 r8 0 b10 g10 r10 ...
-            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);
-
-            // b1 g1 r1 0 b3 g3 r3 0 ...
-            r0 = _mm_unpacklo_epi16(b1, r1);
-            // b9 g9 r9 0 b11 g11 r11 0 ...
-            r1 = _mm_unpackhi_epi16(b1, r1);
-
-            // 0 b0 g0 r0 b1 g1 r1 0 ...
-            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
-            // 0 b4 g4 r4 b5 g5 r5 0 ...
-            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);
-
-            _mm_storel_epi64((__m128i*)(dst + 0), b0);
-            _mm_storel_epi64((__m128i*)(dst + 6*1), _mm_srli_si128(b0, 8));
-            _mm_storel_epi64((__m128i*)(dst + 6*2), b1);
-            _mm_storel_epi64((__m128i*)(dst + 6*3), _mm_srli_si128(b1, 8));
-
-            // 0 b8 g8 r8 b9 g9 r9 0 ...
-            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
-            // 0 b12 g12 r12 b13 g13 r13 0 ...
-            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);
-
-            _mm_storel_epi64((__m128i*)(dst + 6*4), g0);
-            _mm_storel_epi64((__m128i*)(dst + 6*5), _mm_srli_si128(g0, 8));
-
-            _mm_storel_epi64((__m128i*)(dst + 6*6), g1);
-        }
-
-        return int(bayer - (bayer_end - width));
-    }
-
-    bool use_simd;
-};
-#elif CV_NEON
-class SIMDBayerInterpolator_8u
-{
-public:
-    SIMDBayerInterpolator_8u()
-    {
-    }

     int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst, int width,
                    int bcoeff, int gcoeff, int rcoeff) const
     {
         /*
          B G B G | B G B G | B G B G | B G B G
          G R G R | G R G R | G R G R | G R G R
          B G B G | B G B G | B G B G | B G B G
         */
+#if CV_NEON
         uint16x8_t masklo = vdupq_n_u16(255);
         const uchar* bayer_end = bayer + width;
...
...
@@ -440,6 +176,40 @@ public:
             vst1_u8(dst, p.val[0]);
             vst1_u8(dst + 8, p.val[1]);
         }
+#else
+        v_uint16x8 _b2y = v_setall_u16((ushort)(rcoeff*2));
+        v_uint16x8 _g2y = v_setall_u16((ushort)(gcoeff*2));
+        v_uint16x8 _r2y = v_setall_u16((ushort)(bcoeff*2));
+        const uchar* bayer_end = bayer + width;
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
+        {
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer + bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer + bayer_step*2));
+
+            v_uint16x8 b1 = ((r0 << 8) >> 7) + ((r2 << 8) >> 7);
+            v_uint16x8 b0 = v_rotate_right<1>(b1) + b1;
+            b1 = v_rotate_right<1>(b1) << 1;
+
+            v_uint16x8 g0 = (r0 >> 7) + (r2 >> 7);
+            v_uint16x8 g1 = (r1 << 8) >> 7;
+            g0 += v_rotate_right<1>(g1) + g1;
+            g1 = v_rotate_right<1>(g1) << 2;
+
+            r0 = r1 >> 8;
+            r1 = (v_rotate_right<1>(r0) + r0) << 2;
+            r0 = r0 << 3;
+
+            g0 = (v_mul_hi(b0, _b2y) + v_mul_hi(g0, _g2y) + v_mul_hi(r0, _r2y)) >> 2;
+            g1 = (v_mul_hi(b1, _b2y) + v_mul_hi(g1, _g2y) + v_mul_hi(r1, _r2y)) >> 2;
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g0)),
+                  v_pack_u(v_reinterpret_as_s16(g1), v_reinterpret_as_s16(g1)),
+                  pack_lo, pack_hi);
+            v_store(dst, pack_lo);
+        }
+#endif
+
         return (int)(bayer - (bayer_end - width));
     }
...
...
@@ -451,6 +221,8 @@ public:
          G R G R | G R G R | G R G R | G R G R
          B G B G | B G B G | B G B G | B G B G
         */
+#if CV_NEON
         uint16x8_t masklo = vdupq_n_u16(255);
         uint8x16x3_t pix;
         const uchar* bayer_end = bayer + width;
...
...
@@ -484,21 +256,109 @@ public:
             vst3q_u8(dst - 1, pix);
         }
+#else
+        v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
+        v_uint16x8 mask = v_setall_u16(blue < 0 ? (ushort)(-1) : 0);
+        v_uint16x8 masklo = v_setall_u16(0x00ff);
+        v_uint8x16 z = v_setzero_u8();
+        const uchar* bayer_end = bayer + width;
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
+        {
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer + bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer + bayer_step*2));
+
+            v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
+            v_uint16x8 nextb1 = v_rotate_right<1>(b1);
+            v_uint16x8 b0 = b1 + nextb1;
+            b1 = (nextb1 + delta1) >> 1;
+            b0 = (b0 + delta2) >> 2;
+            // b0 b2 ... b14 b1 b3 ... b15
+            b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));
+
+            v_uint16x8 g0 = (r0 >> 8) + (r2 >> 8);
+            v_uint16x8 g1 = r1 & masklo;
+            g0 += v_rotate_right<1>(g1) + g1;
+            g1 = v_rotate_right<1>(g1);
+            g0 = (g0 + delta2) >> 2;
+            // g0 g2 ... g14 g1 g3 ... g15
+            g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g1)));
+
+            r0 = r1 >> 8;
+            r1 = v_rotate_right<1>(r0) + r0;
+            r1 = (r1 + delta1) >> 1;
+            // r0 r2 ... r14 r1 r3 ... r15
+            r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));
+
+            b1 = (b0 ^ r0) & mask;
+            b0 = b0 ^ b1;
+            r0 = r0 ^ b1;
+
+            // b1 g1 b3 g3 b5 g5...
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
+            b1 = v_reinterpret_as_u16(pack_hi);
+            // b0 g0 b2 g2 b4 g4 ....
+            b0 = v_reinterpret_as_u16(pack_lo);
+
+            // r1 0 r3 0 r5 0 ...
+            v_zip(v_reinterpret_as_u8(r0), z, pack_lo, pack_hi);
+            r1 = v_reinterpret_as_u16(pack_hi);
+            // r0 0 r2 0 r4 0 ...
+            r0 = v_reinterpret_as_u16(pack_lo);
+
+            // 0 b0 g0 r0 0 b2 g2 r2 ...
+            v_zip(b0, r0, g0, g1);
+            g0 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g0)));
+            // 0 b8 g8 r8 0 b10 g10 r10 ...
+            g1 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g1)));
+
+            // b1 g1 r1 0 b3 g3 r3 0 ...
+            v_zip(b1, r1, r0, r1);
+            // b9 g9 r9 0 b11 g11 r11 0 ...
+
+            // 0 b0 g0 r0 b1 g1 r1 0 ...
+            v_uint32x4 pack32_lo, pack32_hi;
+            v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
+            b0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b4 g4 r4 b5 g5 r5 0 ...
+            b1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst - 1 + 0, v_reinterpret_as_u8(b0));
+            v_store_high(dst - 1 + 6*1, v_reinterpret_as_u8(b0));
+            v_store_low(dst - 1 + 6*2, v_reinterpret_as_u8(b1));
+            v_store_high(dst - 1 + 6*3, v_reinterpret_as_u8(b1));
+
+            // 0 b8 g8 r8 b9 g9 r9 0 ...
+            v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
+            g0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b12 g12 r12 b13 g13 r13 0 ...
+            g1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst - 1 + 6*4, v_reinterpret_as_u8(g0));
+            v_store_high(dst - 1 + 6*5, v_reinterpret_as_u8(g0));
+
+            v_store_low(dst - 1 + 6*6, v_reinterpret_as_u8(g1));
+        }
+#endif
+
         return (int)(bayer - (bayer_end - width));
     }

-    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
+    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue, const uchar alpha) const
     {
         /*
          B G B G | B G B G | B G B G | B G B G
          G R G R | G R G R | G R G R | G R G R
          B G B G | B G B G | B G B G | B G B G
         */
 #if CV_NEON
         uint16x8_t masklo = vdupq_n_u16(255);
         uint8x16x4_t pix;
         const uchar* bayer_end = bayer + width;
-        pix.val[3] = vdupq_n_u8(255);
+        pix.val[3] = vdupq_n_u8(alpha);

         for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
         {
...
...
@@ -529,13 +389,198 @@ public:
             vst4q_u8(dst - 1, pix);
         }
+#else
+        v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
+        v_uint16x8 mask = v_setall_u16(blue < 0 ? (ushort)(-1) : 0);
+        v_uint16x8 masklo = v_setall_u16(0x00ff);
+        v_uint8x16 a = v_setall_u8(alpha);
+        const uchar* bayer_end = bayer + width;
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
+        {
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer + bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer + bayer_step*2));
+
+            v_uint16x8 b1 = (r0 & masklo) + (r2 & masklo);
+            v_uint16x8 nextb1 = v_rotate_right<1>(b1);
+            v_uint16x8 b0 = b1 + nextb1;
+            b1 = (nextb1 + delta1) >> 1;
+            b0 = (b0 + delta2) >> 2;
+            // b0 b2 ... b14 b1 b3 ... b15
+            b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));
+
+            v_uint16x8 g0 = (r0 >> 8) + (r2 >> 8);
+            v_uint16x8 g1 = r1 & masklo;
+            g0 += v_rotate_right<1>(g1) + g1;
+            g1 = v_rotate_right<1>(g1);
+            g0 = (g0 + delta2) >> 2;
+            // g0 g2 ... g14 g1 g3 ... g15
+            g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(g1)));
+
+            r0 = r1 >> 8;
+            r1 = v_rotate_right<1>(r0) + r0;
+            r1 = (r1 + delta1) >> 1;
+            // r0 r2 ... r14 r1 r3 ... r15
+            r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));
+
+            b1 = (b0 ^ r0) & mask;
+            b0 = b0 ^ b1;
+            r0 = r0 ^ b1;
+
+            // b1 g1 b3 g3 b5 g5...
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
+            b1 = v_reinterpret_as_u16(pack_hi);
+            // b0 g0 b2 g2 b4 g4 ....
+            b0 = v_reinterpret_as_u16(pack_lo);
+
+            // r1 a r3 a r5 a ...
+            v_zip(v_reinterpret_as_u8(r0), a, pack_lo, pack_hi);
+            r1 = v_reinterpret_as_u16(pack_hi);
+            // r0 a r2 a r4 a ...
+            r0 = v_reinterpret_as_u16(pack_lo);
+
+            // a b0 g0 r0 a b2 g2 r2 ...
+            v_zip(b0, r0, g0, g1);
+            // a b8 g8 r8 a b10 g10 r10 ...
+
+            // b1 g1 r1 a b3 g3 r3 a ...
+            v_zip(b1, r1, r0, r1);
+            // b9 g9 r9 a b11 g11 r11 a ...
+
+            // a b0 g0 r0 b1 g1 r1 a ...
+            v_uint32x4 pack32_lo, pack32_hi;
+            v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
+            b0 = v_reinterpret_as_u16(pack32_lo);
+            // a b4 g4 r4 b5 g5 r5 a ...
+            b1 = v_reinterpret_as_u16(pack32_hi);
+
+            v_store_low(dst - 1 + 0, v_reinterpret_as_u8(b0));
+            v_store_high(dst - 1 + 8*1, v_reinterpret_as_u8(b0));
+            v_store_low(dst - 1 + 8*2, v_reinterpret_as_u8(b1));
+            v_store_high(dst - 1 + 8*3, v_reinterpret_as_u8(b1));
+
+            // a b8 g8 r8 b9 g9 r9 a ...
+            v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
+            g0 = v_reinterpret_as_u16(pack32_lo);
+            // a b12 g12 r12 b13 g13 r13 a ...
+            g1 = v_reinterpret_as_u16(pack32_hi);
+
+            v_store_low(dst - 1 + 8*4, v_reinterpret_as_u8(g0));
+            v_store_high(dst - 1 + 8*5, v_reinterpret_as_u8(g0));
+
+            v_store_low(dst - 1 + 8*6, v_reinterpret_as_u8(g1));
+        }
+#endif
+
         return (int)(bayer - (bayer_end - width));
     }

-    int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
+    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
     {
-        return 0;
+        const uchar* bayer_end = bayer + width;
+        v_uint16x8 masklow = v_setall_u16(0x00ff);
+        v_uint16x8 delta1 = v_setall_u16(1), delta2 = v_setall_u16(2);
+        v_uint16x8 full = v_setall_u16((ushort)(-1));
+        v_uint8x16 z = v_setzero_u8();
+        v_uint16x8 mask = v_setall_u16(blue > 0 ? (ushort)(-1) : 0);
+
+        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
+        {
+            /*
+             B G B G | B G B G | B G B G | B G B G
+             G R G R | G R G R | G R G R | G R G R
+             B G B G | B G B G | B G B G | B G B G
+             */
+
+            v_uint16x8 r0 = v_load((ushort*)bayer);
+            v_uint16x8 r1 = v_load((ushort*)(bayer + bayer_step));
+            v_uint16x8 r2 = v_load((ushort*)(bayer + bayer_step*2));
+
+            v_uint16x8 b1 = (r0 & masklow) + (r2 & masklow);
+            v_uint16x8 nextb1 = v_rotate_right<1>(b1);
+            v_uint16x8 b0 = b1 + nextb1;
+            b1 = (nextb1 + delta1) >> 1;
+            b0 = (b0 + delta2) >> 2;
+            // b0 b2 ... b14 b1 b3 ... b15
+            b0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(b0), v_reinterpret_as_s16(b1)));
+
+            // vertical sum
+            v_uint16x8 r0g = r0 >> 8;
+            v_uint16x8 r2g = r2 >> 8;
+            v_uint16x8 sumv = ((r0g + r2g) + delta1) >> 1;
+            // horizontal sum
+            v_uint16x8 g1 = r1 & masklow;
+            v_uint16x8 nextg1 = v_rotate_right<1>(g1);
+            v_uint16x8 sumg = (g1 + nextg1 + delta1) >> 1;
+
+            // gradients
+            v_uint16x8 gradv = (r0g - r2g) + (r2g - r0g);
+            v_uint16x8 gradg = (nextg1 - g1) + (g1 - nextg1);
+            v_uint16x8 gmask = gradg > gradv;
+            v_uint16x8 g0 = (gmask & sumv) + (sumg & (gmask ^ full));
+            // g0 g2 ... g14 g1 g3 ...
+            g0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(g0), v_reinterpret_as_s16(nextg1)));
+
+            r0 = r1 >> 8;
+            r1 = v_rotate_right<1>(r0) + r0;
+            r1 = (r1 + delta1) >> 1;
+            // r0 r2 ... r14 r1 r3 ... r15
+            r0 = v_reinterpret_as_u16(v_pack_u(v_reinterpret_as_s16(r0), v_reinterpret_as_s16(r1)));
+
+            b1 = (b0 ^ r0) & mask;
+            b0 = b0 ^ b1;
+            r0 = r0 ^ b1;
+
+            // b1 g1 b3 g3 b5 g5...
+            v_uint8x16 pack_lo, pack_hi;
+            v_zip(v_reinterpret_as_u8(b0), v_reinterpret_as_u8(g0), pack_lo, pack_hi);
+            b1 = v_reinterpret_as_u16(pack_hi);
+            // b0 g0 b2 g2 b4 g4 ....
+            b0 = v_reinterpret_as_u16(pack_lo);
+
+            // r1 0 r3 0 r5 0 ...
+            v_zip(v_reinterpret_as_u8(r0), z, pack_lo, pack_hi);
+            r1 = v_reinterpret_as_u16(pack_hi);
+            // r0 0 r2 0 r4 0 ...
+            r0 = v_reinterpret_as_u16(pack_lo);
+
+            // 0 b0 g0 r0 0 b2 g2 r2 ...
+            v_zip(b0, r0, g0, g1);
+            g0 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g0)));
+            // 0 b8 g8 r8 0 b10 g10 r10 ...
+            g1 = v_reinterpret_as_u16(v_rotate_left<1>(v_reinterpret_as_u8(g1)));
+
+            // b1 g1 r1 0 b3 g3 r3 0 ...
+            v_zip(b1, r1, r0, r1);
+            // b9 g9 r9 0 b11 g11 r11 0 ...
+
+            // 0 b0 g0 r0 b1 g1 r1 0 ...
+            v_uint32x4 pack32_lo, pack32_hi;
+            v_zip(v_reinterpret_as_u32(g0), v_reinterpret_as_u32(r0), pack32_lo, pack32_hi);
+            b0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b4 g4 r4 b5 g5 r5 0 ...
+            b1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst + 0, v_reinterpret_as_u8(b0));
+            v_store_high(dst + 6*1, v_reinterpret_as_u8(b0));
+            v_store_low(dst + 6*2, v_reinterpret_as_u8(b1));
+            v_store_high(dst + 6*3, v_reinterpret_as_u8(b1));
+
+            // 0 b8 g8 r8 b9 g9 r9 0 ...
+            v_zip(v_reinterpret_as_u32(g1), v_reinterpret_as_u32(r1), pack32_lo, pack32_hi);
+            g0 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_lo)));
+            // 0 b12 g12 r12 b13 g13 r13 0 ...
+            g1 = v_reinterpret_as_u16(v_rotate_right<1>(v_reinterpret_as_u8(pack32_hi)));
+
+            v_store_low(dst + 6*4, v_reinterpret_as_u8(g0));
+            v_store_high(dst + 6*5, v_reinterpret_as_u8(g0));
+
+            v_store_low(dst + 6*6, v_reinterpret_as_u8(g1));
+        }
+
+        return int(bayer - (bayer_end - width));
     }
 };
 #else
...
...
@@ -775,7 +820,7 @@ public:
         // simd optimization only for dcn == 3
         int delta = dcn == 4 ?
-            vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
+            vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue, alpha) :
             vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
         bayer += delta;
         dst += delta*dcn;
...
...
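The kernels in this file back cv::demosaicing (and the Bayer modes of cv::cvtColor). A usage sketch with a synthetic input, assumed BayerBG layout, exercising the gray, 3-channel and 4-channel paths touched by this commit:

    #include <opencv2/imgproc.hpp>

    int main()
    {
        // Synthetic 8-bit Bayer mosaic; a real frame would come from a sensor or file.
        cv::Mat bayer(480, 640, CV_8UC1, cv::Scalar(128));
        cv::Mat gray, bgr, bgra;
        cv::demosaicing(bayer, gray, cv::COLOR_BayerBG2GRAY);    // bayer2Gray path
        cv::demosaicing(bayer, bgr,  cv::COLOR_BayerBG2BGR);     // bayer2RGB path (dcn == 3)
        cv::demosaicing(bayer, bgra, cv::COLOR_BayerBG2BGR, 4);  // bayer2RGBA path (dcn == 4)
        return 0;
    }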