Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a54f6bb0
Commit
a54f6bb0
authored
Oct 05, 2014
by
Ilya Lavrenov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cv::cvtColor
parent
6f05a250
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1369 additions
and
21 deletions
+1369
-21
perf_blur.cpp
modules/imgproc/perf/perf_blur.cpp
+6
-1
color.cpp
modules/imgproc/src/color.cpp
+1363
-12
imgwarp.cpp
modules/imgproc/src/imgwarp.cpp
+0
-8
No files found.
modules/imgproc/perf/perf_blur.cpp
View file @
a54f6bb0
...
...
@@ -98,6 +98,11 @@ PERF_TEST_P(Size_MatType_BorderType, blur16x16,
Size
size
=
get
<
0
>
(
GetParam
());
int
type
=
get
<
1
>
(
GetParam
());
BorderType
btype
=
get
<
2
>
(
GetParam
());
double
eps
=
1e-3
;
#if CV_NEON
eps
=
CV_MAT_DEPTH
(
type
)
<=
CV_32S
?
1
:
eps
;
#endif
Mat
src
(
size
,
type
);
Mat
dst
(
size
,
type
);
...
...
@@ -106,7 +111,7 @@ PERF_TEST_P(Size_MatType_BorderType, blur16x16,
TEST_CYCLE
()
blur
(
src
,
dst
,
Size
(
16
,
16
),
Point
(
-
1
,
-
1
),
btype
);
SANITY_CHECK
(
dst
,
1e-3
);
SANITY_CHECK
(
dst
,
eps
);
}
PERF_TEST_P
(
Size_MatType_BorderType3x3
,
box3x3
,
...
...
modules/imgproc/src/color.cpp
View file @
a54f6bb0
...
...
@@ -769,7 +769,6 @@ template<typename _Tp> struct RGB2Gray
float
coeffs
[
3
];
};
template
<>
struct
RGB2Gray
<
uchar
>
{
typedef
uchar
channel_type
;
...
...
@@ -800,6 +799,166 @@ template<> struct RGB2Gray<uchar>
int
tab
[
256
*
3
];
};
#if CV_NEON
template
<>
struct
RGB2Gray
<
ushort
>
{
typedef
ushort
channel_type
;
RGB2Gray
(
int
_srccn
,
int
blueIdx
,
const
int
*
_coeffs
)
:
srccn
(
_srccn
)
{
static
const
int
coeffs0
[]
=
{
R2Y
,
G2Y
,
B2Y
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
3
*
sizeof
(
coeffs
[
0
]));
if
(
blueIdx
==
0
)
std
::
swap
(
coeffs
[
0
],
coeffs
[
2
]);
v_cb
=
vdup_n_u16
(
coeffs
[
0
]);
v_cg
=
vdup_n_u16
(
coeffs
[
1
]);
v_cr
=
vdup_n_u16
(
coeffs
[
2
]);
v_delta
=
vdupq_n_u32
(
1
<<
(
yuv_shift
-
1
));
}
void
operator
()(
const
ushort
*
src
,
ushort
*
dst
,
int
n
)
const
{
int
scn
=
srccn
,
cb
=
coeffs
[
0
],
cg
=
coeffs
[
1
],
cr
=
coeffs
[
2
],
i
=
0
;
for
(
;
i
<=
n
-
8
;
i
+=
8
,
src
+=
scn
*
8
)
{
uint16x8_t
v_b
,
v_r
,
v_g
;
if
(
scn
==
3
)
{
uint16x8x3_t
v_src
=
vld3q_u16
(
src
);
v_b
=
v_src
.
val
[
0
];
v_g
=
v_src
.
val
[
1
];
v_r
=
v_src
.
val
[
2
];
}
else
{
uint16x8x4_t
v_src
=
vld4q_u16
(
src
);
v_b
=
v_src
.
val
[
0
];
v_g
=
v_src
.
val
[
1
];
v_r
=
v_src
.
val
[
2
];
}
uint32x4_t
v_dst0_
=
vmlal_u16
(
vmlal_u16
(
vmull_u16
(
vget_low_u16
(
v_b
),
v_cb
),
vget_low_u16
(
v_g
),
v_cg
),
vget_low_u16
(
v_r
),
v_cr
);
uint32x4_t
v_dst1_
=
vmlal_u16
(
vmlal_u16
(
vmull_u16
(
vget_high_u16
(
v_b
),
v_cb
),
vget_high_u16
(
v_g
),
v_cg
),
vget_high_u16
(
v_r
),
v_cr
);
uint16x4_t
v_dst0
=
vmovn_u32
(
vshrq_n_u32
(
vaddq_u32
(
v_dst0_
,
v_delta
),
yuv_shift
));
uint16x4_t
v_dst1
=
vmovn_u32
(
vshrq_n_u32
(
vaddq_u32
(
v_dst1_
,
v_delta
),
yuv_shift
));
vst1q_u16
(
dst
+
i
,
vcombine_u16
(
v_dst0
,
v_dst1
));
}
for
(
;
i
<=
n
-
4
;
i
+=
4
,
src
+=
scn
*
4
)
{
uint16x4_t
v_b
,
v_r
,
v_g
;
if
(
scn
==
3
)
{
uint16x4x3_t
v_src
=
vld3_u16
(
src
);
v_b
=
v_src
.
val
[
0
];
v_g
=
v_src
.
val
[
1
];
v_r
=
v_src
.
val
[
2
];
}
else
{
uint16x4x4_t
v_src
=
vld4_u16
(
src
);
v_b
=
v_src
.
val
[
0
];
v_g
=
v_src
.
val
[
1
];
v_r
=
v_src
.
val
[
2
];
}
uint32x4_t
v_dst
=
vmlal_u16
(
vmlal_u16
(
vmull_u16
(
v_b
,
v_cb
),
v_g
,
v_cg
),
v_r
,
v_cr
);
vst1_u16
(
dst
+
i
,
vmovn_u32
(
vshrq_n_u32
(
vaddq_u32
(
v_dst
,
v_delta
),
yuv_shift
)));
}
for
(
;
i
<
n
;
i
++
,
src
+=
scn
)
dst
[
i
]
=
(
ushort
)
CV_DESCALE
((
unsigned
)(
src
[
0
]
*
cb
+
src
[
1
]
*
cg
+
src
[
2
]
*
cr
),
yuv_shift
);
}
int
srccn
,
coeffs
[
3
];
uint16x4_t
v_cb
,
v_cg
,
v_cr
;
uint32x4_t
v_delta
;
};
template
<>
struct
RGB2Gray
<
float
>
{
typedef
float
channel_type
;
RGB2Gray
(
int
_srccn
,
int
blueIdx
,
const
float
*
_coeffs
)
:
srccn
(
_srccn
)
{
static
const
float
coeffs0
[]
=
{
0.299
f
,
0.587
f
,
0.114
f
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
3
*
sizeof
(
coeffs
[
0
])
);
if
(
blueIdx
==
0
)
std
::
swap
(
coeffs
[
0
],
coeffs
[
2
]);
v_cb
=
vdupq_n_f32
(
coeffs
[
0
]);
v_cg
=
vdupq_n_f32
(
coeffs
[
1
]);
v_cr
=
vdupq_n_f32
(
coeffs
[
2
]);
}
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
scn
=
srccn
,
i
=
0
;
float
cb
=
coeffs
[
0
],
cg
=
coeffs
[
1
],
cr
=
coeffs
[
2
];
if
(
scn
==
3
)
{
for
(
;
i
<=
n
-
8
;
i
+=
8
,
src
+=
scn
*
8
)
{
float32x4x3_t
v_src
=
vld3q_f32
(
src
);
vst1q_f32
(
dst
+
i
,
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_cb
),
v_src
.
val
[
1
],
v_cg
),
v_src
.
val
[
2
],
v_cr
));
v_src
=
vld3q_f32
(
src
+
scn
*
4
);
vst1q_f32
(
dst
+
i
+
4
,
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_cb
),
v_src
.
val
[
1
],
v_cg
),
v_src
.
val
[
2
],
v_cr
));
}
for
(
;
i
<=
n
-
4
;
i
+=
4
,
src
+=
scn
*
4
)
{
float32x4x3_t
v_src
=
vld3q_f32
(
src
);
vst1q_f32
(
dst
+
i
,
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_cb
),
v_src
.
val
[
1
],
v_cg
),
v_src
.
val
[
2
],
v_cr
));
}
}
else
{
for
(
;
i
<=
n
-
8
;
i
+=
8
,
src
+=
scn
*
8
)
{
float32x4x4_t
v_src
=
vld4q_f32
(
src
);
vst1q_f32
(
dst
+
i
,
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_cb
),
v_src
.
val
[
1
],
v_cg
),
v_src
.
val
[
2
],
v_cr
));
v_src
=
vld4q_f32
(
src
+
scn
*
4
);
vst1q_f32
(
dst
+
i
+
4
,
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_cb
),
v_src
.
val
[
1
],
v_cg
),
v_src
.
val
[
2
],
v_cr
));
}
for
(
;
i
<=
n
-
4
;
i
+=
4
,
src
+=
scn
*
4
)
{
float32x4x4_t
v_src
=
vld4q_f32
(
src
);
vst1q_f32
(
dst
+
i
,
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_cb
),
v_src
.
val
[
1
],
v_cg
),
v_src
.
val
[
2
],
v_cr
));
}
}
for
(
;
i
<
n
;
i
++
,
src
+=
scn
)
dst
[
i
]
=
src
[
0
]
*
cb
+
src
[
1
]
*
cg
+
src
[
2
]
*
cr
;
}
int
srccn
;
float
coeffs
[
3
];
float32x4_t
v_cb
,
v_cg
,
v_cr
;
};
#else
template
<>
struct
RGB2Gray
<
ushort
>
{
...
...
@@ -823,6 +982,7 @@ template<> struct RGB2Gray<ushort>
int
coeffs
[
3
];
};
#endif
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
...
...
@@ -855,6 +1015,72 @@ template<typename _Tp> struct RGB2YCrCb_f
float
coeffs
[
5
];
};
#if CV_NEON
template
<>
struct
RGB2YCrCb_f
<
float
>
{
typedef
float
channel_type
;
RGB2YCrCb_f
(
int
_srccn
,
int
_blueIdx
,
const
float
*
_coeffs
)
:
srccn
(
_srccn
),
blueIdx
(
_blueIdx
)
{
static
const
float
coeffs0
[]
=
{
0.299
f
,
0.587
f
,
0.114
f
,
0.713
f
,
0.564
f
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
5
*
sizeof
(
coeffs
[
0
]));
if
(
blueIdx
==
0
)
std
::
swap
(
coeffs
[
0
],
coeffs
[
2
]);
v_c0
=
vdupq_n_f32
(
coeffs
[
0
]);
v_c1
=
vdupq_n_f32
(
coeffs
[
1
]);
v_c2
=
vdupq_n_f32
(
coeffs
[
2
]);
v_c3
=
vdupq_n_f32
(
coeffs
[
3
]);
v_c4
=
vdupq_n_f32
(
coeffs
[
4
]);
v_delta
=
vdupq_n_f32
(
ColorChannel
<
float
>::
half
());
}
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
scn
=
srccn
,
bidx
=
blueIdx
,
i
=
0
;
const
float
delta
=
ColorChannel
<
float
>::
half
();
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
],
C4
=
coeffs
[
4
];
n
*=
3
;
if
(
scn
==
3
)
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
12
)
{
float32x4x3_t
v_src
=
vld3q_f32
(
src
),
v_dst
;
v_dst
.
val
[
0
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c0
),
v_src
.
val
[
1
],
v_c1
),
v_src
.
val
[
2
],
v_c2
);
v_dst
.
val
[
1
]
=
vmlaq_f32
(
v_delta
,
vsubq_f32
(
v_src
.
val
[
bidx
^
2
],
v_dst
.
val
[
0
]),
v_c3
);
v_dst
.
val
[
2
]
=
vmlaq_f32
(
v_delta
,
vsubq_f32
(
v_src
.
val
[
bidx
],
v_dst
.
val
[
0
]),
v_c4
);
vst3q_f32
(
dst
+
i
,
v_dst
);
}
else
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
16
)
{
float32x4x4_t
v_src
=
vld4q_f32
(
src
);
float32x4x3_t
v_dst
;
v_dst
.
val
[
0
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c0
),
v_src
.
val
[
1
],
v_c1
),
v_src
.
val
[
2
],
v_c2
);
v_dst
.
val
[
1
]
=
vmlaq_f32
(
v_delta
,
vsubq_f32
(
v_src
.
val
[
bidx
^
2
],
v_dst
.
val
[
0
]),
v_c3
);
v_dst
.
val
[
2
]
=
vmlaq_f32
(
v_delta
,
vsubq_f32
(
v_src
.
val
[
bidx
],
v_dst
.
val
[
0
]),
v_c4
);
vst3q_f32
(
dst
+
i
,
v_dst
);
}
for
(
;
i
<
n
;
i
+=
3
,
src
+=
scn
)
{
float
Y
=
src
[
0
]
*
C0
+
src
[
1
]
*
C1
+
src
[
2
]
*
C2
;
float
Cr
=
(
src
[
bidx
^
2
]
-
Y
)
*
C3
+
delta
;
float
Cb
=
(
src
[
bidx
]
-
Y
)
*
C4
+
delta
;
dst
[
i
]
=
Y
;
dst
[
i
+
1
]
=
Cr
;
dst
[
i
+
2
]
=
Cb
;
}
}
int
srccn
,
blueIdx
;
float
coeffs
[
5
];
float32x4_t
v_c0
,
v_c1
,
v_c2
,
v_c3
,
v_c4
,
v_delta
;
};
#endif
template
<
typename
_Tp
>
struct
RGB2YCrCb_i
{
...
...
@@ -887,6 +1113,224 @@ template<typename _Tp> struct RGB2YCrCb_i
int
coeffs
[
5
];
};
#if CV_NEON
template
<>
struct
RGB2YCrCb_i
<
uchar
>
{
typedef
uchar
channel_type
;
RGB2YCrCb_i
(
int
_srccn
,
int
_blueIdx
,
const
int
*
_coeffs
)
:
srccn
(
_srccn
),
blueIdx
(
_blueIdx
)
{
static
const
int
coeffs0
[]
=
{
R2Y
,
G2Y
,
B2Y
,
11682
,
9241
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
5
*
sizeof
(
coeffs
[
0
]));
if
(
blueIdx
==
0
)
std
::
swap
(
coeffs
[
0
],
coeffs
[
2
]);
v_c0
=
vdup_n_s16
(
coeffs
[
0
]);
v_c1
=
vdup_n_s16
(
coeffs
[
1
]);
v_c2
=
vdup_n_s16
(
coeffs
[
2
]);
v_c3
=
vdupq_n_s32
(
coeffs
[
3
]);
v_c4
=
vdupq_n_s32
(
coeffs
[
4
]);
v_delta
=
vdupq_n_s32
(
ColorChannel
<
uchar
>::
half
()
*
(
1
<<
yuv_shift
));
v_delta2
=
vdupq_n_s32
(
1
<<
(
yuv_shift
-
1
));
}
void
operator
()(
const
uchar
*
src
,
uchar
*
dst
,
int
n
)
const
{
int
scn
=
srccn
,
bidx
=
blueIdx
,
i
=
0
;
int
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
],
C4
=
coeffs
[
4
];
int
delta
=
ColorChannel
<
uchar
>::
half
()
*
(
1
<<
yuv_shift
);
n
*=
3
;
for
(
;
i
<=
n
-
24
;
i
+=
24
,
src
+=
scn
*
8
)
{
uint8x8x3_t
v_dst
;
int16x8x3_t
v_src16
;
if
(
scn
==
3
)
{
uint8x8x3_t
v_src
=
vld3_u8
(
src
);
v_src16
.
val
[
0
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
0
]));
v_src16
.
val
[
1
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
1
]));
v_src16
.
val
[
2
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
2
]));
}
else
{
uint8x8x4_t
v_src
=
vld4_u8
(
src
);
v_src16
.
val
[
0
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
0
]));
v_src16
.
val
[
1
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
1
]));
v_src16
.
val
[
2
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
2
]));
}
int16x4x3_t
v_src0
;
v_src0
.
val
[
0
]
=
vget_low_s16
(
v_src16
.
val
[
0
]);
v_src0
.
val
[
1
]
=
vget_low_s16
(
v_src16
.
val
[
1
]);
v_src0
.
val
[
2
]
=
vget_low_s16
(
v_src16
.
val
[
2
]);
int32x4_t
v_Y0
=
vmlal_s16
(
vmlal_s16
(
vmull_s16
(
v_src0
.
val
[
0
],
v_c0
),
v_src0
.
val
[
1
],
v_c1
),
v_src0
.
val
[
2
],
v_c2
);
v_Y0
=
vshrq_n_s32
(
vaddq_s32
(
v_Y0
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cr0
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
vmovl_s16
(
v_src0
.
val
[
bidx
^
2
]),
v_Y0
),
v_c3
);
v_Cr0
=
vshrq_n_s32
(
vaddq_s32
(
v_Cr0
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cb0
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
vmovl_s16
(
v_src0
.
val
[
bidx
]),
v_Y0
),
v_c4
);
v_Cb0
=
vshrq_n_s32
(
vaddq_s32
(
v_Cb0
,
v_delta2
),
yuv_shift
);
v_src0
.
val
[
0
]
=
vget_high_s16
(
v_src16
.
val
[
0
]);
v_src0
.
val
[
1
]
=
vget_high_s16
(
v_src16
.
val
[
1
]);
v_src0
.
val
[
2
]
=
vget_high_s16
(
v_src16
.
val
[
2
]);
int32x4_t
v_Y1
=
vmlal_s16
(
vmlal_s16
(
vmull_s16
(
v_src0
.
val
[
0
],
v_c0
),
v_src0
.
val
[
1
],
v_c1
),
v_src0
.
val
[
2
],
v_c2
);
v_Y1
=
vshrq_n_s32
(
vaddq_s32
(
v_Y1
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cr1
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
vmovl_s16
(
v_src0
.
val
[
bidx
^
2
]),
v_Y1
),
v_c3
);
v_Cr1
=
vshrq_n_s32
(
vaddq_s32
(
v_Cr1
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cb1
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
vmovl_s16
(
v_src0
.
val
[
bidx
]),
v_Y1
),
v_c4
);
v_Cb1
=
vshrq_n_s32
(
vaddq_s32
(
v_Cb1
,
v_delta2
),
yuv_shift
);
v_dst
.
val
[
0
]
=
vqmovun_s16
(
vcombine_s16
(
vqmovn_s32
(
v_Y0
),
vqmovn_s32
(
v_Y1
)));
v_dst
.
val
[
1
]
=
vqmovun_s16
(
vcombine_s16
(
vqmovn_s32
(
v_Cr0
),
vqmovn_s32
(
v_Cr1
)));
v_dst
.
val
[
2
]
=
vqmovun_s16
(
vcombine_s16
(
vqmovn_s32
(
v_Cb0
),
vqmovn_s32
(
v_Cb1
)));
vst3_u8
(
dst
+
i
,
v_dst
);
}
for
(
;
i
<
n
;
i
+=
3
,
src
+=
scn
)
{
int
Y
=
CV_DESCALE
(
src
[
0
]
*
C0
+
src
[
1
]
*
C1
+
src
[
2
]
*
C2
,
yuv_shift
);
int
Cr
=
CV_DESCALE
((
src
[
bidx
^
2
]
-
Y
)
*
C3
+
delta
,
yuv_shift
);
int
Cb
=
CV_DESCALE
((
src
[
bidx
]
-
Y
)
*
C4
+
delta
,
yuv_shift
);
dst
[
i
]
=
saturate_cast
<
uchar
>
(
Y
);
dst
[
i
+
1
]
=
saturate_cast
<
uchar
>
(
Cr
);
dst
[
i
+
2
]
=
saturate_cast
<
uchar
>
(
Cb
);
}
}
int
srccn
,
blueIdx
,
coeffs
[
5
];
int16x4_t
v_c0
,
v_c1
,
v_c2
;
int32x4_t
v_c3
,
v_c4
,
v_delta
,
v_delta2
;
};
template
<>
struct
RGB2YCrCb_i
<
ushort
>
{
typedef
ushort
channel_type
;
RGB2YCrCb_i
(
int
_srccn
,
int
_blueIdx
,
const
int
*
_coeffs
)
:
srccn
(
_srccn
),
blueIdx
(
_blueIdx
)
{
static
const
int
coeffs0
[]
=
{
R2Y
,
G2Y
,
B2Y
,
11682
,
9241
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
5
*
sizeof
(
coeffs
[
0
]));
if
(
blueIdx
==
0
)
std
::
swap
(
coeffs
[
0
],
coeffs
[
2
]);
v_c0
=
vdupq_n_s32
(
coeffs
[
0
]);
v_c1
=
vdupq_n_s32
(
coeffs
[
1
]);
v_c2
=
vdupq_n_s32
(
coeffs
[
2
]);
v_c3
=
vdupq_n_s32
(
coeffs
[
3
]);
v_c4
=
vdupq_n_s32
(
coeffs
[
4
]);
v_delta
=
vdupq_n_s32
(
ColorChannel
<
ushort
>::
half
()
*
(
1
<<
yuv_shift
));
v_delta2
=
vdupq_n_s32
(
1
<<
(
yuv_shift
-
1
));
}
void
operator
()(
const
ushort
*
src
,
ushort
*
dst
,
int
n
)
const
{
int
scn
=
srccn
,
bidx
=
blueIdx
,
i
=
0
;
int
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
],
C4
=
coeffs
[
4
];
int
delta
=
ColorChannel
<
ushort
>::
half
()
*
(
1
<<
yuv_shift
);
n
*=
3
;
for
(
;
i
<=
n
-
24
;
i
+=
24
,
src
+=
scn
*
8
)
{
uint16x8x3_t
v_src
,
v_dst
;
int32x4x3_t
v_src0
;
if
(
scn
==
3
)
v_src
=
vld3q_u16
(
src
);
else
{
uint16x8x4_t
v_src_
=
vld4q_u16
(
src
);
v_src
.
val
[
0
]
=
v_src_
.
val
[
0
];
v_src
.
val
[
1
]
=
v_src_
.
val
[
1
];
v_src
.
val
[
2
]
=
v_src_
.
val
[
2
];
}
v_src0
.
val
[
0
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
v_src
.
val
[
0
])));
v_src0
.
val
[
1
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
v_src
.
val
[
1
])));
v_src0
.
val
[
2
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
v_src
.
val
[
2
])));
int32x4_t
v_Y0
=
vmlaq_s32
(
vmlaq_s32
(
vmulq_s32
(
v_src0
.
val
[
0
],
v_c0
),
v_src0
.
val
[
1
],
v_c1
),
v_src0
.
val
[
2
],
v_c2
);
v_Y0
=
vshrq_n_s32
(
vaddq_s32
(
v_Y0
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cr0
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
v_src0
.
val
[
bidx
^
2
],
v_Y0
),
v_c3
);
v_Cr0
=
vshrq_n_s32
(
vaddq_s32
(
v_Cr0
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cb0
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
v_src0
.
val
[
bidx
],
v_Y0
),
v_c4
);
v_Cb0
=
vshrq_n_s32
(
vaddq_s32
(
v_Cb0
,
v_delta2
),
yuv_shift
);
v_src0
.
val
[
0
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
v_src
.
val
[
0
])));
v_src0
.
val
[
1
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
v_src
.
val
[
1
])));
v_src0
.
val
[
2
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
v_src
.
val
[
2
])));
int32x4_t
v_Y1
=
vmlaq_s32
(
vmlaq_s32
(
vmulq_s32
(
v_src0
.
val
[
0
],
v_c0
),
v_src0
.
val
[
1
],
v_c1
),
v_src0
.
val
[
2
],
v_c2
);
v_Y1
=
vshrq_n_s32
(
vaddq_s32
(
v_Y1
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cr1
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
v_src0
.
val
[
bidx
^
2
],
v_Y1
),
v_c3
);
v_Cr1
=
vshrq_n_s32
(
vaddq_s32
(
v_Cr1
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cb1
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
v_src0
.
val
[
bidx
],
v_Y1
),
v_c4
);
v_Cb1
=
vshrq_n_s32
(
vaddq_s32
(
v_Cb1
,
v_delta2
),
yuv_shift
);
v_dst
.
val
[
0
]
=
vcombine_u16
(
vqmovun_s32
(
v_Y0
),
vqmovun_s32
(
v_Y1
));
v_dst
.
val
[
1
]
=
vcombine_u16
(
vqmovun_s32
(
v_Cr0
),
vqmovun_s32
(
v_Cr1
));
v_dst
.
val
[
2
]
=
vcombine_u16
(
vqmovun_s32
(
v_Cb0
),
vqmovun_s32
(
v_Cb1
));
vst3q_u16
(
dst
+
i
,
v_dst
);
}
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
scn
*
4
)
{
uint16x4x3_t
v_dst
;
int32x4x3_t
v_src0
;
if
(
scn
==
3
)
{
uint16x4x3_t
v_src
=
vld3_u16
(
src
);
v_src0
.
val
[
0
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
0
]));
v_src0
.
val
[
1
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
1
]));
v_src0
.
val
[
2
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
2
]));
}
else
{
uint16x4x4_t
v_src
=
vld4_u16
(
src
);
v_src0
.
val
[
0
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
0
]));
v_src0
.
val
[
1
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
1
]));
v_src0
.
val
[
2
]
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
2
]));
}
int32x4_t
v_Y
=
vmlaq_s32
(
vmlaq_s32
(
vmulq_s32
(
v_src0
.
val
[
0
],
v_c0
),
v_src0
.
val
[
1
],
v_c1
),
v_src0
.
val
[
2
],
v_c2
);
v_Y
=
vshrq_n_s32
(
vaddq_s32
(
v_Y
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cr
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
v_src0
.
val
[
bidx
^
2
],
v_Y
),
v_c3
);
v_Cr
=
vshrq_n_s32
(
vaddq_s32
(
v_Cr
,
v_delta2
),
yuv_shift
);
int32x4_t
v_Cb
=
vmlaq_s32
(
v_delta
,
vsubq_s32
(
v_src0
.
val
[
bidx
],
v_Y
),
v_c4
);
v_Cb
=
vshrq_n_s32
(
vaddq_s32
(
v_Cb
,
v_delta2
),
yuv_shift
);
v_dst
.
val
[
0
]
=
vqmovun_s32
(
v_Y
);
v_dst
.
val
[
1
]
=
vqmovun_s32
(
v_Cr
);
v_dst
.
val
[
2
]
=
vqmovun_s32
(
v_Cb
);
vst3_u16
(
dst
+
i
,
v_dst
);
}
for
(
;
i
<
n
;
i
+=
3
,
src
+=
scn
)
{
int
Y
=
CV_DESCALE
(
src
[
0
]
*
C0
+
src
[
1
]
*
C1
+
src
[
2
]
*
C2
,
yuv_shift
);
int
Cr
=
CV_DESCALE
((
src
[
bidx
^
2
]
-
Y
)
*
C3
+
delta
,
yuv_shift
);
int
Cb
=
CV_DESCALE
((
src
[
bidx
]
-
Y
)
*
C4
+
delta
,
yuv_shift
);
dst
[
i
]
=
saturate_cast
<
ushort
>
(
Y
);
dst
[
i
+
1
]
=
saturate_cast
<
ushort
>
(
Cr
);
dst
[
i
+
2
]
=
saturate_cast
<
ushort
>
(
Cb
);
}
}
int
srccn
,
blueIdx
,
coeffs
[
5
];
int32x4_t
v_c0
,
v_c1
,
v_c2
,
v_c3
,
v_c4
,
v_delta
,
v_delta2
;
};
#endif
template
<
typename
_Tp
>
struct
YCrCb2RGB_f
{
...
...
@@ -923,6 +1367,80 @@ template<typename _Tp> struct YCrCb2RGB_f
float
coeffs
[
4
];
};
#if CV_NEON
template
<>
struct
YCrCb2RGB_f
<
float
>
{
typedef
float
channel_type
;
YCrCb2RGB_f
(
int
_dstcn
,
int
_blueIdx
,
const
float
*
_coeffs
)
:
dstcn
(
_dstcn
),
blueIdx
(
_blueIdx
)
{
static
const
float
coeffs0
[]
=
{
1.403
f
,
-
0.714
f
,
-
0.344
f
,
1.773
f
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
4
*
sizeof
(
coeffs
[
0
]));
v_c0
=
vdupq_n_f32
(
coeffs
[
0
]);
v_c1
=
vdupq_n_f32
(
coeffs
[
1
]);
v_c2
=
vdupq_n_f32
(
coeffs
[
2
]);
v_c3
=
vdupq_n_f32
(
coeffs
[
3
]);
v_delta
=
vdupq_n_f32
(
ColorChannel
<
float
>::
half
());
v_alpha
=
vdupq_n_f32
(
ColorChannel
<
float
>::
max
());
}
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
dcn
=
dstcn
,
bidx
=
blueIdx
,
i
=
0
;
const
float
delta
=
ColorChannel
<
float
>::
half
(),
alpha
=
ColorChannel
<
float
>::
max
();
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
];
n
*=
3
;
if
(
dcn
==
3
)
for
(
;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
12
)
{
float32x4x3_t
v_src
=
vld3q_f32
(
src
+
i
),
v_dst
;
float32x4_t
v_Y
=
v_src
.
val
[
0
],
v_Cr
=
v_src
.
val
[
1
],
v_Cb
=
v_src
.
val
[
2
];
v_dst
.
val
[
bidx
]
=
vmlaq_f32
(
v_Y
,
vsubq_f32
(
v_Cb
,
v_delta
),
v_c3
);
v_dst
.
val
[
1
]
=
vaddq_f32
(
vmlaq_f32
(
vmulq_f32
(
vsubq_f32
(
v_Cb
,
v_delta
),
v_c2
),
vsubq_f32
(
v_Cr
,
v_delta
),
v_c1
),
v_Y
);
v_dst
.
val
[
bidx
^
2
]
=
vmlaq_f32
(
v_Y
,
vsubq_f32
(
v_Cr
,
v_delta
),
v_c0
);
vst3q_f32
(
dst
,
v_dst
);
}
else
for
(
;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
16
)
{
float32x4x3_t
v_src
=
vld3q_f32
(
src
+
i
);
float32x4x4_t
v_dst
;
float32x4_t
v_Y
=
v_src
.
val
[
0
],
v_Cr
=
v_src
.
val
[
1
],
v_Cb
=
v_src
.
val
[
2
];
v_dst
.
val
[
bidx
]
=
vmlaq_f32
(
v_Y
,
vsubq_f32
(
v_Cb
,
v_delta
),
v_c3
);
v_dst
.
val
[
1
]
=
vaddq_f32
(
vmlaq_f32
(
vmulq_f32
(
vsubq_f32
(
v_Cb
,
v_delta
),
v_c2
),
vsubq_f32
(
v_Cr
,
v_delta
),
v_c1
),
v_Y
);
v_dst
.
val
[
bidx
^
2
]
=
vmlaq_f32
(
v_Y
,
vsubq_f32
(
v_Cr
,
v_delta
),
v_c0
);
v_dst
.
val
[
3
]
=
v_alpha
;
vst4q_f32
(
dst
,
v_dst
);
}
for
(
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
float
Y
=
src
[
i
],
Cr
=
src
[
i
+
1
],
Cb
=
src
[
i
+
2
];
float
b
=
Y
+
(
Cb
-
delta
)
*
C3
;
float
g
=
Y
+
(
Cb
-
delta
)
*
C2
+
(
Cr
-
delta
)
*
C1
;
float
r
=
Y
+
(
Cr
-
delta
)
*
C0
;
dst
[
bidx
]
=
b
;
dst
[
1
]
=
g
;
dst
[
bidx
^
2
]
=
r
;
if
(
dcn
==
4
)
dst
[
3
]
=
alpha
;
}
}
int
dstcn
,
blueIdx
;
float
coeffs
[
4
];
float32x4_t
v_c0
,
v_c1
,
v_c2
,
v_c3
,
v_alpha
,
v_delta
;
};
#endif
template
<
typename
_Tp
>
struct
YCrCb2RGB_i
{
...
...
@@ -962,20 +1480,268 @@ template<typename _Tp> struct YCrCb2RGB_i
int
coeffs
[
4
];
};
#if CV_NEON
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
static
const
float
sRGB2XYZ_D65
[]
=
template
<>
struct
YCrCb2RGB_i
<
uchar
>
{
0.412453
f
,
0.357580
f
,
0.180423
f
,
0.212671
f
,
0.715160
f
,
0.072169
f
,
0.019334
f
,
0.119193
f
,
0.950227
f
};
typedef
uchar
channel_type
;
static
const
float
XYZ2sRGB_D65
[]
=
{
3.240479
f
,
-
1.53715
f
,
-
0.498535
f
,
-
0.969256
f
,
1.875991
f
,
0.041556
f
,
YCrCb2RGB_i
(
int
_dstcn
,
int
_blueIdx
,
const
int
*
_coeffs
)
:
dstcn
(
_dstcn
),
blueIdx
(
_blueIdx
)
{
static
const
int
coeffs0
[]
=
{
22987
,
-
11698
,
-
5636
,
29049
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
4
*
sizeof
(
coeffs
[
0
]));
v_c0
=
vdupq_n_s32
(
coeffs
[
0
]);
v_c1
=
vdupq_n_s32
(
coeffs
[
1
]);
v_c2
=
vdupq_n_s32
(
coeffs
[
2
]);
v_c3
=
vdupq_n_s32
(
coeffs
[
3
]);
v_delta
=
vdup_n_s16
(
ColorChannel
<
uchar
>::
half
());
v_delta2
=
vdupq_n_s32
(
1
<<
(
yuv_shift
-
1
));
v_alpha
=
vdup_n_u8
(
ColorChannel
<
uchar
>::
max
());
}
void
operator
()(
const
uchar
*
src
,
uchar
*
dst
,
int
n
)
const
{
int
dcn
=
dstcn
,
bidx
=
blueIdx
,
i
=
0
;
const
uchar
delta
=
ColorChannel
<
uchar
>::
half
(),
alpha
=
ColorChannel
<
uchar
>::
max
();
int
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
];
n
*=
3
;
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
uint8x8x3_t
v_src
=
vld3_u8
(
src
+
i
);
int16x8x3_t
v_src16
;
v_src16
.
val
[
0
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
0
]));
v_src16
.
val
[
1
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
1
]));
v_src16
.
val
[
2
]
=
vreinterpretq_s16_u16
(
vmovl_u8
(
v_src
.
val
[
2
]));
int16x4_t
v_Y
=
vget_low_s16
(
v_src16
.
val
[
0
]),
v_Cr
=
vget_low_s16
(
v_src16
.
val
[
1
]),
v_Cb
=
vget_low_s16
(
v_src16
.
val
[
2
]);
int32x4_t
v_b0
=
vmulq_s32
(
v_c3
,
vsubl_s16
(
v_Cb
,
v_delta
));
v_b0
=
vaddw_s16
(
vshrq_n_s32
(
vaddq_s32
(
v_b0
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_g0
=
vmlaq_s32
(
vmulq_s32
(
vsubl_s16
(
v_Cr
,
v_delta
),
v_c1
),
vsubl_s16
(
v_Cb
,
v_delta
),
v_c2
);
v_g0
=
vaddw_s16
(
vshrq_n_s32
(
vaddq_s32
(
v_g0
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_r0
=
vmulq_s32
(
v_c0
,
vsubl_s16
(
v_Cr
,
v_delta
));
v_r0
=
vaddw_s16
(
vshrq_n_s32
(
vaddq_s32
(
v_r0
,
v_delta2
),
yuv_shift
),
v_Y
);
v_Y
=
vget_high_s16
(
v_src16
.
val
[
0
]);
v_Cr
=
vget_high_s16
(
v_src16
.
val
[
1
]);
v_Cb
=
vget_high_s16
(
v_src16
.
val
[
2
]);
int32x4_t
v_b1
=
vmulq_s32
(
v_c3
,
vsubl_s16
(
v_Cb
,
v_delta
));
v_b1
=
vaddw_s16
(
vshrq_n_s32
(
vaddq_s32
(
v_b1
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_g1
=
vmlaq_s32
(
vmulq_s32
(
vsubl_s16
(
v_Cr
,
v_delta
),
v_c1
),
vsubl_s16
(
v_Cb
,
v_delta
),
v_c2
);
v_g1
=
vaddw_s16
(
vshrq_n_s32
(
vaddq_s32
(
v_g1
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_r1
=
vmulq_s32
(
v_c0
,
vsubl_s16
(
v_Cr
,
v_delta
));
v_r1
=
vaddw_s16
(
vshrq_n_s32
(
vaddq_s32
(
v_r1
,
v_delta2
),
yuv_shift
),
v_Y
);
uint8x8_t
v_b
=
vqmovun_s16
(
vcombine_s16
(
vmovn_s32
(
v_b0
),
vmovn_s32
(
v_b1
)));
uint8x8_t
v_g
=
vqmovun_s16
(
vcombine_s16
(
vmovn_s32
(
v_g0
),
vmovn_s32
(
v_g1
)));
uint8x8_t
v_r
=
vqmovun_s16
(
vcombine_s16
(
vmovn_s32
(
v_r0
),
vmovn_s32
(
v_r1
)));
if
(
dcn
==
3
)
{
uint8x8x3_t
v_dst
;
v_dst
.
val
[
bidx
]
=
v_b
;
v_dst
.
val
[
1
]
=
v_g
;
v_dst
.
val
[
bidx
^
2
]
=
v_r
;
vst3_u8
(
dst
,
v_dst
);
}
else
{
uint8x8x4_t
v_dst
;
v_dst
.
val
[
bidx
]
=
v_b
;
v_dst
.
val
[
1
]
=
v_g
;
v_dst
.
val
[
bidx
^
2
]
=
v_r
;
v_dst
.
val
[
3
]
=
v_alpha
;
vst4_u8
(
dst
,
v_dst
);
}
}
for
(
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
uchar
Y
=
src
[
i
];
uchar
Cr
=
src
[
i
+
1
];
uchar
Cb
=
src
[
i
+
2
];
int
b
=
Y
+
CV_DESCALE
((
Cb
-
delta
)
*
C3
,
yuv_shift
);
int
g
=
Y
+
CV_DESCALE
((
Cb
-
delta
)
*
C2
+
(
Cr
-
delta
)
*
C1
,
yuv_shift
);
int
r
=
Y
+
CV_DESCALE
((
Cr
-
delta
)
*
C0
,
yuv_shift
);
dst
[
bidx
]
=
saturate_cast
<
uchar
>
(
b
);
dst
[
1
]
=
saturate_cast
<
uchar
>
(
g
);
dst
[
bidx
^
2
]
=
saturate_cast
<
uchar
>
(
r
);
if
(
dcn
==
4
)
dst
[
3
]
=
alpha
;
}
}
int
dstcn
,
blueIdx
;
int
coeffs
[
4
];
int32x4_t
v_c0
,
v_c1
,
v_c2
,
v_c3
,
v_delta2
;
int16x4_t
v_delta
;
uint8x8_t
v_alpha
;
};
template
<>
struct
YCrCb2RGB_i
<
ushort
>
{
typedef
ushort
channel_type
;
YCrCb2RGB_i
(
int
_dstcn
,
int
_blueIdx
,
const
int
*
_coeffs
)
:
dstcn
(
_dstcn
),
blueIdx
(
_blueIdx
)
{
static
const
int
coeffs0
[]
=
{
22987
,
-
11698
,
-
5636
,
29049
};
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
coeffs0
,
4
*
sizeof
(
coeffs
[
0
]));
v_c0
=
vdupq_n_s32
(
coeffs
[
0
]);
v_c1
=
vdupq_n_s32
(
coeffs
[
1
]);
v_c2
=
vdupq_n_s32
(
coeffs
[
2
]);
v_c3
=
vdupq_n_s32
(
coeffs
[
3
]);
v_delta
=
vdupq_n_s32
(
ColorChannel
<
ushort
>::
half
());
v_delta2
=
vdupq_n_s32
(
1
<<
(
yuv_shift
-
1
));
v_alpha
=
vdupq_n_u16
(
ColorChannel
<
ushort
>::
max
());
v_alpha2
=
vget_low_u16
(
v_alpha
);
}
void
operator
()(
const
ushort
*
src
,
ushort
*
dst
,
int
n
)
const
{
int
dcn
=
dstcn
,
bidx
=
blueIdx
,
i
=
0
;
const
ushort
delta
=
ColorChannel
<
ushort
>::
half
(),
alpha
=
ColorChannel
<
ushort
>::
max
();
int
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
];
n
*=
3
;
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
uint16x8x3_t
v_src
=
vld3q_u16
(
src
+
i
);
int32x4_t
v_Y
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
v_src
.
val
[
0
]))),
v_Cr
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
v_src
.
val
[
1
]))),
v_Cb
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_low_u16
(
v_src
.
val
[
2
])));
int32x4_t
v_b0
=
vmulq_s32
(
v_c3
,
vsubq_s32
(
v_Cb
,
v_delta
));
v_b0
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_b0
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_g0
=
vmlaq_s32
(
vmulq_s32
(
vsubq_s32
(
v_Cr
,
v_delta
),
v_c1
),
vsubq_s32
(
v_Cb
,
v_delta
),
v_c2
);
v_g0
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_g0
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_r0
=
vmulq_s32
(
v_c0
,
vsubq_s32
(
v_Cr
,
v_delta
));
v_r0
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_r0
,
v_delta2
),
yuv_shift
),
v_Y
);
v_Y
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
v_src
.
val
[
0
]))),
v_Cr
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
v_src
.
val
[
1
]))),
v_Cb
=
vreinterpretq_s32_u32
(
vmovl_u16
(
vget_high_u16
(
v_src
.
val
[
2
])));
int32x4_t
v_b1
=
vmulq_s32
(
v_c3
,
vsubq_s32
(
v_Cb
,
v_delta
));
v_b1
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_b1
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_g1
=
vmlaq_s32
(
vmulq_s32
(
vsubq_s32
(
v_Cr
,
v_delta
),
v_c1
),
vsubq_s32
(
v_Cb
,
v_delta
),
v_c2
);
v_g1
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_g1
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_r1
=
vmulq_s32
(
v_c0
,
vsubq_s32
(
v_Cr
,
v_delta
));
v_r1
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_r1
,
v_delta2
),
yuv_shift
),
v_Y
);
uint16x8_t
v_b
=
vcombine_u16
(
vqmovun_s32
(
v_b0
),
vqmovun_s32
(
v_b1
));
uint16x8_t
v_g
=
vcombine_u16
(
vqmovun_s32
(
v_g0
),
vqmovun_s32
(
v_g1
));
uint16x8_t
v_r
=
vcombine_u16
(
vqmovun_s32
(
v_r0
),
vqmovun_s32
(
v_r1
));
if
(
dcn
==
3
)
{
uint16x8x3_t
v_dst
;
v_dst
.
val
[
bidx
]
=
v_b
;
v_dst
.
val
[
1
]
=
v_g
;
v_dst
.
val
[
bidx
^
2
]
=
v_r
;
vst3q_u16
(
dst
,
v_dst
);
}
else
{
uint16x8x4_t
v_dst
;
v_dst
.
val
[
bidx
]
=
v_b
;
v_dst
.
val
[
1
]
=
v_g
;
v_dst
.
val
[
bidx
^
2
]
=
v_r
;
v_dst
.
val
[
3
]
=
v_alpha
;
vst4q_u16
(
dst
,
v_dst
);
}
}
for
(
;
i
<=
n
-
12
;
i
+=
12
,
dst
+=
dcn
*
4
)
{
uint16x4x3_t
v_src
=
vld3_u16
(
src
+
i
);
int32x4_t
v_Y
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
0
])),
v_Cr
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
1
])),
v_Cb
=
vreinterpretq_s32_u32
(
vmovl_u16
(
v_src
.
val
[
2
]));
int32x4_t
v_b
=
vmulq_s32
(
v_c3
,
vsubq_s32
(
v_Cb
,
v_delta
));
v_b
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_b
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_g
=
vmlaq_s32
(
vmulq_s32
(
vsubq_s32
(
v_Cr
,
v_delta
),
v_c1
),
vsubq_s32
(
v_Cb
,
v_delta
),
v_c2
);
v_g
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_g
,
v_delta2
),
yuv_shift
),
v_Y
);
int32x4_t
v_r
=
vmulq_s32
(
vsubq_s32
(
v_Cr
,
v_delta
),
v_c0
);
v_r
=
vaddq_s32
(
vshrq_n_s32
(
vaddq_s32
(
v_r
,
v_delta2
),
yuv_shift
),
v_Y
);
uint16x4_t
v_bd
=
vqmovun_s32
(
v_b
);
uint16x4_t
v_gd
=
vqmovun_s32
(
v_g
);
uint16x4_t
v_rd
=
vqmovun_s32
(
v_r
);
if
(
dcn
==
3
)
{
uint16x4x3_t
v_dst
;
v_dst
.
val
[
bidx
]
=
v_bd
;
v_dst
.
val
[
1
]
=
v_gd
;
v_dst
.
val
[
bidx
^
2
]
=
v_rd
;
vst3_u16
(
dst
,
v_dst
);
}
else
{
uint16x4x4_t
v_dst
;
v_dst
.
val
[
bidx
]
=
v_bd
;
v_dst
.
val
[
1
]
=
v_gd
;
v_dst
.
val
[
bidx
^
2
]
=
v_rd
;
v_dst
.
val
[
3
]
=
v_alpha2
;
vst4_u16
(
dst
,
v_dst
);
}
}
for
(
;
i
<
n
;
i
+=
3
,
dst
+=
dcn
)
{
ushort
Y
=
src
[
i
];
ushort
Cr
=
src
[
i
+
1
];
ushort
Cb
=
src
[
i
+
2
];
int
b
=
Y
+
CV_DESCALE
((
Cb
-
delta
)
*
C3
,
yuv_shift
);
int
g
=
Y
+
CV_DESCALE
((
Cb
-
delta
)
*
C2
+
(
Cr
-
delta
)
*
C1
,
yuv_shift
);
int
r
=
Y
+
CV_DESCALE
((
Cr
-
delta
)
*
C0
,
yuv_shift
);
dst
[
bidx
]
=
saturate_cast
<
ushort
>
(
b
);
dst
[
1
]
=
saturate_cast
<
ushort
>
(
g
);
dst
[
bidx
^
2
]
=
saturate_cast
<
ushort
>
(
r
);
if
(
dcn
==
4
)
dst
[
3
]
=
alpha
;
}
}
int
dstcn
,
blueIdx
;
int
coeffs
[
4
];
int32x4_t
v_c0
,
v_c1
,
v_c2
,
v_c3
,
v_delta2
,
v_delta
;
uint16x8_t
v_alpha
;
uint16x4_t
v_alpha2
;
};
#endif
////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
static
const
float
sRGB2XYZ_D65
[]
=
{
0.412453
f
,
0.357580
f
,
0.180423
f
,
0.212671
f
,
0.715160
f
,
0.072169
f
,
0.019334
f
,
0.119193
f
,
0.950227
f
};
static
const
float
XYZ2sRGB_D65
[]
=
{
3.240479
f
,
-
1.53715
f
,
-
0.498535
f
,
-
0.969256
f
,
1.875991
f
,
0.041556
f
,
0.055648
f
,
-
0.204043
f
,
1.057311
f
};
...
...
@@ -1013,6 +1779,78 @@ template<typename _Tp> struct RGB2XYZ_f
float
coeffs
[
9
];
};
#if CV_NEON
template
<>
struct
RGB2XYZ_f
<
float
>
{
typedef
float
channel_type
;
RGB2XYZ_f
(
int
_srccn
,
int
blueIdx
,
const
float
*
_coeffs
)
:
srccn
(
_srccn
)
{
memcpy
(
coeffs
,
_coeffs
?
_coeffs
:
sRGB2XYZ_D65
,
9
*
sizeof
(
coeffs
[
0
]));
if
(
blueIdx
==
0
)
{
std
::
swap
(
coeffs
[
0
],
coeffs
[
2
]);
std
::
swap
(
coeffs
[
3
],
coeffs
[
5
]);
std
::
swap
(
coeffs
[
6
],
coeffs
[
8
]);
}
v_c0
=
vdupq_n_f32
(
coeffs
[
0
]);
v_c1
=
vdupq_n_f32
(
coeffs
[
1
]);
v_c2
=
vdupq_n_f32
(
coeffs
[
2
]);
v_c3
=
vdupq_n_f32
(
coeffs
[
3
]);
v_c4
=
vdupq_n_f32
(
coeffs
[
4
]);
v_c5
=
vdupq_n_f32
(
coeffs
[
5
]);
v_c6
=
vdupq_n_f32
(
coeffs
[
6
]);
v_c7
=
vdupq_n_f32
(
coeffs
[
7
]);
v_c8
=
vdupq_n_f32
(
coeffs
[
8
]);
}
void
operator
()(
const
float
*
src
,
float
*
dst
,
int
n
)
const
{
int
scn
=
srccn
,
i
=
0
;
float
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
],
C4
=
coeffs
[
4
],
C5
=
coeffs
[
5
],
C6
=
coeffs
[
6
],
C7
=
coeffs
[
7
],
C8
=
coeffs
[
8
];
n
*=
3
;
if
(
scn
==
3
)
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
12
)
{
float32x4x3_t
v_src
=
vld3q_f32
(
src
),
v_dst
;
v_dst
.
val
[
0
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c0
),
v_src
.
val
[
1
],
v_c1
),
v_src
.
val
[
2
],
v_c2
);
v_dst
.
val
[
1
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c3
),
v_src
.
val
[
1
],
v_c4
),
v_src
.
val
[
2
],
v_c5
);
v_dst
.
val
[
2
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c6
),
v_src
.
val
[
1
],
v_c7
),
v_src
.
val
[
2
],
v_c8
);
vst3q_f32
(
dst
+
i
,
v_dst
);
}
else
for
(
;
i
<=
n
-
12
;
i
+=
12
,
src
+=
16
)
{
float32x4x4_t
v_src
=
vld4q_f32
(
src
);
float32x4x3_t
v_dst
;
v_dst
.
val
[
0
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c0
),
v_src
.
val
[
1
],
v_c1
),
v_src
.
val
[
2
],
v_c2
);
v_dst
.
val
[
1
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c3
),
v_src
.
val
[
1
],
v_c4
),
v_src
.
val
[
2
],
v_c5
);
v_dst
.
val
[
2
]
=
vmlaq_f32
(
vmlaq_f32
(
vmulq_f32
(
v_src
.
val
[
0
],
v_c6
),
v_src
.
val
[
1
],
v_c7
),
v_src
.
val
[
2
],
v_c8
);
vst3q_f32
(
dst
+
i
,
v_dst
);
}
for
(
;
i
<
n
;
i
+=
3
,
src
+=
scn
)
{
float
X
=
saturate_cast
<
float
>
(
src
[
0
]
*
C0
+
src
[
1
]
*
C1
+
src
[
2
]
*
C2
);
float
Y
=
saturate_cast
<
float
>
(
src
[
0
]
*
C3
+
src
[
1
]
*
C4
+
src
[
2
]
*
C5
);
float
Z
=
saturate_cast
<
float
>
(
src
[
0
]
*
C6
+
src
[
1
]
*
C7
+
src
[
2
]
*
C8
);
dst
[
i
]
=
X
;
dst
[
i
+
1
]
=
Y
;
dst
[
i
+
2
]
=
Z
;
}
}
int
srccn
;
float
coeffs
[
9
];
float32x4_t
v_c0
,
v_c1
,
v_c2
,
v_c3
,
v_c4
,
v_c5
,
v_c6
,
v_c7
,
v_c8
;
};
#endif
template
<
typename
_Tp
>
struct
RGB2XYZ_i
{
...
...
@@ -1055,6 +1893,247 @@ template<typename _Tp> struct RGB2XYZ_i
int
coeffs
[
9
];
};
#if CV_NEON
template <> struct RGB2XYZ_i<uchar>
{
    typedef uchar channel_type;

    // NEON-accelerated fixed-point RGB -> XYZ conversion for 8-bit pixels.
    // The 3x3 conversion matrix is kept row-major in coeffs[], pre-scaled by
    // 2^xyz_shift; results are descaled with round-to-nearest (v_delta holds
    // the 0.5 rounding term).
    //
    // _srccn  - number of source channels (3 or 4; a 4th channel is ignored)
    // blueIdx - index of the blue channel in a source pixel; 0 presumably
    //           means BGR-ordered input (it mirrors each matrix row)
    // _coeffs - optional custom floating-point matrix; NULL selects the
    //           built-in coefficients
    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        // Default matrix, already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if (blueIdx == 0)
        {
            // Swap the first and third coefficient of every row to match the
            // reversed channel order of the input.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast each coefficient into a NEON lane vector once, up front.
        v_c0 = vdup_n_u16(coeffs[0]);
        v_c1 = vdup_n_u16(coeffs[1]);
        v_c2 = vdup_n_u16(coeffs[2]);
        v_c3 = vdup_n_u16(coeffs[3]);
        v_c4 = vdup_n_u16(coeffs[4]);
        v_c5 = vdup_n_u16(coeffs[5]);
        v_c6 = vdup_n_u16(coeffs[6]);
        v_c7 = vdup_n_u16(coeffs[7]);
        v_c8 = vdup_n_u16(coeffs[8]);
        v_delta = vdupq_n_u32(1 << (xyz_shift - 1)); // rounding term for descale
    }

    // Convert n pixels from src (scn channels each) into 3-channel dst.
    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3; // n is now a count of output values (3 per pixel)

        // Vector path: 8 pixels (24 output values) per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint8x8x3_t v_dst;
            uint16x8x3_t v_src16;

            // De-interleave 8 pixels and widen to 16 bits; the 4th channel of
            // 4-channel input is dropped.
            if (scn == 3)
            {
                uint8x8x3_t v_src = vld3_u8(src);
                v_src16.val[0] = vmovl_u8(v_src.val[0]);
                v_src16.val[1] = vmovl_u8(v_src.val[1]);
                v_src16.val[2] = vmovl_u8(v_src.val[2]);
            }
            else
            {
                uint8x8x4_t v_src = vld4_u8(src);
                v_src16.val[0] = vmovl_u8(v_src.val[0]);
                v_src16.val[1] = vmovl_u8(v_src.val[1]);
                v_src16.val[2] = vmovl_u8(v_src.val[2]);
            }

            // Lower 4 pixels: widening multiply-accumulate into 32 bits.
            uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]),
                       v_s1 = vget_low_u16(v_src16.val[1]),
                       v_s2 = vget_low_u16(v_src16.val[2]);
            uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            // Descale with rounding: (x + 2^(xyz_shift-1)) >> xyz_shift.
            v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);

            // Upper 4 pixels: identical computation.
            v_s0 = vget_high_u16(v_src16.val[0]),
            v_s1 = vget_high_u16(v_src16.val[1]),
            v_s2 = vget_high_u16(v_src16.val[2]);
            uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);

            // Narrow 32 -> 16 bits (vmovn), then saturating 16 -> 8 bits
            // (vqmovn_u16), and store 8 interleaved XYZ triplets.
            v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1)));
            v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1)));
            v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1)));
            vst3_u8(dst + i, v_dst);
        }

        // Scalar tail: remaining 0..7 pixels.
        for ( ; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<uchar>(X);
            dst[i+1] = saturate_cast<uchar>(Y);
            dst[i+2] = saturate_cast<uchar>(Z);
        }
    }

    int srccn, coeffs[9];
    // Pre-broadcast coefficients and rounding constant for the SIMD path.
    uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint32x4_t v_delta;
};
template <> struct RGB2XYZ_i<ushort>
{
    typedef ushort channel_type;

    // NEON-accelerated fixed-point RGB -> XYZ conversion for 16-bit pixels.
    // Same fixed-point scheme as the uchar specialization: coefficients are
    // scaled by 2^xyz_shift and results are descaled with rounding.
    //
    // _srccn  - number of source channels (3 or 4; the 4th is ignored)
    // blueIdx - blue-channel index; 0 presumably means BGR-ordered input
    //           (mirrors each matrix row)
    // _coeffs - optional custom floating-point matrix; NULL selects defaults
    RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        // Default matrix, already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            1689, 1465, 739,
            871, 2929, 296,
            79, 488, 3892
        };
        for( int i = 0; i < 9; i++ )
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if (blueIdx == 0)
        {
            // Mirror each matrix row for the reversed input channel order.
            std::swap(coeffs[0], coeffs[2]);
            std::swap(coeffs[3], coeffs[5]);
            std::swap(coeffs[6], coeffs[8]);
        }

        // Broadcast each coefficient into a NEON lane vector once, up front.
        v_c0 = vdup_n_u16(coeffs[0]);
        v_c1 = vdup_n_u16(coeffs[1]);
        v_c2 = vdup_n_u16(coeffs[2]);
        v_c3 = vdup_n_u16(coeffs[3]);
        v_c4 = vdup_n_u16(coeffs[4]);
        v_c5 = vdup_n_u16(coeffs[5]);
        v_c6 = vdup_n_u16(coeffs[6]);
        v_c7 = vdup_n_u16(coeffs[7]);
        v_c8 = vdup_n_u16(coeffs[8]);
        v_delta = vdupq_n_u32(1 << (xyz_shift - 1)); // rounding term for descale
    }

    // Convert n pixels from src (scn channels each) into 3-channel dst.
    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3; // n is now a count of output values (3 per pixel)

        // Main vector path: 8 pixels (24 output values) per iteration.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint16x8x3_t v_src, v_dst;

            // De-interleaved load; the 4th channel of 4-channel input is dropped.
            if (scn == 3)
                v_src = vld3q_u16(src);
            else
            {
                uint16x8x4_t v_src4 = vld4q_u16(src);
                v_src.val[0] = v_src4.val[0];
                v_src.val[1] = v_src4.val[1];
                v_src.val[2] = v_src4.val[2];
            }

            // Lower 4 pixels: widening multiply-accumulate into 32 bits.
            uint16x4_t v_s0 = vget_low_u16(v_src.val[0]),
                       v_s1 = vget_low_u16(v_src.val[1]),
                       v_s2 = vget_low_u16(v_src.val[2]);
            uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            // Descale with rounding: (x + 2^(xyz_shift-1)) >> xyz_shift.
            v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift);

            // Upper 4 pixels: identical computation.
            v_s0 = vget_high_u16(v_src.val[0]),
            v_s1 = vget_high_u16(v_src.val[1]),
            v_s2 = vget_high_u16(v_src.val[2]);
            uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow 32 -> 16 bits and store 8 interleaved triplets.
            v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1));
            v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1));
            v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1));
            vst3q_u16(dst + i, v_dst);
        }

        // Secondary vector path: 4 pixels (12 output values) per iteration.
        for ( ; i <= n - 12; i += 12, src += scn * 4)
        {
            uint16x4x3_t v_dst;
            uint16x4_t v_s0, v_s1, v_s2;

            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_s0 = v_src.val[0];
                v_s1 = v_src.val[1];
                v_s2 = v_src.val[2];
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);
                v_s0 = v_src.val[0];
                v_s1 = v_src.val[1];
                v_s2 = v_src.val[2];
            }

            uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);

            // Round, descale and saturating-narrow in one expression per channel.
            v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift));
            v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift));
            v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift));
            vst3_u16(dst + i, v_dst);
        }

        // Scalar tail: remaining 0..3 pixels.
        for ( ; i < n; i += 3, src += scn)
        {
            int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift);
            int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift);
            int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift);
            dst[i] = saturate_cast<ushort>(X);
            dst[i+1] = saturate_cast<ushort>(Y);
            dst[i+2] = saturate_cast<ushort>(Z);
        }
    }

    int srccn, coeffs[9];
    // Pre-broadcast coefficients and rounding constant for the SIMD path.
    uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint32x4_t v_delta;
};
#endif
template
<
typename
_Tp
>
struct
XYZ2RGB_f
{
...
...
@@ -1141,6 +2220,278 @@ template<typename _Tp> struct XYZ2RGB_i
int
coeffs
[
9
];
};
#if CV_NEON
template <> struct XYZ2RGB_i<uchar>
{
    typedef uchar channel_type;

    // NEON-accelerated fixed-point XYZ -> RGB conversion for 8-bit pixels.
    // The inverse matrix has negative entries, so the SIMD path uses signed
    // 16/32-bit arithmetic and saturating-unsigned narrowing at the end.
    //
    // _dstcn   - number of destination channels (3 or 4; channel 3 is filled
    //            with the maximum channel value, i.e. opaque alpha)
    // _blueIdx - blue-channel index in the output; 0 swaps the first and
    //            third matrix rows (presumably BGR vs RGB output order)
    // _coeffs  - optional custom matrix; NULL selects the built-in one
    //            (NOTE(review): _coeffs is int* yet scaled like the float
    //            path — confirm callers pass unscaled values)
    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
    : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default inverse matrix, already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            13273, -6296, -2042,
            -3970, 7684, 170,
            228, -836, 4331
        };
        for(int i = 0; i < 9; i++)
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            // Swap the first and third rows of the matrix so the blue output
            // is produced first.
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        // Broadcast each (signed) coefficient into a NEON lane vector.
        v_c0 = vdup_n_s16(coeffs[0]);
        v_c1 = vdup_n_s16(coeffs[1]);
        v_c2 = vdup_n_s16(coeffs[2]);
        v_c3 = vdup_n_s16(coeffs[3]);
        v_c4 = vdup_n_s16(coeffs[4]);
        v_c5 = vdup_n_s16(coeffs[5]);
        v_c6 = vdup_n_s16(coeffs[6]);
        v_c7 = vdup_n_s16(coeffs[7]);
        v_c8 = vdup_n_s16(coeffs[8]);
        v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); // rounding term for descale
        // Alpha fill value for 4-channel output.
        v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel<uchar>::max()));
    }

    // Convert n pixels from 3-channel src into dcn-channel dst.
    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int dcn = dstcn, i = 0;
        uchar alpha = ColorChannel<uchar>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3; // n is now a count of input values (3 per pixel)

        // Vector path: 8 pixels (24 input values) per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            // Load 8 XYZ triplets and widen to signed 16 bits.
            uint8x8x3_t v_src = vld3_u8(src + i);
            int16x8x3_t v_src16;
            v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
            v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
            v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));

            // Lower 4 pixels: signed widening multiply-accumulate.
            int16x4_t v_s0 = vget_low_s16(v_src16.val[0]),
                      v_s1 = vget_low_s16(v_src16.val[1]),
                      v_s2 = vget_low_s16(v_src16.val[2]);
            int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            // Descale with rounding: (x + 2^(xyz_shift-1)) >> xyz_shift.
            v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);

            // Upper 4 pixels: identical computation.
            v_s0 = vget_high_s16(v_src16.val[0]),
            v_s1 = vget_high_s16(v_src16.val[1]),
            v_s2 = vget_high_s16(v_src16.val[2]);
            int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);

            // Saturating narrow s32 -> s16 (vqmovn_s32) and then signed ->
            // unsigned 8 bits (vqmovun_s16): clamps negatives to 0, >255 to 255.
            uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1)));
            uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
            uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1)));

            if (dcn == 3)
            {
                uint8x8x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3_u8(dst, v_dst);
            }
            else
            {
                // 4-channel output: append a fully-opaque alpha lane.
                uint8x8x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4_u8(dst, v_dst);
            }
        }

        // Scalar tail: remaining 0..7 pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
            dst[0] = saturate_cast<uchar>(B);
            dst[1] = saturate_cast<uchar>(G);
            dst[2] = saturate_cast<uchar>(R);
            if (dcn == 4)
                dst[3] = alpha;
        }
    }

    int dstcn, blueIdx;
    int coeffs[9];
    // Pre-broadcast signed coefficients, alpha fill and rounding constant.
    int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8;
    uint8x8_t v_alpha;
    int32x4_t v_delta;
};
template <> struct XYZ2RGB_i<ushort>
{
    typedef ushort channel_type;

    // NEON-accelerated fixed-point XYZ -> RGB conversion for 16-bit pixels.
    // Unlike the uchar specialization, 16-bit samples times the coefficients
    // do not fit a widening 16-bit multiply-accumulate, so sources are
    // widened to 32 bits and combined with non-widening vmlaq_s32.
    //
    // _dstcn   - number of destination channels (3 or 4; channel 3 is filled
    //            with the maximum channel value, i.e. opaque alpha)
    // _blueIdx - blue-channel index in the output; 0 swaps the first and
    //            third matrix rows (presumably BGR vs RGB output order)
    // _coeffs  - optional custom matrix; NULL selects the built-in one
    //            (NOTE(review): _coeffs is int* yet scaled like the float
    //            path — confirm callers pass unscaled values)
    XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
    : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // Default inverse matrix, already scaled by 2^xyz_shift.
        static const int coeffs0[] =
        {
            13273, -6296, -2042,
            -3970, 7684, 170,
            228, -836, 4331
        };
        for(int i = 0; i < 9; i++)
            coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i];
        if(blueIdx == 0)
        {
            // Swap the first and third rows of the matrix so the blue output
            // is produced first.
            std::swap(coeffs[0], coeffs[6]);
            std::swap(coeffs[1], coeffs[7]);
            std::swap(coeffs[2], coeffs[8]);
        }

        // Broadcast each (signed) coefficient into a NEON lane vector.
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        v_c5 = vdupq_n_s32(coeffs[5]);
        v_c6 = vdupq_n_s32(coeffs[6]);
        v_c7 = vdupq_n_s32(coeffs[7]);
        v_c8 = vdupq_n_s32(coeffs[8]);
        v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); // rounding term for descale
        // Alpha fill values for 4-channel output (full and half width).
        v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
        v_alpha2 = vget_low_u16(v_alpha);
    }

    // Convert n pixels from 3-channel src into dcn-channel dst.
    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int dcn = dstcn, i = 0;
        ushort alpha = ColorChannel<ushort>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
            C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
            C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        n *= 3; // n is now a count of input values (3 per pixel)

        // Main vector path: 8 pixels (24 input values) per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            uint16x8x3_t v_src = vld3q_u16(src + i);

            // Lower 4 pixels: widen u16 -> s32 and multiply-accumulate.
            int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
                      v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
                      v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));
            int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            // Descale with rounding: (x + 2^(xyz_shift-1)) >> xyz_shift.
            v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift);
            v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift);

            // Upper 4 pixels: identical computation.
            v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
            v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
            v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));
            int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift);
            v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift);

            // Saturating signed -> unsigned narrow (vqmovun_s32): clamps
            // negatives to 0 and overflow to 65535.
            uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1));
            uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
            uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1));

            if (dcn == 3)
            {
                uint16x8x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3q_u16(dst, v_dst);
            }
            else
            {
                // 4-channel output: append a fully-opaque alpha lane.
                uint16x8x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4q_u16(dst, v_dst);
            }
        }

        // Secondary vector path: 4 pixels (12 input values) per iteration.
        for ( ; i <= n - 12; i += 12, dst += dcn * 4)
        {
            uint16x4x3_t v_src = vld3_u16(src + i);
            int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
                      v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
                      v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));

            int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2);
            int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5);
            int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8);
            v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift);
            v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift);
            v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), xyz_shift);

            uint16x4_t v_b = vqmovun_s32(v_X);
            uint16x4_t v_g = vqmovun_s32(v_Y);
            uint16x4_t v_r = vqmovun_s32(v_Z);

            if (dcn == 3)
            {
                uint16x4x3_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                vst3_u16(dst, v_dst);
            }
            else
            {
                // Half-width alpha fill for the 4-pixel path.
                uint16x4x4_t v_dst;
                v_dst.val[0] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[2] = v_r;
                v_dst.val[3] = v_alpha2;
                vst4_u16(dst, v_dst);
            }
        }

        // Scalar tail: remaining 0..3 pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift);
            int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift);
            int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift);
            dst[0] = saturate_cast<ushort>(B);
            dst[1] = saturate_cast<ushort>(G);
            dst[2] = saturate_cast<ushort>(R);
            if (dcn == 4)
                dst[3] = alpha;
        }
    }

    int dstcn, blueIdx;
    int coeffs[9];
    // Pre-broadcast signed coefficients and rounding constant, plus alpha
    // fill values for the 8- and 4-pixel store paths.
    int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta;
    uint16x4_t v_alpha2;
    uint16x8_t v_alpha;
};
#endif
////////////////////////////////////// RGB <-> HSV ///////////////////////////////////////
...
...
modules/imgproc/src/imgwarp.cpp
View file @
a54f6bb0
...
...
@@ -3580,14 +3580,6 @@ public:
_mm_storeu_si128
((
__m128i
*
)(
XY
+
x1
*
2
+
8
),
iy1
);
}
}
#elif CV_NEON
for
(
;
x1
<=
bcols
-
4
;
x1
+=
4
)
{
int32x4_t
v_sx
=
cv_vrndq_s32_f32
(
vld1q_f32
(
sX
+
x1
)),
v_sy
=
cv_vrndq_s32_f32
(
vld1q_f32
(
sY
+
x1
));
int16x4x2_t
v_dst
=
vzip_s16
(
vqmovn_s32
(
v_sx
),
vqmovn_s32
(
v_sy
));
vst1q_s16
(
XY
+
(
x1
<<
1
),
vcombine_s16
(
v_dst
.
val
[
0
],
v_dst
.
val
[
1
]));
}
#endif
for
(
;
x1
<
bcols
;
x1
++
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment