Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
2d813910
Commit
2d813910
authored
Sep 20, 2017
by
Alexander Alekhin
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #9669 from kraj:master
parents
c57aef75
2c7a123e
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
104 additions
and
104 deletions
+104
-104
channel_extract.cpp
3rdparty/carotene/src/channel_extract.cpp
+2
-2
channels_combine.cpp
3rdparty/carotene/src/channels_combine.cpp
+1
-1
colorconvert.cpp
3rdparty/carotene/src/colorconvert.cpp
+39
-39
convert.cpp
3rdparty/carotene/src/convert.cpp
+27
-27
convert_scale.cpp
3rdparty/carotene/src/convert_scale.cpp
+28
-28
gaussian_blur.cpp
3rdparty/carotene/src/gaussian_blur.cpp
+1
-1
pyramid.cpp
3rdparty/carotene/src/pyramid.cpp
+4
-4
scharr.cpp
3rdparty/carotene/src/scharr.cpp
+2
-2
No files found.
3rdparty/carotene/src/channel_extract.cpp
View file @
2d813910
...
...
@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
srcStride == dst2Stride && \
srcStride == dst3Stride &&
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define SPLIT_ASM2(sgn, bits) __asm__ ( \
"vld2." #bits " {d0, d2}, [%[in0]] \n\t" \
...
...
@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
} \
}
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define ALPHA_QUAD(sgn, bits) { \
internal::prefetch(src + sj); \
...
...
3rdparty/carotene/src/channels_combine.cpp
View file @
2d813910
...
...
@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
dstStride == src2Stride && \
dstStride == src3Stride &&
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define MERGE_ASM2(sgn, bits) __asm__ ( \
"vld1." #bits " {d0-d1}, [%[in0]] \n\t" \
...
...
3rdparty/carotene/src/colorconvert.cpp
View file @
2d813910
...
...
@@ -97,7 +97,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
    const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
    const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
    register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
    register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
    register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
...
...
@@ -116,7 +116,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
        size_t sj = 0u, dj = 0u;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
        for (; dj < roiw8; sj += 24, dj += 8)
        {
            internal::prefetch(src + sj);
...
...
@@ -198,7 +198,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -217,7 +217,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
32
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -300,7 +300,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -319,7 +319,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
24
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -402,7 +402,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
const
u32
G2Y
=
color_space
==
COLOR_SPACE_BT601
?
G2Y_BT601
:
G2Y_BT709
;
const
u32
B2Y
=
color_space
==
COLOR_SPACE_BT601
?
B2Y_BT601
:
B2Y_BT709
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
int16x4_t
v_r2y
asm
(
"d31"
)
=
vmov_n_s16
(
R2Y
);
register
int16x4_t
v_g2y
asm
(
"d30"
)
=
vmov_n_s16
(
G2Y
);
register
int16x4_t
v_b2y
asm
(
"d29"
)
=
vmov_n_s16
(
B2Y
);
...
...
@@ -421,7 +421,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
u8
*
dst
=
internal
::
getRowPtr
(
dstBase
,
dstStride
,
i
);
size_t
sj
=
0u
,
dj
=
0u
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
for
(;
dj
<
roiw8
;
sj
+=
32
,
dj
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
...
...
@@ -512,7 +512,7 @@ void gray2rgb(const Size2D &size,
        for (; sj < roiw16; sj += 16, dj += 48)
        {
            internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
            __asm__ (
                "vld1.8 {d0-d1}, [%[in0]] \n\t"
                "vmov.8 q1, q0 \n\t"
...
...
@@ -538,7 +538,7 @@ void gray2rgb(const Size2D &size,
if
(
sj
<
roiw8
)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0}, [%[in]]
\n\t
"
"vmov.8 d1, d0
\n\t
"
...
...
@@ -584,7 +584,7 @@ void gray2rgbx(const Size2D &size,
size_t
roiw16
=
size
.
width
>=
15
?
size
.
width
-
15
:
0
;
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
uint8x16_t
vc255
asm
(
"q4"
)
=
vmovq_n_u8
(
255
);
#else
uint8x16x4_t
vRgba
;
...
...
@@ -602,7 +602,7 @@ void gray2rgbx(const Size2D &size,
for
(;
sj
<
roiw16
;
sj
+=
16
,
dj
+=
64
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0-d1}, [%[in0]]
\n\t
"
"vmov.8 q1, q0
\n\t
"
...
...
@@ -628,7 +628,7 @@ void gray2rgbx(const Size2D &size,
if
(
sj
<
roiw8
)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d5}, [%[in]]
\n\t
"
"vmov.8 d6, d5
\n\t
"
...
...
@@ -1409,7 +1409,7 @@ inline void convertToHSV(const s32 r, const s32 g, const s32 b,
"d24","d25","d26","d27","d28","d29","d30","d31" \
);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define YCRCB_CONSTS \
register int16x4_t vcYR asm ("d31") = vmov_n_s16(4899); \
...
...
@@ -1555,7 +1555,7 @@ inline uint8x8x3_t convertToYCrCb( const int16x8_t& vR, const int16x8_t& vG, con
#define COEFF_G ( 8663)
#define COEFF_B (-17705)
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define YUV420ALPHA3_CONST
#define YUV420ALPHA4_CONST register uint8x16_t c255 asm ("q13") = vmovq_n_u8(255);
#define YUV420ALPHA3_CONVERT
...
...
@@ -1852,7 +1852,7 @@ void rgb2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
    const s32 hsv_shift = 12;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
    register const f32 vsdiv_table = f32(255 << hsv_shift);
    register f32 vhdiv_table = f32(hrange << hsv_shift);
    register const s32 vhrange = hrange;
...
...
@@ -1871,7 +1871,7 @@ void rgb2hsv(const Size2D &size,
        for (; j < roiw8; sj += 24, dj += 24, j += 8)
        {
            internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
            CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d0, d2)
#else
            uint8x8x3_t vRgb = vld3_u8(src + sj);
...
...
@@ -1904,7 +1904,7 @@ void rgbx2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -1923,7 +1923,7 @@ void rgbx2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld4
.8
{
d0
-
d3
},
d0
,
d2
)
#else
uint8x8x4_t
vRgb
=
vld4_u8
(
src
+
sj
);
...
...
@@ -1956,7 +1956,7 @@ void bgr2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -1975,7 +1975,7 @@ void bgr2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld3
.8
{
d0
-
d2
},
d2
,
d0
)
#else
uint8x8x3_t
vRgb
=
vld3_u8
(
src
+
sj
);
...
...
@@ -2008,7 +2008,7 @@ void bgrx2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t
roiw8
=
size
.
width
>=
7
?
size
.
width
-
7
:
0
;
const
s32
hsv_shift
=
12
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
const
f32
vsdiv_table
=
f32
(
255
<<
hsv_shift
);
register
f32
vhdiv_table
=
f32
(
hrange
<<
hsv_shift
);
register
const
s32
vhrange
=
hrange
;
...
...
@@ -2027,7 +2027,7 @@ void bgrx2hsv(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM
(
vld4
.8
{
d0
-
d3
},
d2
,
d0
)
#else
uint8x8x4_t
vRgb
=
vld4_u8
(
src
+
sj
);
...
...
@@ -2068,7 +2068,7 @@ void rgbx2bgr565(const Size2D &size,
        for (; j < roiw16; sj += 64, dj += 32, j += 16)
        {
            internal::prefetch(src + sj);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
            __asm__ (
                "vld4.8 {d2, d4, d6, d8}, [%[in0]] @ q0 q1 q2 q3 q4 \n\t"
                "vld4.8 {d3, d5, d7, d9}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
...
...
@@ -2122,7 +2122,7 @@ void rgb2bgr565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
48
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld3.8 {d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 q4
\n\t
"
"vld3.8 {d3, d5, d7}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx
\n\t
"
...
...
@@ -2176,7 +2176,7 @@ void rgbx2rgb565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
64
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld4.8 {d0, d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3
\n\t
"
"vld4.8 {d1, d3, d5, d7}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB aaaaAAAA
\n\t
"
...
...
@@ -2230,7 +2230,7 @@ void rgb2rgb565(const Size2D &size,
for
(;
j
<
roiw16
;
sj
+=
48
,
dj
+=
32
,
j
+=
16
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld3.8 {d0, d2, d4}, [%[in0]] @ q0 q1 q2 q3
\n\t
"
"vld3.8 {d1, d3, d5}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx
\n\t
"
...
...
@@ -2285,7 +2285,7 @@ void rgb2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld3
.8
{
d0
-
d2
},
d0
,
d1
,
d2
)
#else
uint8x8x3_t
vRgb
=
vld3_u8
(
src
+
sj
);
...
...
@@ -2329,7 +2329,7 @@ void rgbx2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld4
.8
{
d0
-
d3
},
d0
,
d1
,
d2
)
#else
uint8x8x4_t
vRgba
=
vld4_u8
(
src
+
sj
);
...
...
@@ -2373,7 +2373,7 @@ void bgr2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
24
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld3
.8
{
d0
-
d2
},
d2
,
d1
,
d0
)
#else
uint8x8x3_t
vBgr
=
vld3_u8
(
src
+
sj
);
...
...
@@ -2417,7 +2417,7 @@ void bgrx2ycrcb(const Size2D &size,
for
(;
j
<
roiw8
;
sj
+=
32
,
dj
+=
24
,
j
+=
8
)
{
internal
::
prefetch
(
src
+
sj
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB
(
vld4
.8
{
d0
-
d3
},
d2
,
d1
,
d0
)
#else
uint8x8x4_t
vBgra
=
vld4_u8
(
src
+
sj
);
...
...
@@ -2499,7 +2499,7 @@ void yuv420sp2rgb(const Size2D &size,
            internal::prefetch(uv + j);
            internal::prefetch(y1 + j);
            internal::prefetch(y2 + j);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
            CONVERTYUV420TORGB(3, d1, d0, q5, q6)
#else
            convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
...
...
@@ -2545,7 +2545,7 @@ void yuv420sp2rgbx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d1
,
d0
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2591,7 +2591,7 @@ void yuv420i2rgb(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d0
,
d1
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2637,7 +2637,7 @@ void yuv420i2rgbx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d0
,
d1
,
q5
,
q6
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2683,7 +2683,7 @@ void yuv420sp2bgr(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d1
,
d0
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2729,7 +2729,7 @@ void yuv420sp2bgrx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d1
,
d0
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2775,7 +2775,7 @@ void yuv420i2bgr(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
3
,
d0
,
d1
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
@@ -2821,7 +2821,7 @@ void yuv420i2bgrx(const Size2D &size,
internal
::
prefetch
(
uv
+
j
);
internal
::
prefetch
(
y1
+
j
);
internal
::
prefetch
(
y2
+
j
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB
(
4
,
d0
,
d1
,
q6
,
q5
)
#else
convertYUV420
.
ToRGB
(
y1
+
j
,
y2
+
j
,
uv
+
j
,
dst1
+
dj
,
dst2
+
dj
);
...
...
3rdparty/carotene/src/convert.cpp
View file @
2d813910
...
...
@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
}
})
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, u16, 16,
     register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
...
...
@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u8
,
s32
,
16
,
register
uint8x16_t
zero0
asm
(
"q1"
)
=
vmovq_n_u8
(
0
);
register
uint8x16_t
zero1
asm
(
"q2"
)
=
vmovq_n_u8
(
0
);
...
...
@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u8, f32, 16,
,
{
...
...
@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
s8
,
u16
,
16
,
register
uint8x16_t
zero0
asm
(
"q1"
)
=
vmovq_n_u8
(
0
);,
{
...
...
@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s8
,
s16
,
16
,
,
{
...
...
@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
s8
,
s32
,
16
,
,
{
...
...
@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s8
,
f32
,
16
,
,
{
...
...
@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u16
,
u8
,
16
,
,
{
...
...
@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u16
,
s8
,
16
,
register
uint8x16_t
v127
asm
(
"q4"
)
=
vmovq_n_u8
(
127
);,
{
...
...
@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u16
,
s16
,
8
,
register
uint16x8_t
v32767
asm
(
"q4"
)
=
vmovq_n_u16
(
0x7FFF
);,
{
...
...
@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
u16
,
s32
,
8
,
register
uint16x8_t
zero0
asm
(
"q1"
)
=
vmovq_n_u16
(
0
);,
{
...
...
@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
u16
,
f32
,
8
,
,
{
...
...
@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
u8
,
16
,
,
{
...
...
@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
s8
,
16
,
,
{
...
...
@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC
(
s16
,
u16
,
8
,
register
int16x8_t
vZero
asm
(
"q4"
)
=
vmovq_n_s16
(
0
);,
{
...
...
@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
s32
,
8
,
,
{
...
...
@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s16
,
f32
,
8
,
,
{
...
...
@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
u8
,
8
,
,
{
...
...
@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
s8
,
8
,
,
{
...
...
@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
u16
,
8
,
,
{
...
...
@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
s16
,
8
,
,
{
...
...
@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
s32
,
f32
,
8
,
,
{
...
...
@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
u8
,
8
,
register
float32x4_t
vmult
asm
(
"q0"
)
=
vdupq_n_f32
((
float
)(
1
<<
16
));
register
uint32x4_t
vmask
asm
(
"q1"
)
=
vdupq_n_u32
(
1
<<
16
);,
...
...
@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
s8
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
u16
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
s16
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC
(
f32
,
s32
,
8
,
register
float32x4_t
vhalf
asm
(
"q0"
)
=
vdupq_n_f32
(
0.5
f
);,
{
...
...
3rdparty/carotene/src/convert_scale.cpp
View file @
2d813910
...
...
@@ -473,7 +473,7 @@ CVTS_FUNC(u8, s16, 16,
})
#endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u8, s32, 16,
    register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
    register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
...
...
@@ -562,7 +562,7 @@ CVTS_FUNC(u8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u8
,
f32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -985,7 +985,7 @@ CVTS_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s8
,
s32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1074,7 +1074,7 @@ CVTS_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s8
,
f32
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -1155,7 +1155,7 @@ CVTS_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
u8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1214,7 +1214,7 @@ CVTS_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
s8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1273,7 +1273,7 @@ CVTS_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
u16
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1330,7 +1330,7 @@ CVTS_FUNC1(u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
s16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1387,7 +1387,7 @@ CVTS_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1443,7 +1443,7 @@ CVTS_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
u16
,
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -1495,7 +1495,7 @@ CVTS_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
u8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1554,7 +1554,7 @@ CVTS_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
s8
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1613,7 +1613,7 @@ CVTS_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
u16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1670,7 +1670,7 @@ CVTS_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
s16
,
16
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1727,7 +1727,7 @@ CVTS_FUNC1(s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1783,7 +1783,7 @@ CVTS_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s16
,
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -1835,7 +1835,7 @@ CVTS_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
u8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1893,7 +1893,7 @@ CVTS_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
s8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -1951,7 +1951,7 @@ CVTS_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
u16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2007,7 +2007,7 @@ CVTS_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
s16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2063,7 +2063,7 @@ CVTS_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2118,7 +2118,7 @@ CVTS_FUNC1(s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
s32
,
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
@@ -2169,7 +2169,7 @@ CVTS_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
u8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)((
1
<<
16
)
*
alpha
));
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)((
1
<<
16
)
*
beta
));
...
...
@@ -2239,7 +2239,7 @@ CVTS_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
s8
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2293,7 +2293,7 @@ CVTS_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
u16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2345,7 +2345,7 @@ CVTS_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
s16
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2397,7 +2397,7 @@ CVTS_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC
(
f32
,
s32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
+
0.5
f
);,
...
...
@@ -2448,7 +2448,7 @@ CVTS_FUNC(f32, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1
(
f32
,
8
,
register
float32x4_t
vscale
asm
(
"q0"
)
=
vdupq_n_f32
((
f32
)
alpha
);
register
float32x4_t
vshift
asm
(
"q1"
)
=
vdupq_n_f32
((
f32
)
beta
);,
...
...
3rdparty/carotene/src/gaussian_blur.cpp
View file @
2d813910
...
...
@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
u16
*
lidx1
=
lane
+
x
-
1
*
2
;
u16
*
lidx3
=
lane
+
x
+
1
*
2
;
u16
*
lidx4
=
lane
+
x
+
2
*
2
;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
__volatile__
(
"vld2.16 {d0, d2}, [%[in0]]!
\n\t
"
"vld2.16 {d1, d3}, [%[in0]]
\n\t
"
...
...
3rdparty/carotene/src/pyramid.cpp
View file @
2d813910
...
...
@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for
(;
x
<
roiw8
;
x
+=
8
)
{
internal
::
prefetch
(
lane
+
2
*
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld2.16 {d0-d3}, [%[in0]]
\n\t
"
"vld2.16 {d4-d7}, [%[in4]]
\n\t
"
...
...
@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for
(;
x
<
roiw4
;
x
+=
4
)
{
internal
::
prefetch
(
lane
+
2
*
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld2.32 {d0-d3}, [%[in0]]
\n\t
"
"vld2.32 {d4-d7}, [%[in4]]
\n\t
"
...
...
@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
std
::
vector
<
f32
>
_buf
(
cn
*
(
srcSize
.
width
+
4
)
+
32
/
sizeof
(
f32
));
f32
*
lane
=
internal
::
alignPtr
(
&
_buf
[
2
*
cn
],
32
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
register
float32x4_t
vc6d4f32
asm
(
"q11"
)
=
vmovq_n_f32
(
1.5
f
);
// 6/4
register
float32x4_t
vc1d4f32
asm
(
"q12"
)
=
vmovq_n_f32
(
0.25
f
);
// 1/4
...
...
@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for
(;
x
<
roiw4
;
x
+=
4
)
{
internal
::
prefetch
(
lane
+
2
*
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
__volatile__
(
"vld2.32 {d0-d3}, [%[in0]]
\n\t
"
"vld2.32 {d8-d11}, [%[in4]]
\n\t
"
...
...
3rdparty/carotene/src/scharr.cpp
View file @
2d813910
...
...
@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
internal
::
prefetch
(
srow0
+
x
);
internal
::
prefetch
(
srow1
+
x
);
internal
::
prefetch
(
srow2
+
x
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__
(
"vld1.8 {d0}, [%[src0]]
\n\t
"
"vld1.8 {d2}, [%[src2]]
\n\t
"
...
...
@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
x
=
0
;
for
(
;
x
<
roiw8
;
x
+=
8
)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 &&
__GNUC_MINOR__ < 6 && !defined(__clang__)
__asm__
(
"vld1.16 {d4-d5}, [%[s2ptr]]
\n\t
"
"vld1.16 {d8-d9}, [%[s4ptr]]
\n\t
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment