Commit 2c7a123e authored by Khem Raj

Do not enable asm with clang

clang pretends to be gcc 4.2.0, which means we would
needlessly use inline asm under clang instead of the
builtins, even when the builtins are available.
Signed-off-by: Khem Raj <raj.khem@gmail.com>
parent 6a5298a5
......@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
srcStride == dst2Stride && \
srcStride == dst3Stride &&
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define SPLIT_ASM2(sgn, bits) __asm__ ( \
"vld2." #bits " {d0, d2}, [%[in0]] \n\t" \
......@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
} \
}
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define ALPHA_QUAD(sgn, bits) { \
internal::prefetch(src + sj); \
......
......@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
dstStride == src2Stride && \
dstStride == src3Stride &&
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define MERGE_ASM2(sgn, bits) __asm__ ( \
"vld1." #bits " {d0-d1}, [%[in0]] \n\t" \
......
......@@ -97,7 +97,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
......@@ -116,7 +116,7 @@ void rgb2gray(const Size2D &size, COLOR_SPACE color_space,
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 24, dj += 8)
{
internal::prefetch(src + sj);
......@@ -198,7 +198,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
......@@ -217,7 +217,7 @@ void rgbx2gray(const Size2D &size, COLOR_SPACE color_space,
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 32, dj += 8)
{
internal::prefetch(src + sj);
......@@ -300,7 +300,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
......@@ -319,7 +319,7 @@ void bgr2gray(const Size2D &size, COLOR_SPACE color_space,
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 24, dj += 8)
{
internal::prefetch(src + sj);
......@@ -402,7 +402,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
const u32 G2Y = color_space == COLOR_SPACE_BT601 ? G2Y_BT601 : G2Y_BT709;
const u32 B2Y = color_space == COLOR_SPACE_BT601 ? B2Y_BT601 : B2Y_BT709;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register int16x4_t v_r2y asm ("d31") = vmov_n_s16(R2Y);
register int16x4_t v_g2y asm ("d30") = vmov_n_s16(G2Y);
register int16x4_t v_b2y asm ("d29") = vmov_n_s16(B2Y);
......@@ -421,7 +421,7 @@ void bgrx2gray(const Size2D &size, COLOR_SPACE color_space,
u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
size_t sj = 0u, dj = 0u;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
for (; dj < roiw8; sj += 32, dj += 8)
{
internal::prefetch(src + sj);
......@@ -512,7 +512,7 @@ void gray2rgb(const Size2D &size,
for (; sj < roiw16; sj += 16, dj += 48)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0-d1}, [%[in0]] \n\t"
"vmov.8 q1, q0 \n\t"
......@@ -538,7 +538,7 @@ void gray2rgb(const Size2D &size,
if (sj < roiw8)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0}, [%[in]] \n\t"
"vmov.8 d1, d0 \n\t"
......@@ -584,7 +584,7 @@ void gray2rgbx(const Size2D &size,
size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register uint8x16_t vc255 asm ("q4") = vmovq_n_u8(255);
#else
uint8x16x4_t vRgba;
......@@ -602,7 +602,7 @@ void gray2rgbx(const Size2D &size,
for (; sj < roiw16; sj += 16, dj += 64)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0-d1}, [%[in0]] \n\t"
"vmov.8 q1, q0 \n\t"
......@@ -628,7 +628,7 @@ void gray2rgbx(const Size2D &size,
if (sj < roiw8)
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d5}, [%[in]] \n\t"
"vmov.8 d6, d5 \n\t"
......@@ -1409,7 +1409,7 @@ inline void convertToHSV(const s32 r, const s32 g, const s32 b,
"d24","d25","d26","d27","d28","d29","d30","d31" \
);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define YCRCB_CONSTS \
register int16x4_t vcYR asm ("d31") = vmov_n_s16(4899); \
......@@ -1555,7 +1555,7 @@ inline uint8x8x3_t convertToYCrCb( const int16x8_t& vR, const int16x8_t& vG, con
#define COEFF_G ( 8663)
#define COEFF_B (-17705)
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define YUV420ALPHA3_CONST
#define YUV420ALPHA4_CONST register uint8x16_t c255 asm ("q13") = vmovq_n_u8(255);
#define YUV420ALPHA3_CONVERT
......@@ -1852,7 +1852,7 @@ void rgb2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
......@@ -1871,7 +1871,7 @@ void rgb2hsv(const Size2D &size,
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d0, d2)
#else
uint8x8x3_t vRgb = vld3_u8(src + sj);
......@@ -1904,7 +1904,7 @@ void rgbx2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
......@@ -1923,7 +1923,7 @@ void rgbx2hsv(const Size2D &size,
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld4.8 {d0-d3}, d0, d2)
#else
uint8x8x4_t vRgb = vld4_u8(src + sj);
......@@ -1956,7 +1956,7 @@ void bgr2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
......@@ -1975,7 +1975,7 @@ void bgr2hsv(const Size2D &size,
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld3.8 {d0-d2}, d2, d0)
#else
uint8x8x3_t vRgb = vld3_u8(src + sj);
......@@ -2008,7 +2008,7 @@ void bgrx2hsv(const Size2D &size,
#ifdef CAROTENE_NEON
size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
const s32 hsv_shift = 12;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register const f32 vsdiv_table = f32(255 << hsv_shift);
register f32 vhdiv_table = f32(hrange << hsv_shift);
register const s32 vhrange = hrange;
......@@ -2027,7 +2027,7 @@ void bgrx2hsv(const Size2D &size,
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERT_TO_HSV_ASM(vld4.8 {d0-d3}, d2, d0)
#else
uint8x8x4_t vRgb = vld4_u8(src + sj);
......@@ -2068,7 +2068,7 @@ void rgbx2bgr565(const Size2D &size,
for (; j < roiw16; sj += 64, dj += 32, j += 16)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld4.8 {d2, d4, d6, d8}, [%[in0]] @ q0 q1 q2 q3 q4 \n\t"
"vld4.8 {d3, d5, d7, d9}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
......@@ -2122,7 +2122,7 @@ void rgb2bgr565(const Size2D &size,
for (; j < roiw16; sj += 48, dj += 32, j += 16)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld3.8 {d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 q4 \n\t"
"vld3.8 {d3, d5, d7}, [%[in1]] @ xxxxxxxx rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
......@@ -2176,7 +2176,7 @@ void rgbx2rgb565(const Size2D &size,
for (; j < roiw16; sj += 64, dj += 32, j += 16)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld4.8 {d0, d2, d4, d6}, [%[in0]] @ q0 q1 q2 q3 \n\t"
"vld4.8 {d1, d3, d5, d7}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB aaaaAAAA \n\t"
......@@ -2230,7 +2230,7 @@ void rgb2rgb565(const Size2D &size,
for (; j < roiw16; sj += 48, dj += 32, j += 16)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld3.8 {d0, d2, d4}, [%[in0]] @ q0 q1 q2 q3 \n\t"
"vld3.8 {d1, d3, d5}, [%[in1]] @ rrrrRRRR ggggGGGG bbbbBBBB xxxxxxxx \n\t"
......@@ -2285,7 +2285,7 @@ void rgb2ycrcb(const Size2D &size,
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld3.8 {d0-d2}, d0, d1, d2)
#else
uint8x8x3_t vRgb = vld3_u8(src + sj);
......@@ -2329,7 +2329,7 @@ void rgbx2ycrcb(const Size2D &size,
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld4.8 {d0-d3}, d0, d1, d2)
#else
uint8x8x4_t vRgba = vld4_u8(src + sj);
......@@ -2373,7 +2373,7 @@ void bgr2ycrcb(const Size2D &size,
for (; j < roiw8; sj += 24, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld3.8 {d0-d2}, d2, d1, d0)
#else
uint8x8x3_t vBgr = vld3_u8(src + sj);
......@@ -2417,7 +2417,7 @@ void bgrx2ycrcb(const Size2D &size,
for (; j < roiw8; sj += 32, dj += 24, j += 8)
{
internal::prefetch(src + sj);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTTOYCRCB(vld4.8 {d0-d3}, d2, d1, d0)
#else
uint8x8x4_t vBgra = vld4_u8(src + sj);
......@@ -2499,7 +2499,7 @@ void yuv420sp2rgb(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d1, d0, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2545,7 +2545,7 @@ void yuv420sp2rgbx(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d1, d0, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2591,7 +2591,7 @@ void yuv420i2rgb(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d0, d1, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2637,7 +2637,7 @@ void yuv420i2rgbx(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d0, d1, q5, q6)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2683,7 +2683,7 @@ void yuv420sp2bgr(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d1, d0, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2729,7 +2729,7 @@ void yuv420sp2bgrx(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d1, d0, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2775,7 +2775,7 @@ void yuv420i2bgr(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(3, d0, d1, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......@@ -2821,7 +2821,7 @@ void yuv420i2bgrx(const Size2D &size,
internal::prefetch(uv + j);
internal::prefetch(y1 + j);
internal::prefetch(y2 + j);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CONVERTYUV420TORGB(4, d0, d1, q6, q5)
#else
convertYUV420.ToRGB(y1 + j, y2 + j, uv + j, dst1 + dj, dst2 + dj);
......
......@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, u16, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
......@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, s32, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);
register uint8x16_t zero1 asm ("q2") = vmovq_n_u8(0);
......@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u8, f32, 16,
,
{
......@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s8, u16, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
......@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s8, s16, 16,
,
{
......@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s8, s32, 16,
,
{
......@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s8, f32, 16,
,
{
......@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, u8, 16,
,
{
......@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, s8, 16,
register uint8x16_t v127 asm ("q4") = vmovq_n_u8(127);,
{
......@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u16, s16, 8,
register uint16x8_t v32767 asm ("q4") = vmovq_n_u16(0x7FFF);,
{
......@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u16, s32, 8,
register uint16x8_t zero0 asm ("q1") = vmovq_n_u16(0);,
{
......@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, f32, 8,
,
{
......@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, u8, 16,
,
{
......@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, s8, 16,
,
{
......@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s16, u16, 8,
register int16x8_t vZero asm ("q4") = vmovq_n_s16(0);,
{
......@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, s32, 8,
,
{
......@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, f32, 8,
,
{
......@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, u8, 8,
,
{
......@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, s8, 8,
,
{
......@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, u16, 8,
,
{
......@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, s16, 8,
,
{
......@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, f32, 8,
,
{
......@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, u8, 8,
register float32x4_t vmult asm ("q0") = vdupq_n_f32((float)(1 << 16));
register uint32x4_t vmask asm ("q1") = vdupq_n_u32(1<<16);,
......@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s8, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, u16, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s16, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s32, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......
......@@ -473,7 +473,7 @@ CVTS_FUNC(u8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u8, s32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -562,7 +562,7 @@ CVTS_FUNC(u8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u8, f32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
......@@ -985,7 +985,7 @@ CVTS_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s8, s32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1074,7 +1074,7 @@ CVTS_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s8, f32, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
......@@ -1155,7 +1155,7 @@ CVTS_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, u8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1214,7 +1214,7 @@ CVTS_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, s8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1273,7 +1273,7 @@ CVTS_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(u16, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1330,7 +1330,7 @@ CVTS_FUNC1(u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, s16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1387,7 +1387,7 @@ CVTS_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1443,7 +1443,7 @@ CVTS_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(u16, f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
......@@ -1495,7 +1495,7 @@ CVTS_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, u8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1554,7 +1554,7 @@ CVTS_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, s8, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1613,7 +1613,7 @@ CVTS_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, u16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1670,7 +1670,7 @@ CVTS_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(s16, 16,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1727,7 +1727,7 @@ CVTS_FUNC1(s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1783,7 +1783,7 @@ CVTS_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s16, f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
......@@ -1835,7 +1835,7 @@ CVTS_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, u8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1893,7 +1893,7 @@ CVTS_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, s8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -1951,7 +1951,7 @@ CVTS_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, u16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2007,7 +2007,7 @@ CVTS_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, s16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2063,7 +2063,7 @@ CVTS_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2118,7 +2118,7 @@ CVTS_FUNC1(s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(s32, f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
......@@ -2169,7 +2169,7 @@ CVTS_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, u8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)((1 << 16)*alpha));
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)((1 << 16)*beta));
......@@ -2239,7 +2239,7 @@ CVTS_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, s8, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2293,7 +2293,7 @@ CVTS_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, u16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2345,7 +2345,7 @@ CVTS_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, s16, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2397,7 +2397,7 @@ CVTS_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC(f32, s32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta + 0.5f);,
......@@ -2448,7 +2448,7 @@ CVTS_FUNC(f32, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVTS_FUNC1(f32, 8,
register float32x4_t vscale asm ("q0") = vdupq_n_f32((f32)alpha);
register float32x4_t vshift asm ("q1") = vdupq_n_f32((f32)beta);,
......
......@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
u16* lidx1 = lane + x - 1*2;
u16* lidx3 = lane + x + 1*2;
u16* lidx4 = lane + x + 2*2;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ __volatile__ (
"vld2.16 {d0, d2}, [%[in0]]! \n\t"
"vld2.16 {d1, d3}, [%[in0]] \n\t"
......
......@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for (; x < roiw8; x += 8)
{
internal::prefetch(lane + 2 * x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld2.16 {d0-d3}, [%[in0]] \n\t"
"vld2.16 {d4-d7}, [%[in4]] \n\t"
......@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for (; x < roiw4; x += 4)
{
internal::prefetch(lane + 2 * x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld2.32 {d0-d3}, [%[in0]] \n\t"
"vld2.32 {d4-d7}, [%[in4]] \n\t"
......@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
std::vector<f32> _buf(cn*(srcSize.width + 4) + 32/sizeof(f32));
f32* lane = internal::alignPtr(&_buf[2*cn], 32);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register float32x4_t vc6d4f32 asm ("q11") = vmovq_n_f32(1.5f); // 6/4
register float32x4_t vc1d4f32 asm ("q12") = vmovq_n_f32(0.25f); // 1/4
......@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for (; x < roiw4; x += 4)
{
internal::prefetch(lane + 2 * x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ __volatile__ (
"vld2.32 {d0-d3}, [%[in0]] \n\t"
"vld2.32 {d8-d11}, [%[in4]] \n\t"
......
......@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
internal::prefetch(srow0 + x);
internal::prefetch(srow1 + x);
internal::prefetch(srow2 + x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0}, [%[src0]] \n\t"
"vld1.8 {d2}, [%[src2]] \n\t"
......@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
x = 0;
for( ; x < roiw8; x += 8 )
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
__asm__ (
"vld1.16 {d4-d5}, [%[s2ptr]] \n\t"
"vld1.16 {d8-d9}, [%[s4ptr]] \n\t"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment