Commit 2c7a123e authored by Khem Raj

Do not enable asm with clang

clang pretends to be gcc 4.2.0, which means we would
needlessly use inline asm instead of builtins when
building with clang.
Signed-off-by: Khem Raj <raj.khem@gmail.com>
parent 6a5298a5
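
Because clang defines the GCC compatibility macros __GNUC__ == 4 and __GNUC_MINOR__ == 2, version guards of the form __GNUC__ == 4 && __GNUC_MINOR__ < 7 also match clang and used to pull in the hand-written inline asm. The following minimal standalone sketch is illustrative only and is not part of the patch; it shows how the amended guard behaves (the macros clang predefines can be inspected with clang -dM -E -x c /dev/null | grep __GNUC):

// Hypothetical example, not from the carotene sources: with the extra
// !defined(__clang__) term, clang takes the intrinsics/builtins branch
// even though it reports __GNUC__ == 4 and __GNUC_MINOR__ == 2.
#include <cstdio>

int main()
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
    std::puts("old-GCC path: hand-written NEON inline asm");
#else
    std::puts("default path: NEON intrinsics / compiler builtins");
#endif
    return 0;
}
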
@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
 srcStride == dst2Stride && \
 srcStride == dst3Stride &&
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define SPLIT_ASM2(sgn, bits) __asm__ ( \
 "vld2." #bits " {d0, d2}, [%[in0]] \n\t" \
@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
 } \
 }
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define ALPHA_QUAD(sgn, bits) { \
 internal::prefetch(src + sj); \
...
@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
 dstStride == src2Stride && \
 dstStride == src3Stride &&
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define MERGE_ASM2(sgn, bits) __asm__ ( \
 "vld1." #bits " {d0-d1}, [%[in0]] \n\t" \
...
This diff is collapsed.
@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
 }
 })
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u8, u16, 16,
 register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
 {
@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u8, s32, 16,
 register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);
 register uint8x16_t zero1 asm ("q2") = vmovq_n_u8(0);
@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u8, f32, 16,
 ,
 {
@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
 }
 })
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s8, u16, 16,
 register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
 {
@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s8, s16, 16,
 ,
 {
@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s8, s32, 16,
 ,
 {
@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s8, f32, 16,
 ,
 {
@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, u8, 16,
 ,
 {
@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, s8, 16,
 register uint8x16_t v127 asm ("q4") = vmovq_n_u8(127);,
 {
@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u16, s16, 8,
 register uint16x8_t v32767 asm ("q4") = vmovq_n_u16(0x7FFF);,
 {
@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u16, s32, 8,
 register uint16x8_t zero0 asm ("q1") = vmovq_n_u16(0);,
 {
@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, f32, 8,
 ,
 {
@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, u8, 16,
 ,
 {
@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, s8, 16,
 ,
 {
@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s16, u16, 8,
 register int16x8_t vZero asm ("q4") = vmovq_n_s16(0);,
 {
@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, s32, 8,
 ,
 {
@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, f32, 8,
 ,
 {
@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, u8, 8,
 ,
 {
@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, s8, 8,
 ,
 {
@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, u16, 8,
 ,
 {
@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, s16, 8,
 ,
 {
@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, f32, 8,
 ,
 {
@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, u8, 8,
 register float32x4_t vmult asm ("q0") = vdupq_n_f32((float)(1 << 16));
 register uint32x4_t vmask asm ("q1") = vdupq_n_u32(1<<16);,
@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s8, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, u16, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s16, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s32, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
...
This diff is collapsed.
@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
 u16* lidx1 = lane + x - 1*2;
 u16* lidx3 = lane + x + 1*2;
 u16* lidx4 = lane + x + 2*2;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ __volatile__ (
 "vld2.16 {d0, d2}, [%[in0]]! \n\t"
 "vld2.16 {d1, d3}, [%[in0]] \n\t"
...
@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 for (; x < roiw8; x += 8)
 {
 internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ (
 "vld2.16 {d0-d3}, [%[in0]] \n\t"
 "vld2.16 {d4-d7}, [%[in4]] \n\t"
@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 for (; x < roiw4; x += 4)
 {
 internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ (
 "vld2.32 {d0-d3}, [%[in0]] \n\t"
 "vld2.32 {d4-d7}, [%[in4]] \n\t"
@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 std::vector<f32> _buf(cn*(srcSize.width + 4) + 32/sizeof(f32));
 f32* lane = internal::alignPtr(&_buf[2*cn], 32);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 register float32x4_t vc6d4f32 asm ("q11") = vmovq_n_f32(1.5f); // 6/4
 register float32x4_t vc1d4f32 asm ("q12") = vmovq_n_f32(0.25f); // 1/4
@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 for (; x < roiw4; x += 4)
 {
 internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ __volatile__ (
 "vld2.32 {d0-d3}, [%[in0]] \n\t"
 "vld2.32 {d8-d11}, [%[in4]] \n\t"
...
@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
 internal::prefetch(srow0 + x);
 internal::prefetch(srow1 + x);
 internal::prefetch(srow2 + x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ (
 "vld1.8 {d0}, [%[src0]] \n\t"
 "vld1.8 {d2}, [%[src2]] \n\t"
@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
 x = 0;
 for( ; x < roiw8; x += 8 )
 {
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 __asm__ (
 "vld1.16 {d4-d5}, [%[s2ptr]] \n\t"
 "vld1.16 {d8-d9}, [%[s4ptr]] \n\t"
...
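
The non-asm branches that clang now compiles use NEON intrinsics. A rough, hypothetical illustration of the kind of u8-to-u16 widening convert they perform (not copied from the carotene sources; the function name is made up):

// Hypothetical u8 -> u16 widening convert with NEON intrinsics, the
// sort of code the builtins/intrinsics path generates instead of the
// register-pinned inline asm above.
#include <arm_neon.h>
#include <cstddef>
#include <cstdint>

void convert_u8_to_u16(const std::uint8_t *src, std::uint16_t *dst, std::size_t n)
{
    std::size_t i = 0;
    for (; i + 8 <= n; i += 8)
    {
        uint8x8_t v = vld1_u8(src + i);   // load 8 bytes
        uint16x8_t w = vmovl_u8(v);       // zero-extend to 16-bit lanes
        vst1q_u16(dst + i, w);            // store 8 u16 values
    }
    for (; i < n; ++i)                    // scalar tail
        dst[i] = src[i];
}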