Commit 2d813910 authored by Alexander Alekhin

Merge pull request #9669 from kraj:master

parents c57aef75 2c7a123e
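This merge appends `&& !defined(__clang__)` to every compiler-version guard that selects carotene's hand-written NEON inline-assembly paths. Clang reports itself as GCC 4.2 through `__GNUC__`/`__GNUC_MINOR__`, so the old checks (`__GNUC__ == 4 && __GNUC_MINOR__ < 7`, or `< 6`) also matched clang and routed it into assembly blocks written specifically for old GCC. The sketch below is illustrative only, not code from the patch; it just shows how the amended guard steers path selection:

```cpp
// Minimal sketch of the guard pattern changed throughout this commit.
// Clang defines __GNUC__ == 4 and __GNUC_MINOR__ == 2 for compatibility,
// so the added !defined(__clang__) keeps it on the portable branch.
#include <cstdio>

int main()
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
    std::puts("old GCC 4.x on 32-bit ARM: hand-written NEON inline-assembly path");
#else
    std::puts("all other compilers (including clang): intrinsics / plain C++ path");
#endif
    return 0;
}
```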
@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
 srcStride == dst2Stride && \
 srcStride == dst3Stride &&
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define SPLIT_ASM2(sgn, bits) __asm__ ( \
 "vld2." #bits " {d0, d2}, [%[in0]] \n\t" \
@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
 } \
 }
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define ALPHA_QUAD(sgn, bits) { \
 internal::prefetch(src + sj); \
...
@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
 dstStride == src2Stride && \
 dstStride == src3Stride &&
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 #define MERGE_ASM2(sgn, bits) __asm__ ( \
 "vld1." #bits " {d0-d1}, [%[in0]] \n\t" \
...
This diff is collapsed.
@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
 }
 })
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u8, u16, 16,
 register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
 {
@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u8, s32, 16,
 register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);
 register uint8x16_t zero1 asm ("q2") = vmovq_n_u8(0);
@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u8, f32, 16,
 ,
 {
@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
 }
 })
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s8, u16, 16,
 register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
 {
@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s8, s16, 16,
 ,
 {
@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s8, s32, 16,
 ,
 {
@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s8, f32, 16,
 ,
 {
@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, u8, 16,
 ,
 {
@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, s8, 16,
 register uint8x16_t v127 asm ("q4") = vmovq_n_u8(127);,
 {
@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u16, s16, 8,
 register uint16x8_t v32767 asm ("q4") = vmovq_n_u16(0x7FFF);,
 {
@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(u16, s32, 8,
 register uint16x8_t zero0 asm ("q1") = vmovq_n_u16(0);,
 {
@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(u16, f32, 8,
 ,
 {
@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, u8, 16,
 ,
 {
@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, s8, 16,
 ,
 {
@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 CVT_FUNC(s16, u16, 8,
 register int16x8_t vZero asm ("q4") = vmovq_n_s16(0);,
 {
@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, s32, 8,
 ,
 {
@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s16, f32, 8,
 ,
 {
@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, u8, 8,
 ,
 {
@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, s8, 8,
 ,
 {
@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, u16, 8,
 ,
 {
@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, s16, 8,
 ,
 {
@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(s32, f32, 8,
 ,
 {
@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, u8, 8,
 register float32x4_t vmult asm ("q0") = vdupq_n_f32((float)(1 << 16));
 register uint32x4_t vmask asm ("q1") = vdupq_n_u32(1<<16);,
@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s8, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, u16, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s16, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
 })
 #endif
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 CVT_FUNC(f32, s32, 8,
 register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
 {
...
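Most of the guards above wrap `CVT_FUNC` bodies whose fast path pins NEON values to fixed registers (e.g. `register uint8x16_t zero0 asm ("q1")`), a GCC extension, and then names those registers directly inside the `__asm__` blocks. With `!defined(__clang__)` added, clang now takes the intrinsics branch instead. A rough sketch of what such an intrinsics branch looks like for the u8 to u16 case follows; it is illustrative only, and `convert_u8_to_u16_row` is a made-up name, not carotene's actual fallback code:

```cpp
// Hedged sketch: widen a row of u8 pixels to u16 with NEON intrinsics,
// the kind of portable code the non-asm branch of CVT_FUNC(u8, u16, ...) relies on.
#include <arm_neon.h>
#include <stddef.h>
#include <stdint.h>

void convert_u8_to_u16_row(const uint8_t *src, uint16_t *dst, size_t n)
{
    size_t x = 0;
    for (; x + 16 <= n; x += 16)
    {
        uint8x16_t v = vld1q_u8(src + x);                  // load 16 u8 values
        vst1q_u16(dst + x,     vmovl_u8(vget_low_u8(v)));  // widen low 8 lanes
        vst1q_u16(dst + x + 8, vmovl_u8(vget_high_u8(v))); // widen high 8 lanes
    }
    for (; x < n; ++x)                                     // scalar tail
        dst[x] = src[x];
}
```

The same widen-with-`vmovl` pattern generalizes to the other integer conversions guarded above.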
This diff is collapsed.
@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
 u16* lidx1 = lane + x - 1*2;
 u16* lidx3 = lane + x + 1*2;
 u16* lidx4 = lane + x + 2*2;
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ __volatile__ (
 "vld2.16 {d0, d2}, [%[in0]]! \n\t"
 "vld2.16 {d1, d3}, [%[in0]] \n\t"
...
@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 for (; x < roiw8; x += 8)
 {
 internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ (
 "vld2.16 {d0-d3}, [%[in0]] \n\t"
 "vld2.16 {d4-d7}, [%[in4]] \n\t"
@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 for (; x < roiw4; x += 4)
 {
 internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ (
 "vld2.32 {d0-d3}, [%[in0]] \n\t"
 "vld2.32 {d4-d7}, [%[in4]] \n\t"
@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 std::vector<f32> _buf(cn*(srcSize.width + 4) + 32/sizeof(f32));
 f32* lane = internal::alignPtr(&_buf[2*cn], 32);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 register float32x4_t vc6d4f32 asm ("q11") = vmovq_n_f32(1.5f); // 6/4
 register float32x4_t vc1d4f32 asm ("q12") = vmovq_n_f32(0.25f); // 1/4
@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
 for (; x < roiw4; x += 4)
 {
 internal::prefetch(lane + 2 * x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ __volatile__ (
 "vld2.32 {d0-d3}, [%[in0]] \n\t"
 "vld2.32 {d8-d11}, [%[in4]] \n\t"
...
@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
 internal::prefetch(srow0 + x);
 internal::prefetch(srow1 + x);
 internal::prefetch(srow2 + x);
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
 __asm__ (
 "vld1.8 {d0}, [%[src0]] \n\t"
 "vld1.8 {d2}, [%[src2]] \n\t"
@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
 x = 0;
 for( ; x < roiw8; x += 8 )
 {
-#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
+#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
 __asm__ (
 "vld1.16 {d4-d5}, [%[s2ptr]] \n\t"
 "vld1.16 {d8-d9}, [%[s4ptr]] \n\t"
...
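The gaussianBlur5x5, gaussianPyramidDown, and ScharrDeriv hunks follow the same pattern: the guarded `__asm__` blocks start with interleaved `vld2.16` / `vld2.32` loads. For compilers that now skip those blocks (clang in particular), the equivalent deinterleaving load exists as an intrinsic. A small sketch, with an illustrative function name that is not part of carotene:

```cpp
// Hedged sketch: the vld2.16 deinterleaving load used in the asm blocks above,
// expressed with the corresponding NEON intrinsic on the non-asm branch.
#include <arm_neon.h>
#include <stdint.h>

void deinterleave_even_odd_u16(const uint16_t *src, uint16_t *even, uint16_t *odd)
{
    uint16x8x2_t v = vld2q_u16(src);   // val[0] = elements 0,2,4,...; val[1] = 1,3,5,...
    vst1q_u16(even, v.val[0]);
    vst1q_u16(odd,  v.val[1]);
}
```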