Commit 2d813910 authored by Alexander Alekhin

Merge pull request #9669 from kraj:master

parents c57aef75 2c7a123e
......@@ -231,7 +231,7 @@ void extract4(const Size2D &size,
srcStride == dst2Stride && \
srcStride == dst3Stride &&
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define SPLIT_ASM2(sgn, bits) __asm__ ( \
"vld2." #bits " {d0, d2}, [%[in0]] \n\t" \
......@@ -351,7 +351,7 @@ void extract4(const Size2D &size,
} \
}
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define ALPHA_QUAD(sgn, bits) { \
internal::prefetch(src + sj); \
......
......@@ -77,7 +77,7 @@ namespace CAROTENE_NS {
dstStride == src2Stride && \
dstStride == src3Stride &&
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
#define MERGE_ASM2(sgn, bits) __asm__ ( \
"vld1." #bits " {d0-d1}, [%[in0]] \n\t" \
......
This diff is collapsed.
......@@ -101,7 +101,7 @@ CVT_FUNC(u8, s8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, u16, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
......@@ -135,7 +135,7 @@ CVT_FUNC(u8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u8, s32, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);
register uint8x16_t zero1 asm ("q2") = vmovq_n_u8(0);
......@@ -173,7 +173,7 @@ CVT_FUNC(u8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u8, f32, 16,
,
{
......@@ -248,7 +248,7 @@ CVT_FUNC(s8, u8, 16,
}
})
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s8, u16, 16,
register uint8x16_t zero0 asm ("q1") = vmovq_n_u8(0);,
{
......@@ -284,7 +284,7 @@ CVT_FUNC(s8, u16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s8, s16, 16,
,
{
......@@ -323,7 +323,7 @@ CVT_FUNC(s8, s16, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s8, s32, 16,
,
{
......@@ -377,7 +377,7 @@ CVT_FUNC(s8, s32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s8, f32, 16,
,
{
......@@ -440,7 +440,7 @@ CVT_FUNC(s8, f32, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, u8, 16,
,
{
......@@ -479,7 +479,7 @@ CVT_FUNC(u16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, s8, 16,
register uint8x16_t v127 asm ("q4") = vmovq_n_u8(127);,
{
......@@ -522,7 +522,7 @@ CVT_FUNC(u16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u16, s16, 8,
register uint16x8_t v32767 asm ("q4") = vmovq_n_u16(0x7FFF);,
{
......@@ -555,7 +555,7 @@ CVT_FUNC(u16, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(u16, s32, 8,
register uint16x8_t zero0 asm ("q1") = vmovq_n_u16(0);,
{
......@@ -589,7 +589,7 @@ CVT_FUNC(u16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(u16, f32, 8,
,
{
......@@ -633,7 +633,7 @@ CVT_FUNC(u16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, u8, 16,
,
{
......@@ -672,7 +672,7 @@ CVT_FUNC(s16, u8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, s8, 16,
,
{
......@@ -711,7 +711,7 @@ CVT_FUNC(s16, s8, 16,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
CVT_FUNC(s16, u16, 8,
register int16x8_t vZero asm ("q4") = vmovq_n_s16(0);,
{
......@@ -747,7 +747,7 @@ CVT_FUNC(s16, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, s32, 8,
,
{
......@@ -786,7 +786,7 @@ CVT_FUNC(s16, s32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s16, f32, 8,
,
{
......@@ -829,7 +829,7 @@ CVT_FUNC(s16, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, u8, 8,
,
{
......@@ -870,7 +870,7 @@ CVT_FUNC(s32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, s8, 8,
,
{
......@@ -911,7 +911,7 @@ CVT_FUNC(s32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, u16, 8,
,
{
......@@ -950,7 +950,7 @@ CVT_FUNC(s32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, s16, 8,
,
{
......@@ -989,7 +989,7 @@ CVT_FUNC(s32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(s32, f32, 8,
,
{
......@@ -1034,7 +1034,7 @@ CVT_FUNC(s32, f32, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, u8, 8,
register float32x4_t vmult asm ("q0") = vdupq_n_f32((float)(1 << 16));
register uint32x4_t vmask asm ("q1") = vdupq_n_u32(1<<16);,
......@@ -1101,7 +1101,7 @@ CVT_FUNC(f32, u8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s8, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......@@ -1153,7 +1153,7 @@ CVT_FUNC(f32, s8, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, u16, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......@@ -1212,7 +1212,7 @@ CVT_FUNC(f32, u16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s16, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......@@ -1271,7 +1271,7 @@ CVT_FUNC(f32, s16, 8,
})
#endif
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
CVT_FUNC(f32, s32, 8,
register float32x4_t vhalf asm ("q0") = vdupq_n_f32(0.5f);,
{
......
This diff is collapsed.
......@@ -327,7 +327,7 @@ void gaussianBlur5x5(const Size2D &size, s32 cn,
u16* lidx1 = lane + x - 1*2;
u16* lidx3 = lane + x + 1*2;
u16* lidx4 = lane + x + 2*2;
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ __volatile__ (
"vld2.16 {d0, d2}, [%[in0]]! \n\t"
"vld2.16 {d1, d3}, [%[in0]] \n\t"
......
......@@ -331,7 +331,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for (; x < roiw8; x += 8)
{
internal::prefetch(lane + 2 * x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld2.16 {d0-d3}, [%[in0]] \n\t"
"vld2.16 {d4-d7}, [%[in4]] \n\t"
......@@ -538,7 +538,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for (; x < roiw4; x += 4)
{
internal::prefetch(lane + 2 * x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld2.32 {d0-d3}, [%[in0]] \n\t"
"vld2.32 {d4-d7}, [%[in4]] \n\t"
......@@ -672,7 +672,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
std::vector<f32> _buf(cn*(srcSize.width + 4) + 32/sizeof(f32));
f32* lane = internal::alignPtr(&_buf[2*cn], 32);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
register float32x4_t vc6d4f32 asm ("q11") = vmovq_n_f32(1.5f); // 6/4
register float32x4_t vc1d4f32 asm ("q12") = vmovq_n_f32(0.25f); // 1/4
......@@ -739,7 +739,7 @@ void gaussianPyramidDown(const Size2D &srcSize,
for (; x < roiw4; x += 4)
{
internal::prefetch(lane + 2 * x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ __volatile__ (
"vld2.32 {d0-d3}, [%[in0]] \n\t"
"vld2.32 {d8-d11}, [%[in4]] \n\t"
......
......@@ -109,7 +109,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
internal::prefetch(srow0 + x);
internal::prefetch(srow1 + x);
internal::prefetch(srow2 + x);
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7 && !defined(__clang__)
__asm__ (
"vld1.8 {d0}, [%[src0]] \n\t"
"vld1.8 {d2}, [%[src2]] \n\t"
......@@ -161,7 +161,7 @@ void ScharrDeriv(const Size2D &size, s32 cn,
x = 0;
for( ; x < roiw8; x += 8 )
{
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
#if !defined(__aarch64__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__clang__)
__asm__ (
"vld1.16 {d4-d5}, [%[s2ptr]] \n\t"
"vld1.16 {d8-d9}, [%[s4ptr]] \n\t"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment