Commit 156871d8 authored by Alexander Alekhin

Merge pull request #8080 from alalek:fix_8079

parents 74defefd ec47a0a6
......@@ -113,9 +113,16 @@ static inline void store_interleave(float* ptr, const __m256& a, const __m256& b
v_transpose4x4(v_uint32x4(a1), v_uint32x4(b1), v_uint32x4(c1), z, u0, u1, u2, u3);
v_pack4x3to3x4(u0.val, u1.val, u2.val, u3.val, a1, b1, c1);
#if !defined(__GNUC__) || defined(__INTEL_COMPILER)
_mm256_storeu_ps(ptr, _mm256_setr_m128(_mm_castsi128_ps(a0), _mm_castsi128_ps(b0)));
_mm256_storeu_ps(ptr + 8, _mm256_setr_m128(_mm_castsi128_ps(c0), _mm_castsi128_ps(a1)));
_mm256_storeu_ps(ptr + 16, _mm256_setr_m128(_mm_castsi128_ps(b1), _mm_castsi128_ps(c1)));
#else
// GCC: workaround for missing AVX intrinsic: "_mm256_setr_m128()"
_mm256_storeu_ps(ptr, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(a0)), _mm_castsi128_ps(b0), 1));
_mm256_storeu_ps(ptr + 8, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(c0)), _mm_castsi128_ps(a1), 1));
_mm256_storeu_ps(ptr + 16, _mm256_insertf128_ps(_mm256_castps128_ps256(_mm_castsi128_ps(b1)), _mm_castsi128_ps(c1), 1));
#endif
}
#endif // CV_AVX
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment