Commit 3dbaaf00 authored by fbarchard@google.com

Switch win64 intrinsics to loadu / storeu to support unaligned memory.

BUG=372
TESTED=untested
R=brucedawson@google.com, harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/30729004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1124 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent e7376886
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1123 Version: 1124
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1123 #define LIBYUV_VERSION 1124
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -89,8 +89,8 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -89,8 +89,8 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
xmm1 = _mm_load_si128(&xmm0); xmm1 = _mm_loadu_si128(&xmm0);
xmm2 = _mm_load_si128(&xmm0); xmm2 = _mm_loadu_si128(&xmm0);
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB); xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG); xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR); xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
...@@ -112,12 +112,12 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -112,12 +112,12 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
xmm2 = _mm_packus_epi16(xmm2, xmm2); xmm2 = _mm_packus_epi16(xmm2, xmm2);
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
xmm1 = _mm_load_si128(&xmm0); xmm1 = _mm_loadu_si128(&xmm0);
xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
_mm_store_si128((__m128i *)dst_argb, xmm0); _mm_storeu_si128((__m128i *)dst_argb, xmm0);
_mm_store_si128((__m128i *)(dst_argb + 16), xmm1); _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
y_buf += 8; y_buf += 8;
u_buf += 4; u_buf += 4;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment