Switch Win64 intrinsics to loadu / storeu for unaligned memory.

BUG=372
TESTED=untested
R=brucedawson@google.com, harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/30729004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1124 16f28f9a-4ce2-e073-06de-1de4eb20be90

Switch Win64 intrinsics to loadu / storeu for unaligned memory.
BUG=372
TESTED=untested
R=brucedawson@google.com, harryjin@google.com
Review URL: https://webrtc-codereview.appspot.com/30729004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@1124 16f28f9a-4ce2-e073-06de-1de4eb20be90
3dbaaf00 · fbarchard@google.com · e7376886 · 3dbaaf00 · 3dbaaf00 · 3dbaaf00
Commit 3dbaaf00 authored Oct 16, 2014 by fbarchard@google.com
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 7 deletions

README.chromium README.chromium +1 -1

version.h include/libyuv/version.h +1 -1

row_win.cc source/row_win.cc +5 -5

No files found.
--- a/README.chromium
+++ b/README.chromium
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1123
+Version: 1124
 License: BSD
 License File: LICENSE

--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1123
+#define LIBYUV_VERSION 1124
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -89,8 +89,8 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
    xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
    xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
-    xmm1 = _mm_load_si128(&xmm0);
+    xmm1 = _mm_loadu_si128(&xmm0);
-    xmm2 = _mm_load_si128(&xmm0);
+    xmm2 = _mm_loadu_si128(&xmm0);
    xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
    xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
    xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
@@ -112,12 +112,12 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
    xmm2 = _mm_packus_epi16(xmm2, xmm2);
    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
    xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
-    xmm1 = _mm_load_si128(&xmm0);
+    xmm1 = _mm_loadu_si128(&xmm0);
    xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
    xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
-    _mm_store_si128((__m128i *)dst_argb, xmm0);
+    _mm_storeu_si128((__m128i *)dst_argb, xmm0);
-    _mm_store_si128((__m128i *)(dst_argb + 16), xmm1);
+    _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
    y_buf += 8;
    u_buf += 4;