Commit 032b5f99 authored by fbarchard@google.com

Port the I420ToYUY2 code to support I420ToYUY2, I422ToYUY2, and I420ToV210.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/388011

git-svn-id: http://libyuv.googlecode.com/svn/trunk@173 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 798197fc
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 172
+Version: 173
License: BSD
License File: LICENSE
......
@@ -69,6 +69,19 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height);
int I422ToUYVY(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height);

int I420ToV210(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height);
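// Usage sketch (illustration only, not part of this change; assumes the
// V210 stride rule (width + 47) / 48 * 128 described in convert_from.cc):
//
//   int dst_stride = (width + 47) / 48 * 128;  // 6 pixels per 16 bytes
//   std::vector<uint8> dst(dst_stride * height);
//   I420ToV210(y, y_stride, u, u_stride, v, v_stride,
//              &dst[0], dst_stride, width, height);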
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
                uint8* dst_y, int dst_stride_y,
                uint8* dst_u, int dst_stride_u,
......
@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 172
+#define LIBYUV_VERSION 173
#endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -10,12 +10,6 @@
#include "libyuv/convert.h"
-//#define SCALEOPT //Currently for windows only. June 2010
-#ifdef SCALEOPT
-#include <emmintrin.h>  // Not currently used
-#endif
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h" #include "libyuv/format_conversion.h"
...@@ -32,6 +26,9 @@ extern "C" { ...@@ -32,6 +26,9 @@ extern "C" {
// YUY2 - Macro-pixel = 2 image pixels // YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
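// For reference, both packings carry two image pixels per 4-byte macro-pixel
// and differ only in byte order. Scalar illustration (these helpers are
// hypothetical, not part of this change):
static inline void PackYUY2(uint8 y0, uint8 y1, uint8 u, uint8 v, uint8* dst) {
  dst[0] = y0; dst[1] = u; dst[2] = y1; dst[3] = v;  // Y0 U0 Y1 V0
}
static inline void PackUYVY(uint8 y0, uint8 y1, uint8 u, uint8 v, uint8* dst) {
  dst[0] = u; dst[1] = y0; dst[2] = v; dst[3] = y1;  // U0 Y0 V0 Y1
}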
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
__declspec(naked)
@@ -50,12 +47,12 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
    sub        edx, esi
  convertloop:
-    movdqa     xmm0, [eax]  // Y
-    lea        eax, [eax + 16]
    movq       xmm2, qword ptr [esi]  // U
    movq       xmm3, qword ptr [esi + edx]  // V
    lea        esi, [esi + 8]
    punpcklbw  xmm2, xmm3  // UV
+    movdqa     xmm0, [eax]  // Y
+    lea        eax, [eax + 16]
    movdqa     xmm1, xmm0
    punpcklbw  xmm0, xmm2  // YUYV
    punpckhbw  xmm1, xmm2
@@ -70,6 +67,44 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
    ret
  }
}
#define HAS_I42XTOUYVYROW_SSE2
__declspec(naked)
static void I42xToUYVYRow_SSE2(const uint8* src_y,
                               const uint8* src_u,
                               const uint8* src_v,
                               uint8* dst_frame, int width) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]   // src_y
    mov        esi, [esp + 8 + 8]   // src_u
    mov        edx, [esp + 8 + 12]  // src_v
    mov        edi, [esp + 8 + 16]  // dst_frame
    mov        ecx, [esp + 8 + 20]  // width
    sub        edx, esi
  convertloop:
    movq       xmm2, qword ptr [esi]  // U
    movq       xmm3, qword ptr [esi + edx]  // V
    lea        esi, [esi + 8]
    punpcklbw  xmm2, xmm3  // UV
    movdqa     xmm0, [eax]  // Y
    movdqa     xmm1, xmm2
    lea        eax, [eax + 16]
    punpcklbw  xmm1, xmm0  // UYVY
    punpckhbw  xmm2, xmm0
    movdqa     [edi], xmm1
    movdqa     [edi + 16], xmm2
    lea        edi, [edi + 32]
    sub        ecx, 16
    ja         convertloop
    pop        edi
    pop        esi
    ret
  }
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
static void I42xToYUY2Row_SSE2(const uint8* src_y,
@@ -77,33 +112,68 @@ static void I42xToYUY2Row_SSE2(const uint8* src_y,
                               const uint8* src_v,
                               uint8* dst_frame, int width) {
  asm volatile (
    "sub        %1,%2                         \n"
  "1:                                         \n"
-    "movdqa     (%0),%%xmm0                   \n"
-    "lea        0x10(%0),%0                   \n"
    "movq       (%1),%%xmm2                   \n"
    "movq       (%1,%2,1),%%xmm3              \n"
    "lea        0x8(%1),%1                    \n"
    "punpcklbw  %%xmm3,%%xmm2                 \n"
+    "movdqa     (%0),%%xmm0                   \n"
+    "lea        0x10(%0),%0                   \n"
    "movdqa     %%xmm0,%%xmm1                 \n"
    "punpcklbw  %%xmm2,%%xmm0                 \n"
    "punpckhbw  %%xmm2,%%xmm1                 \n"
    "movdqa     %%xmm0,(%3)                   \n"
    "movdqa     %%xmm1,0x10(%3)               \n"
    "lea        0x20(%3),%3                   \n"
    "sub        $0x10,%4                      \n"
    "ja         1b                            \n"
  : "+r"(src_y),     // %0
    "+r"(src_u),     // %1
    "+r"(src_v),     // %2
    "+r"(dst_frame), // %3
    "+rm"(width)     // %4
  :
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3"
#endif
  );
}

#define HAS_I42XTOUYVYROW_SSE2
static void I42xToUYVYRow_SSE2(const uint8* src_y,
                               const uint8* src_u,
                               const uint8* src_v,
                               uint8* dst_frame, int width) {
  asm volatile (
    "sub        %1,%2                         \n"
  "1:                                         \n"
    "movq       (%1),%%xmm2                   \n"
    "movq       (%1,%2,1),%%xmm3              \n"
    "lea        0x8(%1),%1                    \n"
    "punpcklbw  %%xmm3,%%xmm2                 \n"
    "movdqa     (%0),%%xmm0                   \n"
    "movdqa     %%xmm2,%%xmm1                 \n"
    "lea        0x10(%0),%0                   \n"
    "punpcklbw  %%xmm0,%%xmm1                 \n"
    "punpckhbw  %%xmm0,%%xmm2                 \n"
    "movdqa     %%xmm1,(%3)                   \n"
    "movdqa     %%xmm2,0x10(%3)               \n"
    "lea        0x20(%3),%3                   \n"
    "sub        $0x10,%4                      \n"
    "ja         1b                            \n"
  : "+r"(src_y),     // %0
    "+r"(src_u),     // %1
    "+r"(src_v),     // %2
    "+r"(dst_frame), // %3
    "+rm"(width)     // %4
  :
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3"
#endif
  );
}
#endif
@@ -127,6 +197,104 @@ void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
  }
}
void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
                     uint8* dst_frame, int width) {
  for (int x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];  // duplicate last y
  }
}
// gcc provided macros
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif

#ifdef LIBYUV_LITTLE_ENDIAN
#define READWORD(p) (*((uint32*) (p)))
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
#else
uint32 READWORD(const uint8* p) {
  return (uint32) p[0] |
         ((uint32) (p[1]) << 8) |
         ((uint32) (p[2]) << 16) |
         ((uint32) (p[3]) << 24);
}
void WRITEWORD(uint8* p, uint32 v) {
  p[0] = (uint8)(v & 255);
  p[1] = (uint8)((v >> 8) & 255);
  p[2] = (uint8)((v >> 16) & 255);
  p[3] = (uint8)((v >> 24) & 255);
}
#endif
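// Both paths store the 32-bit word least-significant byte first; the scalar
// fallback spells out what the little-endian direct store does implicitly.
// Illustration only (not part of this change):
//   WRITEWORD(p, 0x12345678u);  // leaves p[0..3] = 0x78 0x56 0x34 0x12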
// Must be multiple of 6 pixels. Will over convert to handle remainder.
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
  for (int x = 0; x < width; x += 6) {
    uint32 w = READWORD(src_v210 + 0);
    dst_uyvy[0] = (w >> 2) & 0xff;
    dst_uyvy[1] = (w >> 12) & 0xff;
    dst_uyvy[2] = (w >> 22) & 0xff;
    w = READWORD(src_v210 + 4);
    dst_uyvy[3] = (w >> 2) & 0xff;
    dst_uyvy[4] = (w >> 12) & 0xff;
    dst_uyvy[5] = (w >> 22) & 0xff;
    w = READWORD(src_v210 + 8);
    dst_uyvy[6] = (w >> 2) & 0xff;
    dst_uyvy[7] = (w >> 12) & 0xff;
    dst_uyvy[8] = (w >> 22) & 0xff;
    w = READWORD(src_v210 + 12);
    dst_uyvy[9] = (w >> 2) & 0xff;
    dst_uyvy[10] = (w >> 12) & 0xff;
    dst_uyvy[11] = (w >> 22) & 0xff;
    src_v210 += 16;
    dst_uyvy += 12;
  }
}
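// Worked example (values chosen for illustration; this helper is not part of
// the change): each 32-bit word holds three 10-bit components at bit offsets
// 0, 10 and 20, with the top two bits unused; ">> 2" keeps the high 8 bits
// of each component.
static void V210WordIllustration() {
  uint32 w = (0x0ABu << 20) | (0x155u << 10) | 0x200u;  // V0, Y0, U0 packed
  uint8 u0 = (w >> 2) & 0xff;   // 0x80: high 8 bits of U0 = 0x200
  uint8 y0 = (w >> 12) & 0xff;  // 0x55: high 8 bits of Y0 = 0x155
  uint8 v0 = (w >> 22) & 0xff;  // 0x2A: high 8 bits of V0 = 0x0AB
  (void)u0; (void)y0; (void)v0;
}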
#define EIGHTTOTEN(x) (x << 2 | x >> 6)
void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) {
  for (int x = 0; x < width; x += 6) {
    WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) |
                            (EIGHTTOTEN(src_uyvy[1]) << 10) |
                            (EIGHTTOTEN(src_uyvy[2]) << 20));
    WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) |
                            (EIGHTTOTEN(src_uyvy[4]) << 10) |
                            (EIGHTTOTEN(src_uyvy[5]) << 20));
    WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) |
                            (EIGHTTOTEN(src_uyvy[7]) << 10) |
                            (EIGHTTOTEN(src_uyvy[8]) << 20));
    WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) |
                             (EIGHTTOTEN(src_uyvy[10]) << 10) |
                             (EIGHTTOTEN(src_uyvy[11]) << 20));
    src_uyvy += 12;
    dst_v210 += 16;
  }
}
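// EIGHTTOTEN widens 8 bits to 10 by shifting left two and replicating the
// two high bits into the two new low bits, so the extremes are preserved
// exactly. For example:
//   EIGHTTOTEN(0x00) == 0x000
//   EIGHTTOTEN(0x80) == (0x200 | 0x02) == 0x202
//   EIGHTTOTEN(0xFF) == (0x3FC | 0x03) == 0x3FF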
int I422ToYUY2(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
@@ -143,17 +311,16 @@ int I422ToYUY2(const uint8* src_y, int src_stride_y,
  }
  void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
+  I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToYUY2Row = I42xToYUY2Row_SSE2;
-  } else
-#endif
-  {
-    I42xToYUY2Row = I42xToYUY2Row_C;
  }
+#endif
  for (int y = 0; y < height; ++y) {
    I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
@@ -180,17 +347,16 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
  }
  void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
+  I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToYUY2Row = I42xToYUY2Row_SSE2;
-  } else
-#endif
-  {
-    I42xToYUY2Row = I42xToYUY2Row_C;
  }
+#endif
  for (int y = 0; y < height - 1; y += 2) {
    I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
    I42xToYUY2Row(src_y + src_stride_y, src_u, src_v,
@@ -206,6 +372,42 @@ int I420ToYUY2(const uint8* src_y, int src_stride_y,
  return 0;
}
int I422ToUYVY(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }
  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
  I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToUYVYRow = I42xToUYVYRow_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_frame += dst_stride_frame;
  }
  return 0;
}
int I420ToUYVY(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
@@ -214,107 +416,85 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
+    dst_stride_frame = -dst_stride_frame;
+  }
+  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
+                        const uint8* src_v, uint8* dst_frame, int width);
+  I42xToUYVYRow = I42xToUYVYRow_C;
+#if defined(HAS_I42XTOUYVYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
+    I42xToUYVYRow = I42xToUYVYRow_SSE2;
+  }
+#endif
-  int i = 0;
-  const uint8* y1 = src_y;
-  const uint8* y2 = y1 + src_stride_y;
-  const uint8* u = src_u;
-  const uint8* v = src_v;
-  uint8* out1 = dst_frame;
-  uint8* out2 = dst_frame + dst_stride_frame;
-  // Macro-pixel = 2 image pixels
-  // U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
-#ifndef SCALEOPT
-  for (; i < ((height + 1) >> 1); i++) {
-    for (int j = 0; j < ((width + 1) >> 1); j++) {
-      out1[0] = *u;
-      out1[1] = y1[0];
-      out1[2] = *v;
-      out1[3] = y1[1];
-      out2[0] = *u;
-      out2[1] = y2[0];
-      out2[2] = *v;
-      out2[3] = y2[1];
-      out1 += 4;
-      out2 += 4;
-      u++;
-      v++;
-      y1 += 2;
-      y2 += 2;
-    }
-    y1 += 2 * src_stride_y - width;
-    y2 += 2 * src_stride_y - width;
-    u += src_stride_u - ((width + 1) >> 1);
-    v += src_stride_v - ((width + 1) >> 1);
-    out1 += 2 * (dst_stride_frame - width);
-    out2 += 2 * (dst_stride_frame - width);
-  }
-#else
-  for (; i < (height >> 1); i++) {
-    int32 width__ = (width >> 4);
-    _asm {
-      ;pusha
-      mov eax, DWORD PTR [in1]            ;1939.33
-      mov ecx, DWORD PTR [in2]            ;1939.33
-      mov ebx, DWORD PTR [src_u]          ;1939.33
-      mov edx, DWORD PTR [src_v]          ;1939.33
-    loop0:
-      movq xmm6, QWORD PTR [ebx]          ;src_u
-      movq xmm0, QWORD PTR [edx]          ;src_v
-      punpcklbw xmm6, xmm0                ;src_u, src_v mix
-      movdqa xmm1, xmm6
-      movdqa xmm2, xmm6
-      movdqa xmm4, xmm6
-      movdqu xmm3, XMMWORD PTR [eax]      ;in1
-      punpcklbw xmm1, xmm3                ;src_u, in1, src_v
-      mov esi, DWORD PTR [out1]
-      movdqu XMMWORD PTR [esi], xmm1      ;write to out1
-      movdqu xmm5, XMMWORD PTR [ecx]      ;in2
-      punpcklbw xmm2, xmm5                ;src_u, in2, src_v
-      mov edi, DWORD PTR [out2]
-      movdqu XMMWORD PTR [edi], xmm2      ;write to out2
-      punpckhbw xmm4, xmm3                ;src_u, in1, src_v again
-      movdqu XMMWORD PTR [esi+16], xmm4   ;write to out1 again
-      add esi, 32
-      mov DWORD PTR [out1], esi
-      punpckhbw xmm6, xmm5                ;src_u, in2, src_v again
-      movdqu XMMWORD PTR [edi+16], xmm6   ;write to out2 again
-      add edi, 32
-      mov DWORD PTR [out2], edi
-      add ebx, 8
-      add edx, 8
-      add eax, 16
-      add ecx, 16
-      mov esi, DWORD PTR [width__]
-      sub esi, 1
-      mov DWORD PTR [width__], esi
-      jg loop0
-      mov DWORD PTR [in1], eax            ;1939.33
-      mov DWORD PTR [in2], ecx            ;1939.33
-      mov DWORD PTR [src_u], ebx          ;1939.33
-      mov DWORD PTR [src_v], edx          ;1939.33
-      ;popa
-      emms
-    }
-    in1 += width;
-    in2 += width;
-    out1 += 2 * (dst_stride_frame - width);
-    out2 += 2 * (dst_stride_frame - width);
-  }
-#endif
+  for (int y = 0; y < height - 1; y += 2) {
+    I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
+    I42xToUYVYRow(src_y + src_stride_y, src_u, src_v,
+                  dst_frame + dst_stride_frame, width);
+    src_y += src_stride_y * 2;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_frame += dst_stride_frame * 2;
+  }
+  if (height & 1) {
+    I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
+  }
+  return 0;
+}
+
+int I420ToV210(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_frame, int dst_stride_frame,
+               int width, int height) {
+  if (width * 16 / 6 > kMaxStride ||  // row buffer of V210 is required
+      src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
+    dst_stride_frame = -dst_stride_frame;
+  }
+  SIMD_ALIGNED(uint8 row[kMaxStride]);
+
+  void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix);
+  UYVYToV210Row = UYVYToV210Row_C;
+
+  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
+                        const uint8* src_v, uint8* dst_frame, int width);
+  I42xToUYVYRow = I42xToUYVYRow_C;
+#if defined(HAS_I42XTOUYVYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(width, 16) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
+    I42xToUYVYRow = I42xToUYVYRow_SSE2;
+  }
+#endif
+  for (int y = 0; y < height - 1; y += 2) {
+    I42xToUYVYRow(src_y, src_u, src_v, row, width);
+    UYVYToV210Row(row, dst_frame, width);
+    I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
+    UYVYToV210Row(row, dst_frame + dst_stride_frame, width);
+    src_y += src_stride_y * 2;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_frame += dst_stride_frame * 2;
+  }
+  if (height & 1) {
+    I42xToUYVYRow(src_y, src_u, src_v, row, width);
+    UYVYToV210Row(row, dst_frame, width);
+  }
  return 0;
}
@@ -467,56 +647,6 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
  return 0;
}
-// gcc provided macros
-#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define LIBYUV_LITTLE_ENDIAN
-#endif
-// Visual C for x86 defines these
-#elif defined(_M_X64) || defined(_M_IX86)
-#define LIBYUV_LITTLE_ENDIAN
-#endif
-#ifdef LIBYUV_LITTLE_ENDIAN
-#define READWORD(p) (*((uint32*) (p)))
-#else
-uint32 READWORD(const uint8* p) {
-  return (uint32) p[0] |
-         ((uint32) (p[1]) << 8) |
-         ((uint32) (p[2]) << 16) |
-         ((uint32) (p[3]) << 24);
-}
-#endif
-// Must be multiple of 6 pixels. Will over convert to handle remainder.
-// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
-void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
-  for (int x = 0; x < width; x += 6) {
-    uint32 w = READWORD(src_v210 + 0);
-    dst_uyvy[0] = (w >> 2) & 0xff;
-    dst_uyvy[1] = (w >> 12) & 0xff;
-    dst_uyvy[2] = (w >> 22) & 0xff;
-    w = READWORD(src_v210 + 4);
-    dst_uyvy[3] = (w >> 2) & 0xff;
-    dst_uyvy[4] = (w >> 12) & 0xff;
-    dst_uyvy[5] = (w >> 22) & 0xff;
-    w = READWORD(src_v210 + 8);
-    dst_uyvy[6] = (w >> 2) & 0xff;
-    dst_uyvy[7] = (w >> 12) & 0xff;
-    dst_uyvy[8] = (w >> 22) & 0xff;
-    w = READWORD(src_v210 + 12);
-    dst_uyvy[9] = (w >> 2) & 0xff;
-    dst_uyvy[10] = (w >> 12) & 0xff;
-    dst_uyvy[11] = (w >> 22) & 0xff;
-    dst_uyvy += 12;
-    src_v210 += 16;
-  }
-}
// Convert V210 to I420.
// V210 is a 10 bit version of UYVY. 16 bytes store 6 pixels.
// Width is a multiple of 48.
@@ -525,7 +655,7 @@ int V210ToI420(const uint8* src_v210, int src_stride_v210,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
-  if (width * 16 / 6 > kMaxStride) {  // row buffer is required
+  if (width * 2 * 2 > kMaxStride) {  // 2 rows of UYVY is required
    return -1;
  }
  // Negative height means invert the image.
@@ -1098,9 +1228,11 @@ int ConvertToI420(const uint8* sample, size_t sample_size,
                 dst_width, inv_dst_height);
      break;
    case FOURCC_V210:
-      // TODO(fbarchard): Confirm stride is 16 bytes per 6 pixels.
-      src = sample + (aligned_src_width * crop_y + crop_x) * 16 / 6;
-      V210ToI420(src, aligned_src_width * 16 / 6,
+      // stride is multiple of 48 pixels (128 bytes).
+      // pixels come in groups of 6 = 16 bytes
+      src = sample + (aligned_src_width + 47) / 48 * 128 * crop_y +
+            crop_x / 6 * 16;
+      V210ToI420(src, (aligned_src_width + 47) / 48 * 128,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
......
@@ -49,6 +49,15 @@ int ConvertFromI420(const uint8* y, int y_stride,
                  dst_sample_stride ? dst_sample_stride : width * 2,
                  width, height);
      break;
    case FOURCC_V210:
      I420ToV210(y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dst_sample,
                 dst_sample_stride ? dst_sample_stride :
                     (width + 47) / 48 * 128,
                 width, height);
      break;
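      // The V210 stride rounds width up to whole 48-pixel blocks of 128
      // bytes each (8 groups of 6 pixels x 16 bytes per group). Worked
      // example with an assumed width of 1280:
      //   (1280 + 47) / 48 = 27 blocks; 27 * 128 = 3456 bytes per row,
      //   versus 1280 * 16 / 6 = 3413 bytes of unpadded payload.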
    case FOURCC_RGBP:
      I420ToRGB565(y, y_stride,
                   u, u_stride,
......