Commit b6149763 authored by fbarchard@google.com

ARGB to and from I420 ported to x64

BUG=none
TEST=media_unittests
Review URL: http://webrtc-codereview.appspot.com/266003

git-svn-id: http://libyuv.googlecode.com/svn/trunk@61 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 755de365
......@@ -19,15 +19,6 @@
#ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED
#ifdef COMPILER_MSVC
typedef __int64 int64;
#else
typedef long long int64;
#endif /* COMPILER_MSVC */
typedef int int32;
typedef short int16;
typedef char int8;
#ifdef COMPILER_MSVC
typedef unsigned __int64 uint64;
typedef __int64 int64;
......@@ -38,7 +29,18 @@ typedef __int64 int64;
#define UINT64_C(x) x ## UI64
#endif
#define INT64_F "I64"
#else
#else // COMPILER_MSVC
#ifdef __LP64__
typedef unsigned long uint64;
typedef long int64;
#ifndef INT64_C
#define INT64_C(x) x ## L
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UL
#endif
#define INT64_F "l"
#else // __LP64__
typedef unsigned long long uint64;
typedef long long int64;
#ifndef INT64_C
......@@ -48,10 +50,14 @@ typedef long long int64;
#define UINT64_C(x) x ## ULL
#endif
#define INT64_F "ll"
#endif /* COMPILER_MSVC */
#endif // __LP64__
#endif // COMPILER_MSVC
typedef unsigned int uint32;
typedef int int32;
typedef unsigned short uint16;
typedef short int16;
typedef unsigned char uint8;
typedef char int8;
#endif // INT_TYPES_DEFINED
// Detect compiler is for x86 or x64.
......
......@@ -13,6 +13,7 @@
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
namespace libyuv {
......@@ -92,6 +93,17 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
} // namespace libyuv
// Convert camera sample to I420 with cropping, rotation and vertical flip.
int ConvertToI420(const uint8* src_frame, size_t src_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int horiz_crop, int vert_crop,
int w, int h,
int dw, int idh,
RotationMode rotation,
uint32 format);
} // namespace libyuv
#endif // INCLUDE_LIBYUV_CONVERT_H_
......@@ -27,7 +27,9 @@ static const int kCpuInitialized = 8;
bool TestCpuFlag(int flag);
// For testing, allow CPU flags to be disabled.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. -1 to enable all.
// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
// -1 to enable all cpu specific optimizations.
// 0 to disable all cpu specific optimizations.
void MaskCpuFlags(int enable_flags);
} // namespace libyuv
......
......@@ -13,7 +13,11 @@
#include "conversion_tables.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "row.h"
#include "video_common.h"
//#define SCALEOPT //Currently for windows only. June 2010
......@@ -650,7 +654,7 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGBTOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
......@@ -661,7 +665,7 @@ int ARGBToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ARGBToYRow_C;
}
#if defined(HAS_ARGBTOUVROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
......@@ -703,7 +707,7 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_BGRATOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
......@@ -714,7 +718,7 @@ int BGRAToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = BGRAToYRow_C;
}
#if defined(HAS_BGRATOUVROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
......@@ -756,7 +760,7 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ABGRTOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
......@@ -767,7 +771,7 @@ int ABGRToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = ABGRToYRow_C;
}
#if defined(HAS_ABGRTOUVROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
......@@ -809,7 +813,7 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RGB24TOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
......@@ -820,7 +824,7 @@ int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = RGB24ToYRow_C;
}
#if defined(HAS_RGB24TOUVROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
......@@ -862,7 +866,7 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_RAWTOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
......@@ -873,7 +877,7 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
ARGBToYRow = RAWToYRow_C;
}
#if defined(HAS_RAWTOUVROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
if (TestCpuFlag(kCpuHasSSSE3) &&
(width % 16 == 0) &&
IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
......@@ -901,4 +905,163 @@ int RAWToI420(const uint8* src_frame, int src_stride_frame,
return 0;
}
// Convert camera sample to I420 with cropping, rotation and vertical flip.
//
//   sample, sample_size    Source frame, laid out as described by |format|.
//                          sample_size is currently not consulted.
//   y, u, v (+ strides)    Destination I420 planes.
//   horiz_crop, vert_crop  Offset, in source pixels, of the first pixel to
//                          convert.
//   w, h                   Dimensions of the source frame.  Only the magnitude
//                          of h is used when locating source planes.
//   dw, idh                Width and height handed to the per-format
//                          converter.
//                          NOTE(review): presumably a negative idh requests a
//                          vertical flip - confirm against the converters.
//   rotation               Forwarded only on the NV12, NV21, I420 and YV12
//                          paths; the other formats ignore it.
//   format                 FourCC code of the source layout.
//
// Returns 0 when |format| is handled and -1 when it is not.  The return
// values of the per-format converters are not inspected.
int ConvertToI420(const uint8* sample, size_t sample_size,
                  uint8* y, int y_stride,
                  uint8* u, int u_stride,
                  uint8* v, int v_stride,
                  int horiz_crop, int vert_crop,
                  int w, int h,
                  int dw, int idh,
                  RotationMode rotation,
                  uint32 format) {
  // Width rounded up to an even number of pixels; used as the row pitch of
  // formats that store chroma in pairs.
  int aw = (w + 1) & ~1;
  const uint8* src;
  const uint8* src_uv;
  // Plane offsets must be computed from the magnitude of the height.
  int abs_h = (h < 0) ? -h : h;
  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      src = sample + (aw * vert_crop + horiz_crop) * 2;
      YUY2ToI420(src, aw * 2,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    case FOURCC_UYVY:
      src = sample + (aw * vert_crop + horiz_crop) * 2;
      UYVYToI420(src, aw * 2,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    case FOURCC_24BG:
      src = sample + (w * vert_crop + horiz_crop) * 3;
      RGB24ToI420(src, w * 3,
                  y, y_stride,
                  u, u_stride,
                  v, v_stride,
                  dw, idh);
      break;
    case FOURCC_RAW:
      src = sample + (w * vert_crop + horiz_crop) * 3;
      RAWToI420(src, w * 3,
                y, y_stride,
                u, u_stride,
                v, v_stride,
                dw, idh);
      break;
    case FOURCC_ARGB:
      src = sample + (w * vert_crop + horiz_crop) * 4;
      ARGBToI420(src, w * 4,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    case FOURCC_BGRA:
      src = sample + (w * vert_crop + horiz_crop) * 4;
      BGRAToI420(src, w * 4,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    case FOURCC_ABGR:
      src = sample + (w * vert_crop + horiz_crop) * 4;
      ABGRToI420(src, w * 4,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    case FOURCC_BGGR:
    case FOURCC_RGGB:
    case FOURCC_GRBG:
    case FOURCC_GBRG:
      // TODO(fbarchard): We could support cropping by odd numbers by
      // adjusting fourcc.
      src = sample + (w * vert_crop + horiz_crop);
      BayerRGBToI420(src, w, format,
                     y, y_stride, u, u_stride, v, v_stride,
                     dw, idh);
      break;
    // Biplanar formats
    case FOURCC_M420:
      src = sample + (w * vert_crop) * 12 / 8 + horiz_crop;
      M420ToI420(src, w,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    case FOURCC_NV12:
      src = sample + (w * vert_crop + horiz_crop);
      // The interleaved UV plane follows the Y plane; use abs_h so a negative
      // h does not place the pointer in front of the sample buffer.
      src_uv = sample + aw * (abs_h + vert_crop / 2) + horiz_crop;
      NV12ToI420Rotate(src, w,
                       src_uv, aw,
                       y, y_stride,
                       u, u_stride,
                       v, v_stride,
                       dw, idh, rotation);
      break;
    case FOURCC_NV21:
      src = sample + (w * vert_crop + horiz_crop);
      src_uv = sample + aw * (abs_h + vert_crop / 2) + horiz_crop;
      // NV21 interleaves chroma as VU rather than UV, so reuse the NV12
      // converter with the u and v destination planes swapped.
      NV12ToI420Rotate(src, w,
                       src_uv, aw,
                       y, y_stride,
                       v, v_stride,
                       u, u_stride,
                       dw, idh, rotation);
      break;
    case FOURCC_Q420:
      src = sample + (w + aw * 2) * vert_crop + horiz_crop;
      src_uv = sample + (w + aw * 2) * vert_crop + w + horiz_crop * 2;
      Q420ToI420(src, w * 3,
                 src_uv, w * 3,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh);
      break;
    // Triplanar formats
    case FOURCC_I420:
    case FOURCC_YV12: {
      const uint8* src_y = sample + (w * vert_crop + horiz_crop);
      const uint8* src_u;
      const uint8* src_v;
      int halfwidth = (w + 1) / 2;
      int halfheight = (abs_h + 1) / 2;
      // I420 stores the U plane first; YV12 stores the V plane first.
      if (format == FOURCC_I420) {
        src_u = sample + w * abs_h +
            (halfwidth * vert_crop + horiz_crop) / 2;
        src_v = sample + w * abs_h +
            halfwidth * (halfheight + vert_crop / 2) + horiz_crop / 2;
      } else {
        src_v = sample + w * abs_h +
            (halfwidth * vert_crop + horiz_crop) / 2;
        src_u = sample + w * abs_h +
            halfwidth * (halfheight + vert_crop / 2) + horiz_crop / 2;
      }
      I420Rotate(src_y, w,
                 src_u, halfwidth,
                 src_v, halfwidth,
                 y, y_stride,
                 u, u_stride,
                 v, v_stride,
                 dw, idh, rotation);
      break;
    }
    // Formats not supported
    case FOURCC_MJPG:
    default:
      return -1;  // unknown fourcc - return failure code.
  }
  return 0;
}
} // namespace libyuv
......@@ -14,11 +14,14 @@
#ifdef _MSC_VER
#include <intrin.h>
#endif
#ifdef __ANDROID__
#include <cpu-features.h>
#endif
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile (
asm volatile (
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
......@@ -28,7 +31,7 @@ static inline void __cpuid(int cpu_info[4], int info_type) {
}
#elif defined(__i386__) || defined(__x86_64__)
static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile (
asm volatile (
"cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
......@@ -49,6 +52,10 @@ static void InitCpuFlags() {
cpu_info_ = (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0) |
(cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
kCpuInitialized;
#elif defined(__ANDROID__) && defined(__ARM_NEON__)
features = android_getCpuFeatures();
cpu_info_ = (features & ANDROID_CPU_ARM_FEATURE_NEON) ? kCpuHasNEON : 0) |
kCpuInitialized;
#elif defined(__ARM_NEON__)
// gcc -mfpu=neon defines __ARM_NEON__
// Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags
......@@ -61,14 +68,14 @@ static void InitCpuFlags() {
// Restricts the cached CPU feature flags to those that are also set in
// enable_flags.  InitCpuFlags() is run first so there is something to mask.
// kCpuInitialized is kept set in the result, so a mask of 0 does not leave
// cpu_info_ at 0, which TestCpuFlag() would treat as "not yet initialized".
void MaskCpuFlags(int enable_flags) {
InitCpuFlags();
cpu_info_ &= enable_flags;
cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized;
}
// Returns true when any bit of flag is set in the cached CPU flags.
// A cached value of 0 means the flags have not been set up yet, in which
// case InitCpuFlags() is called before testing.
bool TestCpuFlag(int flag) {
if (0 == cpu_info_) {
InitCpuFlags();
}
return cpu_info_ & flag ? true : false;
return (cpu_info_ & flag) ? true : false;
}
} // namespace libyuv
......@@ -14,8 +14,6 @@
#include "video_common.h"
#include "row.h"
#define kMaxStride (2048 * 4)
namespace libyuv {
// Note: to do this with Neon vld4.8 would load ARGB values into 4 registers
......@@ -168,7 +166,7 @@ static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 g = src_bayer0[1];
uint8 r = src_bayer1[1];
for (int x = 0; x < (pix - 2); x += 2) {
for (int x = 0; x < pix - 3; x += 2) {
dst_rgb[0] = src_bayer0[0];
dst_rgb[1] = AVG(g, src_bayer0[1]);
dst_rgb[2] = AVG(r, src_bayer1[1]);
......@@ -187,10 +185,12 @@ static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
dst_rgb[1] = AVG(g, src_bayer0[1]);
dst_rgb[2] = AVG(r, src_bayer1[1]);
dst_rgb[3] = 255U;
dst_rgb[4] = src_bayer0[0];
dst_rgb[5] = src_bayer0[1];
dst_rgb[6] = src_bayer1[1];
dst_rgb[7] = 255U;
if (pix & 1) {
dst_rgb[4] = src_bayer0[0];
dst_rgb[5] = src_bayer0[1];
dst_rgb[6] = src_bayer1[1];
dst_rgb[7] = 255U;
}
}
static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
......@@ -198,7 +198,7 @@ static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 g = src_bayer0[1];
uint8 b = src_bayer1[1];
for (int x = 0; x < (pix - 2); x += 2) {
for (int x = 0; x < pix - 3; x += 2) {
dst_rgb[0] = AVG(b, src_bayer1[1]);
dst_rgb[1] = AVG(g, src_bayer0[1]);
dst_rgb[2] = src_bayer0[0];
......@@ -217,17 +217,19 @@ static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
dst_rgb[1] = AVG(g, src_bayer0[1]);
dst_rgb[2] = src_bayer0[0];
dst_rgb[3] = 255U;
dst_rgb[4] = src_bayer1[1];
dst_rgb[5] = src_bayer0[1];
dst_rgb[6] = src_bayer0[0];
dst_rgb[7] = 255U;
if (pix & 1) {
dst_rgb[4] = src_bayer1[1];
dst_rgb[5] = src_bayer0[1];
dst_rgb[6] = src_bayer0[0];
dst_rgb[7] = 255U;
}
}
static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_rgb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 b = src_bayer0[1];
for (int x = 0; x < (pix - 2); x += 2) {
for (int x = 0; x < pix - 3; x += 2) {
dst_rgb[0] = AVG(b, src_bayer0[1]);
dst_rgb[1] = src_bayer0[0];
dst_rgb[2] = src_bayer1[0];
......@@ -245,17 +247,19 @@ static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
dst_rgb[1] = src_bayer0[0];
dst_rgb[2] = src_bayer1[0];
dst_rgb[3] = 255U;
dst_rgb[4] = src_bayer0[1];
dst_rgb[5] = src_bayer0[0];
dst_rgb[6] = src_bayer1[0];
dst_rgb[7] = 255U;
if (pix & 1) {
dst_rgb[4] = src_bayer0[1];
dst_rgb[5] = src_bayer0[0];
dst_rgb[6] = src_bayer1[0];
dst_rgb[7] = 255U;
}
}
static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_rgb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 r = src_bayer0[1];
for (int x = 0; x < (pix - 2); x += 2) {
for (int x = 0; x < pix - 3; x += 2) {
dst_rgb[0] = src_bayer1[0];
dst_rgb[1] = src_bayer0[0];
dst_rgb[2] = AVG(r, src_bayer0[1]);
......@@ -273,10 +277,12 @@ static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
dst_rgb[1] = src_bayer0[0];
dst_rgb[2] = AVG(r, src_bayer0[1]);
dst_rgb[3] = 255U;
dst_rgb[4] = src_bayer1[0];
dst_rgb[5] = src_bayer0[0];
dst_rgb[6] = src_bayer0[1];
dst_rgb[7] = 255U;
if (pix & 1) {
dst_rgb[4] = src_bayer1[0];
dst_rgb[5] = src_bayer0[0];
dst_rgb[6] = src_bayer0[1];
dst_rgb[7] = 255U;
}
}
// Converts any Bayer RGB format to ARGB.
......@@ -315,7 +321,7 @@ int BayerRGBToARGB(const uint8* src_bayer, int src_stride_bayer,
break;
}
for (int y = 0; y < (height - 1); y += 2) {
for (int y = 0; y < height - 1; y += 2) {
BayerRow0(src_bayer, src_stride_bayer, dst_rgb, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
dst_rgb + dst_stride_rgb, width);
......@@ -403,7 +409,7 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
break;
}
for (int y = 0; y < (height - 1); y += 2) {
for (int y = 0; y < height - 1; y += 2) {
BayerRow0(src_bayer, src_stride_bayer, row, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
row + kMaxStride, width);
......
This diff is collapsed.
......@@ -13,9 +13,13 @@
#include "libyuv/basic_types.h"
#define kMaxStride (2048 * 4)
// The following are available on all x86 platforms
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) \
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_ABGRTOARGBROW_SSSE3
#define HAS_BGRATOARGBROW_SSSE3
#define HAS_ARGBTOYROW_SSSE3
#define HAS_BG24TOARGBROW_SSSE3
#define HAS_RAWTOARGBROW_SSSE3
......@@ -23,19 +27,41 @@
#define HAS_RAWTOYROW_SSSE3
#define HAS_RGB24TOUVROW_SSSE3
#define HAS_RAWTOUVROW_SSSE3
#endif
// The following are available only on Windows
#if defined(WIN32) \
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_BGRATOYROW_SSSE3
#define HAS_ABGRTOYROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#endif
// The following are available on Windows and Linux
#if (defined(WIN32) || defined(__x86_64__) || \
(defined(__i386__) && !defined(__pic__))) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_ARGBTOUVROW_SSSE3
#define HAS_BGRATOUVROW_SSSE3
#define HAS_ABGRTOUVROW_SSSE3
#endif
// The following are available on Linux (32/64 bit)
// TODO(fbarchard): enable for fpic on linux
#if (defined(__x86_64__) || \
(defined(__i386__) && !defined(__pic__))) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_FASTCONVERTYUVTOARGBROW_SSE2
#define HAS_FASTCONVERTYUVTOBGRAROW_SSE2
#define HAS_FASTCONVERTYUVTOABGRROW_SSE2
#endif
// The following are available on Windows and GCC 32 bit
#if (defined(WIN32) || \
defined(__i386__)) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_FASTCONVERTYUVTOARGBROW_MMX
#define HAS_FASTCONVERTYUVTOBGRAROW_MMX
#define HAS_FASTCONVERTYUVTOABGRROW_MMX
#endif
extern "C" {
#ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
......@@ -75,56 +101,128 @@ void RAWToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#ifdef HAS_BG24TOARGBROW_SSSE3
void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix);
void RAWToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix);
#endif
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix);
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix);
void BG24ToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix);
void RAWToARGBRow_C(const uint8* src_bg24, uint8* dst_argb, int pix);
#ifdef HAS_I400TOARGBROW_SSE2
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
#endif
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
#if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
#else
#else // __GNUC__
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
typedef signed char __attribute__((vector_size(16))) vec8;
typedef unsigned char __attribute__((vector_size(16))) uvec8;
#endif
#ifdef OSX
extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
extern SIMD_ALIGNED(const int16 kCoefficientsBgraY[768][4]);
extern SIMD_ALIGNED(const int16 kCoefficientsAbgrY[768][4]);
#else
extern SIMD_ALIGNED(const int16 _kCoefficientsRgbY[768][4]);
extern SIMD_ALIGNED(const int16 _kCoefficientsBgraY[768][4]);
extern SIMD_ALIGNED(const int16 _kCoefficientsAbgrY[768][4]);
#endif
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToBGRARow(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
extern "C" SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
extern "C" SIMD_ALIGNED(const int16 kCoefficientsBgraY[768][4]);
extern "C" SIMD_ALIGNED(const int16 kCoefficientsAbgrY[768][4]);
void FastConvertYUVToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToBGRARow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRRow(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
void FastConvertYToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSE2
void FastConvertYUVToARGBRow_SSE2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToARGBRow4_SSE2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToBGRARow_SSE2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRRow_SSE2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_SSE2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYToARGBRow_SSE2(const uint8* y_buf,
uint8* rgb_buf,
int width);
#endif
#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYToARGBRow_MMX(const uint8* y_buf,
uint8* rgb_buf,
int width);
#endif
// Method to force C version.
//#define USE_MMX 0
......
This diff is collapsed.
......@@ -10,8 +10,6 @@
#include "row.h"
#define kMaxStride (2048 * 4)
extern "C" {
#define MAKETABLE(NAME) \
......@@ -232,11 +230,7 @@ SIMD_ALIGNED(const int16 NAME[256 * 3][4]) = {\
0 \
}
#ifdef OSX
MAKETABLE(kCoefficientsRgbY)
#else
MAKETABLE(_kCoefficientsRgbY)
#endif
#undef RGBY
#undef RGBU
......@@ -264,12 +258,7 @@ MAKETABLE(_kCoefficientsRgbY)
0 \
}
#ifdef OSX
MAKETABLE(kCoefficientsBgraY)
#else
MAKETABLE(_kCoefficientsBgraY)
#endif
#undef RGBY
#undef RGBU
......@@ -297,12 +286,39 @@ MAKETABLE(_kCoefficientsBgraY)
0 \
}
#ifdef OSX
MAKETABLE(kCoefficientsAbgrY)
#else
MAKETABLE(_kCoefficientsAbgrY)
#endif
void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix) {
for (int x = 0; x < pix; ++x) {
// To support in-place conversion.
uint8 r = src_abgr[0];
uint8 g = src_abgr[1];
uint8 b = src_abgr[2];
uint8 a = src_abgr[3];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = a;
dst_argb += 4;
src_abgr += 4;
}
}
void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix) {
for (int x = 0; x < pix; ++x) {
// To support in-place conversion.
uint8 a = src_bgra[0];
uint8 r = src_bgra[1];
uint8 g = src_bgra[2];
uint8 b = src_bgra[3];
dst_argb[0] = b;
dst_argb[1] = g;
dst_argb[2] = r;
dst_argb[3] = a;
dst_argb += 4;
src_bgra += 4;
}
}
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix) {
for (int x = 0; x < pix; ++x) {
......@@ -466,4 +482,133 @@ void RAWToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
#endif
#endif
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) {
// Copy a Y to RGB.
for (int x = 0; x < pix; ++x) {
uint8 y = src_y[0];
dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
dst_argb[3] = 255u;
dst_argb += 4;
++src_y;
}
}
// C reference code that mimic the YUV assembly.
#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
(((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
// Converts one YUV sample to a packed 32-bit pixel and stores it at rgb_buf.
//
// kCoefficientsRgbY is used as three 256-entry tables: rows [0, 256) are
// indexed by y, rows [256, 512) by u and rows [512, 768) by v.  Each row
// holds four 16-bit terms, for b, g, r and a in that order.  The three terms
// of each channel are summed with 16-bit saturation (paddsw), the low 6 bits
// are dropped, and the result is clamped to [0, 255] (packuswb).
//
// ashift/rshift/gshift/bshift give the bit position of each channel inside
// the 32-bit word that is written.  The word is stored in native byte order
// through a uint32 pointer.
// NOTE(review): this assumes rgb_buf is suitably aligned for a uint32 store -
// confirm for targets that fault on unaligned access.
static inline void YuvPixel(uint8 y,
                            uint8 u,
                            uint8 v,
                            uint8* rgb_buf,
                            int ashift,
                            int rshift,
                            int gshift,
                            int bshift) {
  int b = kCoefficientsRgbY[256+u][0];
  int g = kCoefficientsRgbY[256+u][1];
  int r = kCoefficientsRgbY[256+u][2];
  int a = kCoefficientsRgbY[256+u][3];

  b = paddsw(b, kCoefficientsRgbY[512+v][0]);
  g = paddsw(g, kCoefficientsRgbY[512+v][1]);
  r = paddsw(r, kCoefficientsRgbY[512+v][2]);
  a = paddsw(a, kCoefficientsRgbY[512+v][3]);

  b = paddsw(b, kCoefficientsRgbY[y][0]);
  g = paddsw(g, kCoefficientsRgbY[y][1]);
  r = paddsw(r, kCoefficientsRgbY[y][2]);
  a = paddsw(a, kCoefficientsRgbY[y][3]);

  b >>= 6;
  g >>= 6;
  r >>= 6;
  a >>= 6;

  // Convert to unsigned before shifting: a channel value of 128..255 moved
  // to bit 24 does not fit in a signed int.
  *reinterpret_cast<uint32*>(rgb_buf) =
      (static_cast<uint32>(packuswb(b)) << bshift) |
      (static_cast<uint32>(packuswb(g)) << gshift) |
      (static_cast<uint32>(packuswb(r)) << rshift) |
      (static_cast<uint32>(packuswb(a)) << ashift);
}
// Converts a row of planar YUV to 32-bit pixels with a at bit 24, r at bit 16,
// g at bit 8 and b at bit 0.  Every U/V sample is shared by two horizontally
// adjacent Y samples; an odd trailing pixel uses the next U/V sample alone.
void FastConvertYUVToARGBRow_C(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
  const int kShiftA = 24;
  const int kShiftR = 16;
  const int kShiftG = 8;
  const int kShiftB = 0;
  int x = 0;
  // Full pairs of pixels.
  while (x < width - 1) {
    YuvPixel(y_buf[0], *u_buf, *v_buf, rgb_buf,
             kShiftA, kShiftR, kShiftG, kShiftB);
    YuvPixel(y_buf[1], *u_buf, *v_buf, rgb_buf + 4,
             kShiftA, kShiftR, kShiftG, kShiftB);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 8;  // Two output pixels.
    x += 2;
  }
  // Leftover pixel when width is odd.
  if ((width & 1) != 0) {
    YuvPixel(*y_buf, *u_buf, *v_buf, rgb_buf,
             kShiftA, kShiftR, kShiftG, kShiftB);
  }
}
// Converts a row of planar YUV to 32-bit pixels with a at bit 0, r at bit 8,
// g at bit 16 and b at bit 24.  Every U/V sample is shared by two horizontally
// adjacent Y samples; an odd trailing pixel uses the next U/V sample alone.
void FastConvertYUVToBGRARow_C(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
  const int kShiftA = 0;
  const int kShiftR = 8;
  const int kShiftG = 16;
  const int kShiftB = 24;
  int x = 0;
  // Full pairs of pixels.
  while (x < width - 1) {
    YuvPixel(y_buf[0], *u_buf, *v_buf, rgb_buf,
             kShiftA, kShiftR, kShiftG, kShiftB);
    YuvPixel(y_buf[1], *u_buf, *v_buf, rgb_buf + 4,
             kShiftA, kShiftR, kShiftG, kShiftB);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 8;  // Two output pixels.
    x += 2;
  }
  // Leftover pixel when width is odd.
  if ((width & 1) != 0) {
    YuvPixel(*y_buf, *u_buf, *v_buf, rgb_buf,
             kShiftA, kShiftR, kShiftG, kShiftB);
  }
}
// Converts a row of planar YUV to 32-bit pixels with a at bit 24, r at bit 0,
// g at bit 8 and b at bit 16.  Every U/V sample is shared by two horizontally
// adjacent Y samples; an odd trailing pixel uses the next U/V sample alone.
void FastConvertYUVToABGRRow_C(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
  const int kShiftA = 24;
  const int kShiftR = 0;
  const int kShiftG = 8;
  const int kShiftB = 16;
  int x = 0;
  // Full pairs of pixels.
  while (x < width - 1) {
    YuvPixel(y_buf[0], *u_buf, *v_buf, rgb_buf,
             kShiftA, kShiftR, kShiftG, kShiftB);
    YuvPixel(y_buf[1], *u_buf, *v_buf, rgb_buf + 4,
             kShiftA, kShiftR, kShiftG, kShiftB);
    y_buf += 2;
    ++u_buf;
    ++v_buf;
    rgb_buf += 8;  // Two output pixels.
    x += 2;
  }
  // Leftover pixel when width is odd.
  if ((width & 1) != 0) {
    YuvPixel(*y_buf, *u_buf, *v_buf, rgb_buf,
             kShiftA, kShiftR, kShiftG, kShiftB);
  }
}
// Converts a row of planar YUV in which every pixel has its own U and V
// sample to 32-bit pixels with a at bit 24, r at bit 16, g at bit 8 and b at
// bit 0.
void FastConvertYUV444ToARGBRow_C(const uint8* y_buf,
                                  const uint8* u_buf,
                                  const uint8* v_buf,
                                  uint8* rgb_buf,
                                  int width) {
  for (int remaining = width; remaining > 0; --remaining) {
    YuvPixel(*y_buf, *u_buf, *v_buf, rgb_buf, 24, 16, 8, 0);
    ++y_buf;
    ++u_buf;
    ++v_buf;
    rgb_buf += 4;  // One output pixel.
  }
}
// Converts a row of Y samples to 32-bit pixels (a at bit 24, r at bit 16,
// g at bit 8, b at bit 0) by running each sample through YuvPixel with both
// chroma inputs fixed at 128.
void FastConvertYToARGBRow_C(const uint8* y_buf,
                             uint8* rgb_buf,
                             int width) {
  for (int remaining = width; remaining > 0; --remaining) {
    YuvPixel(*y_buf, 128, 128, rgb_buf, 24, 16, 8, 0);
    ++y_buf;
    rgb_buf += 4;  // One output pixel.
  }
}
} // extern "C"
This diff is collapsed.
......@@ -42,6 +42,7 @@ enum FourCC {
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment