Commit c4c578e3 authored by fbarchard@google.com

Flat shade an ARGB image

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/683004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@298 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent c4500c9f
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 297
Version: 298
License: BSD
License File: LICENSE
......
......@@ -15,7 +15,7 @@
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/convert_argb.h"
#ifdef __cplusplus
namespace libyuv {
......@@ -188,11 +188,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Multiply ARGB image by ARGB value.
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Convert MJPG to ARGB.
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* argb, int argb_stride,
......@@ -212,6 +207,11 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius);
// Multiply ARGB image by ARGB value.
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 297
#define LIBYUV_VERSION 298
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -784,11 +784,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
return 0;
}
// Visual C for x86 defines these.
#if defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
// GCC provided macros.
#elif __BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN
// Visual C x86 or GCC little endian.
#if defined(_M_X64) || defined(_M_IX86) || (defined(__BYTE_ORDER) && \
(__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN))
#define LIBYUV_LITTLE_ENDIAN
#endif
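
This hunk (and the identical one in the next file) folds the previous MSVC/GCC split into a single little-endian test that guards the fast single-store WRITEWORD path. A standalone sketch of the same idea follows; the DEMO_LITTLE_ENDIAN and WriteWordDemo names are hypothetical, and the GCC check uses the compiler-builtin __BYTE_ORDER__ spelling as an assumption for a self-contained example rather than libyuv's exact condition.

#include <stdint.h>

// Illustration only: not libyuv code.
#if defined(_M_X64) || defined(_M_IX86) || \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define DEMO_LITTLE_ENDIAN
#endif

static inline void WriteWordDemo(uint8_t* p, uint32_t v) {
#ifdef DEMO_LITTLE_ENDIAN
  *reinterpret_cast<uint32_t*>(p) = v;           // single 32-bit store
#else
  p[0] = static_cast<uint8_t>(v & 255);          // byte-by-byte, endian-safe
  p[1] = static_cast<uint8_t>((v >> 8) & 255);
  p[2] = static_cast<uint8_t>((v >> 16) & 255);
  p[3] = static_cast<uint8_t>((v >> 24) & 255);
#endif
}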
......
......@@ -401,18 +401,15 @@ static void I42xToUYVYRow_C(const uint8* src_y,
}
}
// Visual C for x86 defines these.
#if defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
// GCC provided macros.
#elif __BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN
// Visual C x86 or GCC little endian.
#if defined(_M_X64) || defined(_M_IX86) || (defined(__BYTE_ORDER) && \
(__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN))
#define LIBYUV_LITTLE_ENDIAN
#endif
#ifdef LIBYUV_LITTLE_ENDIAN
#define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v
#else
static inline void WRITEWORD(uint8* p, uint32 v) {
p[0] = (uint8)(v & 255);
p[1] = (uint8)((v >> 8) & 255);
......
......@@ -60,8 +60,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
uint8*, int,
uint8*, int,
int width, int height) {
if (!src_y || !dst_y ||
width <= 0 || height == 0) {
if (!src_y || !dst_y || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
......@@ -112,8 +111,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_y || !src_u || !src_v ||
!dst_y || !dst_u || !dst_v ||
if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
......@@ -143,9 +141,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb ||
!dst_argb ||
width <= 0 || height == 0) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
......@@ -224,6 +220,9 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height) {
if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -255,6 +254,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -298,6 +300,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) {
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -330,6 +335,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_raw, int dst_stride_raw,
int width, int height) {
if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -362,6 +370,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -393,6 +404,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height) {
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -424,6 +438,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height) {
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -457,6 +474,9 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
......@@ -499,6 +519,9 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
......@@ -762,6 +785,9 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -796,6 +822,9 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
......@@ -866,7 +895,8 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
const int8* matrix_argb,
int dst_x, int dst_y, int width, int height) {
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 ||
dst_x < 0 || dst_y < 0) {
return -1;
}
void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb,
......@@ -890,7 +920,8 @@ int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int dst_x, int dst_y, int width, int height) {
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
dst_x < 0 || dst_y < 0) {
return -1;
}
void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
......@@ -972,6 +1003,9 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
......@@ -1052,6 +1086,30 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value) {
if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
return -1;
}
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
int width, uint32 value) = ARGBShadeRow_C;
#if defined(HAS_ARGBSHADE_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBShadeRow = ARGBShadeRow_SSE2;
}
#endif
for (int y = 0; y < height; ++y) {
ARGBShadeRow(src_argb, dst_argb, width, value);
src_argb += src_stride_argb;
dst_argb += dst_stride_argb;
}
return 0;
}
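
A minimal usage sketch of the new entry point (the 64x64 dimensions, the HalfShade wrapper name, and the shade constant are illustrative assumptions, not part of this change): the 32-bit value packs per-channel scale factors as 0xAARRGGBB, so 0x80808080 multiplies every channel by roughly one half.

#include "libyuv/planar_functions.h"

bool HalfShade(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb) {
  const int kWidth = 64;
  const int kHeight = 64;
  // 0x80 in each of A, R, G and B scales that channel by 128/256.
  return libyuv::ARGBShade(src_argb, src_stride_argb,
                           dst_argb, dst_stride_argb,
                           kWidth, kHeight, 0x80808080) == 0;
}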
#ifdef __cplusplus
......
......@@ -85,6 +85,7 @@ extern "C" {
// The following are Windows only:
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBSHADE_SSE2
#endif
// The following are disabled when SSSE3 is available:
......@@ -516,6 +517,11 @@ void CumulativeSumToAverage_C(const int32* topleft, const int32* botleft,
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width);
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -956,6 +956,32 @@ void CumulativeSumToAverage_C(const int32* tl, const int32* bl,
}
}
#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 24
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value) {
const uint32 b_scale = REPEAT8(value & 0xff);
const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
const uint32 a_scale = REPEAT8(value >> 24);
for (int i = 0; i < width; ++i) {
const uint32 b = REPEAT8(src_argb[0]);
const uint32 g = REPEAT8(src_argb[1]);
const uint32 r = REPEAT8(src_argb[2]);
const uint32 a = REPEAT8(src_argb[3]);
dst_argb[0] = SHADE(b, b_scale);
dst_argb[1] = SHADE(g, g_scale);
dst_argb[2] = SHADE(r, r_scale);
dst_argb[3] = SHADE(a, a_scale);
src_argb += 4;
dst_argb += 4;
}
}
#undef REPEAT8
#undef SHADE
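
To make the REPEAT8/SHADE fixed-point math concrete, a small standalone check (plain stdint types; the values mirror the TestShade expectations added later in this change): scaling a channel of 10 by 0x80 yields 5.

#include <stdint.h>
#include <stdio.h>

int main() {
  uint32_t v = 10;                    // source channel value
  uint32_t s = 0x80;                  // shade value for that channel (~50%)
  uint32_t f = v | (v << 8);          // REPEAT8(v)  == 0x0A0A == v * 257
  uint32_t scale = s | (s << 8);      // REPEAT8(s)  == 0x8080 == s * 257
  uint32_t shaded = f * scale >> 24;  // SHADE: (v*257 * s*257) >> 24 == 5
  printf("%u\n", shaded);             // prints 5
  return 0;
}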
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -15,12 +15,12 @@ namespace libyuv {
extern "C" {
#endif
// This module is for Visual C x86
// This module is for Visual C x86.
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#ifdef HAS_ARGBTOYROW_SSSE3
// Constants for ARGB
// Constants for ARGB.
static const vec8 kARGBToY = {
13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
};
......@@ -33,7 +33,7 @@ static const vec8 kARGBToV = {
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
};
// Constants for BGRA
// Constants for BGRA.
static const vec8 kBGRAToY = {
0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
};
......@@ -46,7 +46,7 @@ static const vec8 kBGRAToV = {
0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
};
// Constants for ABGR
// Constants for ABGR.
static const vec8 kABGRToY = {
33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
};
......@@ -247,13 +247,13 @@ __asm {
}
}
// pmul method to replicate bits
// Math to replicate bits
// pmul method to replicate bits.
// Math to replicate bits:
// (v << 8) | (v << 3)
// v * 256 + v * 8
// v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
// 20 instructions
// 20 instructions.
__declspec(naked) __declspec(align(16))
void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
int pix) {
......@@ -358,7 +358,7 @@ __asm {
}
}
// 18 instructions
// 18 instructions.
__declspec(naked) __declspec(align(16))
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
int pix) {
......@@ -514,7 +514,7 @@ __asm {
}
}
// TODO(fbarchard): Improve sign extension/packing
// TODO(fbarchard): Improve sign extension/packing.
__declspec(naked) __declspec(align(16))
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
......@@ -587,7 +587,7 @@ __asm {
}
}
// Convert 16 ARGB pixels (64 bytes) to 16 Y values
// Convert 16 ARGB pixels (64 bytes) to 16 Y values.
__declspec(naked) __declspec(align(16))
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
......@@ -1249,8 +1249,9 @@ static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
// TODO(fbarchard): NV12/NV21 fetch UV and use directly.
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
// Read 8 UV from 411
// Read 8 UV from 411.
#define READYUV444 __asm { \
__asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
__asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
......@@ -1258,7 +1259,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpcklbw xmm0, xmm1 /* UV */ \
}
// Read 4 UV from 422, upsample to 8 UV
// Read 4 UV from 422, upsample to 8 UV.
#define READYUV422 __asm { \
__asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \
......@@ -1267,7 +1268,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
}
// Read 2 UV from 411, upsample to 8 UV
// Read 2 UV from 411, upsample to 8 UV.
#define READYUV411 __asm { \
__asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \
......@@ -1277,14 +1278,14 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \
}
// Read 4 UV from NV12, upsample to 8 UV
// Read 4 UV from NV12, upsample to 8 UV.
#define READNV12 __asm { \
__asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \
__asm lea esi, [esi + 8] \
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
}
// Convert 8 pixels: 8 UV and 8 Y
// Convert 8 pixels: 8 UV and 8 Y.
#define YUVTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \
......@@ -1312,7 +1313,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm packuswb xmm2, xmm2 /* R */ \
}
// Convert 8 pixels: 8 VU and 8 Y
// Convert 8 pixels: 8 VU and 8 Y.
#define YVUTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \
......@@ -1341,7 +1342,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
}
// 8 pixels, dest aligned 16.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes)
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
......@@ -1384,7 +1385,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
}
// 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
......@@ -1427,7 +1428,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
}
// 8 pixels, dest aligned 16.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16))
void I411ToARGBRow_SSSE3(const uint8* y_buf,
......@@ -1471,7 +1472,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
}
// 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
......@@ -1509,7 +1510,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
}
// 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
......@@ -1547,7 +1548,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
}
// 8 pixels, unaligned.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes)
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
......@@ -1590,7 +1591,7 @@ void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
}
// 8 pixels, unaligned.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
......@@ -1633,7 +1634,7 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
}
// 8 pixels, unaligned.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16))
void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
......@@ -1678,7 +1679,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
// 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
......@@ -1716,7 +1717,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
}
// 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes)
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16))
void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
......@@ -2127,7 +2128,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#endif // HAS_SPLITUV_SSE2
#ifdef HAS_COPYROW_SSE2
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
__declspec(naked) __declspec(align(16))
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
__asm {
......@@ -2574,13 +2575,13 @@ static const uvec8 kShuffleAlpha = {
3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
};
// Same as SSE2, but replaces
// Same as SSE2, but replaces:
// psrlw xmm3, 8 // alpha
// pshufhw xmm3, xmm3,0F5h // 8 alpha words
// pshuflw xmm3, xmm3,0F5h
// with..
// pshufb xmm3, kShuffleAlpha // alpha
// Blend 8 pixels at a time
// Blend 8 pixels at a time.
__declspec(naked) __declspec(align(16))
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
......@@ -2698,7 +2699,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBATTENUATE_SSE2
// Attenuate 4 pixels at a time.
// aligned to 16 bytes
// Aligned to 16 bytes.
__declspec(naked) __declspec(align(16))
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
__asm {
......@@ -2741,7 +2742,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
#endif // HAS_ARGBATTENUATE_SSE2
#ifdef HAS_ARGBATTENUATE_SSSE3
// Shuffle table duplicating alpha
// Shuffle table duplicating alpha.
static const uvec8 kShuffleAlpha0 = {
3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
};
......@@ -2791,7 +2792,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
#ifdef HAS_ARGBUNATTENUATE_SSE2
// Unattenuate 4 pixels at a time.
// aligned to 16 bytes
// Aligned to 16 bytes.
__declspec(naked) __declspec(align(16))
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) {
......@@ -2845,12 +2846,12 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
#endif // HAS_ARGBUNATTENUATE_SSE2
#ifdef HAS_ARGBGRAYROW_SSSE3
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
// Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
static const vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};
// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
__declspec(naked) __declspec(align(16))
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
__asm {
......@@ -2893,7 +2894,7 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
// Constant for ARGB color to sepia tone
// Constant for ARGB color to sepia tone.
static const vec8 kARGBToSepiaB = {
17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
};
......@@ -3071,7 +3072,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
// aligned to 16 bytes
// Aligned to 16 bytes.
__declspec(naked) __declspec(align(16))
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) {
......@@ -3306,6 +3307,42 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
}
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
#ifdef HAS_ARGBSHADE_SSE2
// Shade 4 pixels at a time by specified value.
// Aligned to 16 bytes.
__declspec(naked) __declspec(align(16))
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
movd xmm2, [esp + 16] // value
sub edx, eax
punpcklbw xmm2, xmm2
punpcklqdq xmm2, xmm2
align 16
convertloop:
movdqa xmm0, [eax] // read 4 pixels
movdqa xmm1, xmm0
punpcklbw xmm0, xmm0 // first 2
punpckhbw xmm1, xmm1 // next 2
pmulhuw xmm0, xmm2 // argb * value
pmulhuw xmm1, xmm2 // argb * value
psrlw xmm0, 8
psrlw xmm1, 8
packuswb xmm0, xmm1
sub ecx, 4
movdqa [eax + edx], xmm0
lea eax, [eax + 16]
jg convertloop
ret
}
}
#endif // HAS_ARGBSHADE_SSE2
#endif // _M_IX86
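
For readers who prefer intrinsics to inline assembly, a rough equivalent of the 4-pixel ARGBShadeRow_SSE2 kernel above; a sketch only, assuming width is a multiple of 4 and using unaligned loads/stores, whereas the __asm path requires 16-byte aligned, 16-byte padded buffers.

#include <emmintrin.h>  // SSE2
#include <stdint.h>

void ARGBShadeRow_SSE2_Sketch(const uint8_t* src_argb, uint8_t* dst_argb,
                              int width, uint32_t value) {
  __m128i scale = _mm_cvtsi32_si128(static_cast<int>(value));
  scale = _mm_unpacklo_epi8(scale, scale);   // b,g,r,a as 16-bit lanes, each s*257
  scale = _mm_unpacklo_epi64(scale, scale);  // replicate to cover 2 pixels
  for (int x = 0; x < width; x += 4) {       // 4 pixels (16 bytes) per pass
    __m128i p = _mm_loadu_si128(
        reinterpret_cast<const __m128i*>(src_argb + x * 4));
    __m128i lo = _mm_unpacklo_epi8(p, p);    // first 2 pixels, each channel p*257
    __m128i hi = _mm_unpackhi_epi8(p, p);    // next 2 pixels
    lo = _mm_srli_epi16(_mm_mulhi_epu16(lo, scale), 8);  // (p*257 * s*257) >> 24
    hi = _mm_srli_epi16(_mm_mulhi_epu16(hi, scale), 8);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dst_argb + x * 4),
                     _mm_packus_epi16(lo, hi));
  }
}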
......
......@@ -331,7 +331,6 @@ TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4)
TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4)
TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4)
TEST_F(libyuvTest, TestAttenuate) {
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
SIMD_ALIGNED(uint8 atten_pixels[256][4]);
......@@ -649,4 +648,56 @@ TEST_F(libyuvTest, TestARGBMirror) {
}
}
TEST_F(libyuvTest, TestShade) {
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
SIMD_ALIGNED(uint8 shade_pixels[256][4]);
// Test unattenuation clamps
orig_pixels[0][0] = 10u;
orig_pixels[0][1] = 20u;
orig_pixels[0][2] = 40u;
orig_pixels[0][3] = 80u;
// Test unattenuation transparent and opaque are unaffected
orig_pixels[1][0] = 0u;
orig_pixels[1][1] = 0u;
orig_pixels[1][2] = 0u;
orig_pixels[1][3] = 255u;
orig_pixels[2][0] = 0u;
orig_pixels[2][1] = 0u;
orig_pixels[2][2] = 0u;
orig_pixels[2][3] = 0u;
orig_pixels[3][0] = 0u;
orig_pixels[3][1] = 0u;
orig_pixels[3][2] = 0u;
orig_pixels[3][3] = 0u;
ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80ffffff);
EXPECT_EQ(10u, shade_pixels[0][0]);
EXPECT_EQ(20u, shade_pixels[0][1]);
EXPECT_EQ(40u, shade_pixels[0][2]);
EXPECT_EQ(40u, shade_pixels[0][3]);
EXPECT_EQ(0u, shade_pixels[1][0]);
EXPECT_EQ(0u, shade_pixels[1][1]);
EXPECT_EQ(0u, shade_pixels[1][2]);
EXPECT_EQ(128u, shade_pixels[1][3]);
EXPECT_EQ(0u, shade_pixels[2][0]);
EXPECT_EQ(0u, shade_pixels[2][1]);
EXPECT_EQ(0u, shade_pixels[2][2]);
EXPECT_EQ(0u, shade_pixels[2][3]);
EXPECT_EQ(0u, shade_pixels[3][0]);
EXPECT_EQ(0u, shade_pixels[3][1]);
EXPECT_EQ(0u, shade_pixels[3][2]);
EXPECT_EQ(0u, shade_pixels[3][3]);
ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80808080);
EXPECT_EQ(5u, shade_pixels[0][0]);
EXPECT_EQ(10u, shade_pixels[0][1]);
EXPECT_EQ(20u, shade_pixels[0][2]);
EXPECT_EQ(40u, shade_pixels[0][3]);
for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 256, 1,
0x80808080);
}
}
} // namespace libyuv