Commit c4c578e3 authored by fbarchard@google.com's avatar fbarchard@google.com

Flat shade an ARGB image

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/683004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@298 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent c4500c9f
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 297 Version: 298
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
// TODO(fbarchard): Remove the following headers includes // TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include "libyuv/planar_functions.h" #include "libyuv/convert_argb.h"
#ifdef __cplusplus #ifdef __cplusplus
namespace libyuv { namespace libyuv {
...@@ -188,11 +188,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, ...@@ -188,11 +188,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Multiply ARGB image by ARGB value.
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
// Convert MJPG to ARGB. // Convert MJPG to ARGB.
int MJPGToARGB(const uint8* sample, size_t sample_size, int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* argb, int argb_stride, uint8* argb, int argb_stride,
...@@ -212,6 +207,11 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -212,6 +207,11 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum, int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius); int width, int height, int radius);
// Multiply ARGB image by ARGB value.
int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 297 #define LIBYUV_VERSION 298
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -784,11 +784,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -784,11 +784,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
return 0; return 0;
} }
// Visual C for x86 defines these. // Visual C x86 or GCC little endian.
#if defined(_M_X64) || defined(_M_IX86) #if defined(_M_X64) || defined(_M_IX86) || (defined(__BYTE_ORDER) && \
#define LIBYUV_LITTLE_ENDIAN (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN))
// GCC provided macros.
#elif __BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN #define LIBYUV_LITTLE_ENDIAN
#endif #endif
......
...@@ -401,18 +401,15 @@ static void I42xToUYVYRow_C(const uint8* src_y, ...@@ -401,18 +401,15 @@ static void I42xToUYVYRow_C(const uint8* src_y,
} }
} }
// Visual C for x86 defines these. // Visual C x86 or GCC little endian.
#if defined(_M_X64) || defined(_M_IX86) #if defined(_M_X64) || defined(_M_IX86) || (defined(__BYTE_ORDER) && \
#define LIBYUV_LITTLE_ENDIAN (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN))
// GCC provided macros.
#elif __BYTE_ORDER == __ORDER_LITTLE_ENDIAN__ || __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN #define LIBYUV_LITTLE_ENDIAN
#endif #endif
#ifdef LIBYUV_LITTLE_ENDIAN #ifdef LIBYUV_LITTLE_ENDIAN
#define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v #define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v
#else #else
static inline void WRITEWORD(uint8* p, uint32 v) { static inline void WRITEWORD(uint8* p, uint32 v) {
p[0] = (uint8)(v & 255); p[0] = (uint8)(v & 255);
p[1] = (uint8)((v >> 8) & 255); p[1] = (uint8)((v >> 8) & 255);
......
...@@ -60,8 +60,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y, ...@@ -60,8 +60,7 @@ int I420ToI400(const uint8* src_y, int src_stride_y,
uint8*, int, uint8*, int,
uint8*, int, uint8*, int,
int width, int height) { int width, int height) {
if (!src_y || !dst_y || if (!src_y || !dst_y || width <= 0 || height == 0) {
width <= 0 || height == 0) {
return -1; return -1;
} }
// Negative height means invert the image. // Negative height means invert the image.
...@@ -112,8 +111,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y, ...@@ -112,8 +111,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (!src_y || !src_u || !src_v || if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) { width <= 0 || height == 0) {
return -1; return -1;
} }
...@@ -143,9 +141,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y, ...@@ -143,9 +141,7 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
int ARGBMirror(const uint8* src_argb, int src_stride_argb, int ARGBMirror(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height) { int width, int height) {
if (!src_argb || if (!src_argb || !dst_argb || width <= 0 || height == 0) {
!dst_argb ||
width <= 0 || height == 0) {
return -1; return -1;
} }
// Negative height means invert the image. // Negative height means invert the image.
...@@ -224,6 +220,9 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, ...@@ -224,6 +220,9 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
int ARGBToI400(const uint8* src_argb, int src_stride_argb, int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
int width, int height) { int width, int height) {
if (!src_argb || !dst_y || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -255,6 +254,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, ...@@ -255,6 +254,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -298,6 +300,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb, ...@@ -298,6 +300,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24, uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height) { int width, int height) {
if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -330,6 +335,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, ...@@ -330,6 +335,9 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
int ARGBToRAW(const uint8* src_argb, int src_stride_argb, int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_raw, int dst_stride_raw, uint8* dst_raw, int dst_stride_raw,
int width, int height) { int width, int height) {
if (!src_argb || !dst_raw || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -362,6 +370,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, ...@@ -362,6 +370,9 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) { int width, int height) {
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -393,6 +404,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, ...@@ -393,6 +404,9 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555, uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height) { int width, int height) {
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -424,6 +438,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, ...@@ -424,6 +438,9 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444, uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height) { int width, int height) {
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -457,6 +474,9 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -457,6 +474,9 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv, const uint8* src_uv, int src_stride_uv,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) { int width, int height) {
if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
...@@ -499,6 +519,9 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -499,6 +519,9 @@ int NV21ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu, const uint8* src_vu, int src_stride_vu,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) { int width, int height) {
if (!src_y || !src_vu || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
...@@ -762,6 +785,9 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, ...@@ -762,6 +785,9 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height) { int width, int height) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -796,6 +822,9 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, ...@@ -796,6 +822,9 @@ int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height) { int width, int height) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
if (height < 0) { if (height < 0) {
height = -height; height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
...@@ -866,7 +895,8 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb, ...@@ -866,7 +895,8 @@ int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb, int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
const int8* matrix_argb, const int8* matrix_argb,
int dst_x, int dst_y, int width, int height) { int dst_x, int dst_y, int width, int height) {
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { if (!dst_argb || !matrix_argb || width <= 0 || height <= 0 ||
dst_x < 0 || dst_y < 0) {
return -1; return -1;
} }
void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb, void (*ARGBColorMatrixRow)(uint8* dst_argb, const int8* matrix_argb,
...@@ -890,7 +920,8 @@ int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb, ...@@ -890,7 +920,8 @@ int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb, const uint8* table_argb,
int dst_x, int dst_y, int width, int height) { int dst_x, int dst_y, int width, int height) {
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
dst_x < 0 || dst_y < 0) {
return -1; return -1;
} }
void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
...@@ -972,6 +1003,9 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -972,6 +1003,9 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum, int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius) { int width, int height, int radius) {
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1;
}
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft, void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
...@@ -1052,6 +1086,30 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -1052,6 +1086,30 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
// Shades an ARGB image: every row of src_argb is passed through an
// ARGBShadeRow function together with the packed 32-bit 'value' and the
// result is written to dst_argb.
// Returns 0 on success, or -1 when src_argb or dst_argb is NULL, width <= 0,
// height == 0, or value == 0.
// A negative height processes the source rows bottom-up (inverted image).
int ARGBShade(const uint8* src_argb, int src_stride_argb, int ARGBShade(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height, uint32 value) { int width, int height, uint32 value) {
// Reject missing buffers, empty images and a zero shade value.
if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
return -1;
}
// Negative height means invert the image: start at the last source row and
// walk backwards by negating the source stride.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Default to the portable C row function.
void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
int width, uint32 value) = ARGBShadeRow_C;
#if defined(HAS_ARGBSHADE_SSE2)
// Use the SSE2 row only when the CPU flag is set and the IS_ALIGNED checks
// pass for width (4) and for both pointers and both strides (16).
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBShadeRow = ARGBShadeRow_SSE2;
}
#endif
// Shade one row at a time, advancing each pointer by its own stride.
for (int y = 0; y < height; ++y) {
ARGBShadeRow(src_argb, dst_argb, width, value);
src_argb += src_stride_argb;
dst_argb += dst_stride_argb;
}
return 0;
} }
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -85,6 +85,7 @@ extern "C" { ...@@ -85,6 +85,7 @@ extern "C" {
// The following are Windows only: // The following are Windows only:
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86 #define HAS_ARGBCOLORTABLEROW_X86
#define HAS_ARGBSHADE_SSE2
#endif #endif
// The following are disabled when SSSE3 is available: // The following are disabled when SSSE3 is available:
...@@ -516,6 +517,11 @@ void CumulativeSumToAverage_C(const int32* topleft, const int32* botleft, ...@@ -516,6 +517,11 @@ void CumulativeSumToAverage_C(const int32* topleft, const int32* botleft,
void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width); const int32* previous_cumsum, int width);
// Row functions used by ARGBShade: process 'width' ARGB pixels from src_argb
// into dst_argb using the packed 32-bit 'value'.
// ARGBShadeRow_C is the portable version; ARGBShadeRow_SSE2 is only selected
// when HAS_ARGBSHADE_SSE2 is defined.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -956,6 +956,32 @@ void CumulativeSumToAverage_C(const int32* tl, const int32* bl, ...@@ -956,6 +956,32 @@ void CumulativeSumToAverage_C(const int32* tl, const int32* bl,
} }
} }
// Replicates an 8 bit value into both bytes of a 16 bit value,
// e.g. 0xAB becomes 0xABAB. Fully parenthesized so it is safe inside any
// larger expression.
#define REPEAT8(v) ((v) | ((v) << 8))
// Multiplies a replicated 16 bit pixel value (f) by a replicated 16 bit
// scale (v) and keeps the top 8 bits of the 32 bit product.
// 0xFFFF * 0xFFFF = 0xFFFE0001 still fits in 32 bits, so full scale maps
// 255 to 255 without overflow.
#define SHADE(f, v) (((v) * (f)) >> 24)

// Shades one row of ARGB pixels.
//   src_argb: source row, 4 bytes per pixel stored as B, G, R, A.
//   dst_argb: destination row, same layout.
//   width:    number of pixels to process; nothing is written if <= 0.
//   value:    packed scale, bits 0-7 for B, 8-15 for G, 16-23 for R and
//             24-31 for A. Each channel is multiplied by its scale where
//             255 means unchanged and 0 means zero.
void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
                    uint32 value) {
  // Per channel scales are loop invariant, so expand them once.
  const uint32 b_scale = REPEAT8(value & 0xff);
  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32 a_scale = REPEAT8(value >> 24);

  for (int i = 0; i < width; ++i) {
    const uint32 b = REPEAT8(src_argb[0]);
    const uint32 g = REPEAT8(src_argb[1]);
    const uint32 r = REPEAT8(src_argb[2]);
    const uint32 a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -15,12 +15,12 @@ namespace libyuv { ...@@ -15,12 +15,12 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// This module is for Visual C x86 // This module is for Visual C x86.
#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #if !defined(YUV_DISABLE_ASM) && defined(_M_IX86)
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
// Constants for ARGB // Constants for ARGB.
static const vec8 kARGBToY = { static const vec8 kARGBToY = {
13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
}; };
...@@ -33,7 +33,7 @@ static const vec8 kARGBToV = { ...@@ -33,7 +33,7 @@ static const vec8 kARGBToV = {
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
}; };
// Constants for BGRA // Constants for BGRA.
static const vec8 kBGRAToY = { static const vec8 kBGRAToY = {
0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13
}; };
...@@ -46,7 +46,7 @@ static const vec8 kBGRAToV = { ...@@ -46,7 +46,7 @@ static const vec8 kBGRAToV = {
0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18
}; };
// Constants for ABGR // Constants for ABGR.
static const vec8 kABGRToY = { static const vec8 kABGRToY = {
33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0
}; };
...@@ -247,13 +247,13 @@ __asm { ...@@ -247,13 +247,13 @@ __asm {
} }
} }
// pmul method to replicate bits // pmul method to replicate bits.
// Math to replicate bits // Math to replicate bits:
// (v << 8) | (v << 3) // (v << 8) | (v << 3)
// v * 256 + v * 8 // v * 256 + v * 8
// v * (256 + 8) // v * (256 + 8)
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3
// 20 instructions // 20 instructions.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb,
int pix) { int pix) {
...@@ -358,7 +358,7 @@ __asm { ...@@ -358,7 +358,7 @@ __asm {
} }
} }
// 18 instructions // 18 instructions.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
int pix) { int pix) {
...@@ -514,7 +514,7 @@ __asm { ...@@ -514,7 +514,7 @@ __asm {
} }
} }
// TODO(fbarchard): Improve sign extension/packing // TODO(fbarchard): Improve sign extension/packing.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm { __asm {
...@@ -587,7 +587,7 @@ __asm { ...@@ -587,7 +587,7 @@ __asm {
} }
} }
// Convert 16 ARGB pixels (64 bytes) to 16 Y values // Convert 16 ARGB pixels (64 bytes) to 16 Y values.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm { __asm {
...@@ -1249,8 +1249,9 @@ static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG }; ...@@ -1249,8 +1249,9 @@ static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
// TODO(fbarchard): NV12/NV21 fetch UV and use directly. // TODO(fbarchard): NV12/NV21 fetch UV and use directly.
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
// Read 8 UV from 444 // Read 8 UV from 444.
#define READYUV444 __asm { \ #define READYUV444 __asm { \
__asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \ __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \
__asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \ __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \
...@@ -1258,7 +1259,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; ...@@ -1258,7 +1259,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpcklbw xmm0, xmm1 /* UV */ \ __asm punpcklbw xmm0, xmm1 /* UV */ \
} }
// Read 4 UV from 422, upsample to 8 UV // Read 4 UV from 422, upsample to 8 UV.
#define READYUV422 __asm { \ #define READYUV422 __asm { \
__asm movd xmm0, [esi] /* U */ \ __asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \ __asm movd xmm1, [esi + edi] /* V */ \
...@@ -1267,7 +1268,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; ...@@ -1267,7 +1268,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
} }
// Read 2 UV from 411, upsample to 8 UV // Read 2 UV from 411, upsample to 8 UV.
#define READYUV411 __asm { \ #define READYUV411 __asm { \
__asm movd xmm0, [esi] /* U */ \ __asm movd xmm0, [esi] /* U */ \
__asm movd xmm1, [esi + edi] /* V */ \ __asm movd xmm1, [esi + edi] /* V */ \
...@@ -1277,14 +1278,14 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; ...@@ -1277,14 +1278,14 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \ __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \
} }
// Read 4 UV from NV12, upsample to 8 UV // Read 4 UV from NV12, upsample to 8 UV.
#define READNV12 __asm { \ #define READNV12 __asm { \
__asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \ __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \
__asm lea esi, [esi + 8] \ __asm lea esi, [esi + 8] \
__asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
} }
// Convert 8 pixels: 8 UV and 8 Y // Convert 8 pixels: 8 UV and 8 Y.
#define YUVTORGB __asm { \ #define YUVTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \ __asm movdqa xmm1, xmm0 \
...@@ -1312,7 +1313,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; ...@@ -1312,7 +1313,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
__asm packuswb xmm2, xmm2 /* R */ \ __asm packuswb xmm2, xmm2 /* R */ \
} }
// Convert 8 pixels: 8 VU and 8 Y // Convert 8 pixels: 8 VU and 8 Y.
#define YVUTORGB __asm { \ #define YVUTORGB __asm { \
/* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \
__asm movdqa xmm1, xmm0 \ __asm movdqa xmm1, xmm0 \
...@@ -1341,7 +1342,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; ...@@ -1341,7 +1342,7 @@ static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
} }
// 8 pixels, dest aligned 16. // 8 pixels, dest aligned 16.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes) // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void I444ToARGBRow_SSSE3(const uint8* y_buf, void I444ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -1384,7 +1385,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1384,7 +1385,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels, dest aligned 16. // 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void I422ToARGBRow_SSSE3(const uint8* y_buf, void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -1427,7 +1428,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1427,7 +1428,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels, dest aligned 16. // 8 pixels, dest aligned 16.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more. // Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void I411ToARGBRow_SSSE3(const uint8* y_buf, void I411ToARGBRow_SSSE3(const uint8* y_buf,
...@@ -1471,7 +1472,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1471,7 +1472,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels, dest aligned 16. // 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void NV12ToARGBRow_SSSE3(const uint8* y_buf, void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
...@@ -1509,7 +1510,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1509,7 +1510,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels, dest aligned 16. // 8 pixels, dest aligned 16.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void NV21ToARGBRow_SSSE3(const uint8* y_buf, void NV21ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
...@@ -1547,7 +1548,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf, ...@@ -1547,7 +1548,7 @@ void NV21ToARGBRow_SSSE3(const uint8* y_buf,
} }
// 8 pixels, unaligned. // 8 pixels, unaligned.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes) // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -1590,7 +1591,7 @@ void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, ...@@ -1590,7 +1591,7 @@ void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
} }
// 8 pixels, unaligned. // 8 pixels, unaligned.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -1633,7 +1634,7 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, ...@@ -1633,7 +1634,7 @@ void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
} }
// 8 pixels, unaligned. // 8 pixels, unaligned.
// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
// Similar to I420 but duplicate UV once more. // Similar to I420 but duplicate UV once more.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
...@@ -1678,7 +1679,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, ...@@ -1678,7 +1679,7 @@ void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
// 8 pixels, unaligned. // 8 pixels, unaligned.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
...@@ -1716,7 +1717,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, ...@@ -1716,7 +1717,7 @@ void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
} }
// 8 pixels, unaligned. // 8 pixels, unaligned.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* uv_buf, const uint8* uv_buf,
...@@ -2127,7 +2128,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { ...@@ -2127,7 +2128,7 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#endif // HAS_SPLITUV_SSE2 #endif // HAS_SPLITUV_SSE2
#ifdef HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_SSE2
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time // CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
__asm { __asm {
...@@ -2574,13 +2575,13 @@ static const uvec8 kShuffleAlpha = { ...@@ -2574,13 +2575,13 @@ static const uvec8 kShuffleAlpha = {
3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80,
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80
}; };
// Same as SSE2, but replaces // Same as SSE2, but replaces:
// psrlw xmm3, 8 // alpha // psrlw xmm3, 8 // alpha
// pshufhw xmm3, xmm3,0F5h // 8 alpha words // pshufhw xmm3, xmm3,0F5h // 8 alpha words
// pshuflw xmm3, xmm3,0F5h // pshuflw xmm3, xmm3,0F5h
// with.. // with..
// pshufb xmm3, kShuffleAlpha // alpha // pshufb xmm3, kShuffleAlpha // alpha
// Blend 8 pixels at a time // Blend 8 pixels at a time.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
...@@ -2698,7 +2699,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, ...@@ -2698,7 +2699,7 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1,
#ifdef HAS_ARGBATTENUATE_SSE2 #ifdef HAS_ARGBATTENUATE_SSE2
// Attenuate 4 pixels at a time. // Attenuate 4 pixels at a time.
// aligned to 16 bytes // Aligned to 16 bytes.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
__asm { __asm {
...@@ -2741,7 +2742,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -2741,7 +2742,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
#endif // HAS_ARGBATTENUATE_SSE2 #endif // HAS_ARGBATTENUATE_SSE2
#ifdef HAS_ARGBATTENUATE_SSSE3 #ifdef HAS_ARGBATTENUATE_SSSE3
// Shuffle table duplicating alpha // Shuffle table duplicating alpha.
static const uvec8 kShuffleAlpha0 = { static const uvec8 kShuffleAlpha0 = {
3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u,
}; };
...@@ -2791,7 +2792,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { ...@@ -2791,7 +2792,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
#ifdef HAS_ARGBUNATTENUATE_SSE2 #ifdef HAS_ARGBUNATTENUATE_SSE2
// Unattenuate 4 pixels at a time. // Unattenuate 4 pixels at a time.
// aligned to 16 bytes // Aligned to 16 bytes.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
int width) { int width) {
...@@ -2845,12 +2846,12 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, ...@@ -2845,12 +2846,12 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
#endif // HAS_ARGBUNATTENUATE_SSE2 #endif // HAS_ARGBUNATTENUATE_SSE2
#ifdef HAS_ARGBGRAYROW_SSSE3 #ifdef HAS_ARGBGRAYROW_SSSE3
// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R // Constant for ARGB color to gray scale: 0.11 * B + 0.59 * G + 0.30 * R
static const vec8 kARGBToGray = { static const vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
}; };
// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
__asm { __asm {
...@@ -2893,7 +2894,7 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) { ...@@ -2893,7 +2894,7 @@ void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
// b = (r * 35 + g * 68 + b * 17) >> 7 // b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7
// Constant for ARGB color to sepia tone // Constant for ARGB color to sepia tone.
static const vec8 kARGBToSepiaB = { static const vec8 kARGBToSepiaB = {
17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
}; };
...@@ -3071,7 +3072,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, ...@@ -3071,7 +3072,7 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
#ifdef HAS_ARGBQUANTIZEROW_SSE2 #ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes). // Quantize 4 ARGB pixels (16 bytes).
// aligned to 16 bytes // Aligned to 16 bytes.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) { int interval_offset, int width) {
...@@ -3306,6 +3307,42 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, ...@@ -3306,6 +3307,42 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
} }
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
#ifdef HAS_ARGBSHADE_SSE2
// Shade 4 ARGB pixels (16 bytes) per iteration by the specified value: every
// channel byte b of the source is scaled by the matching channel byte v of
// 'value'. Per channel the result is ((b * 0x101) * (v * 0x101)) >> 24, so a
// value byte of 0xff leaves the channel unchanged and 0x80 roughly halves it.
// Source and destination are accessed with movdqa, so both must be aligned to
// 16 bytes.
// The remaining count is only tested after a store, so the loop body always
// runs at least once and pixels are handled in groups of 4.
// The function is naked: there is no prologue, and the arguments are read
// directly from the stack relative to esp.
__declspec(naked) __declspec(align(16))
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
uint32 value) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_argb
mov ecx, [esp + 12] // width
movd xmm2, [esp + 16] // value
sub edx, eax // edx = dst - src, so [eax + edx] addresses dst
punpcklbw xmm2, xmm2 // each value byte v becomes the word v * 0x101
punpcklqdq xmm2, xmm2 // copy the 4 words to the high half: 2 pixels wide
align 16
convertloop:
movdqa xmm0, [eax] // read 4 pixels
movdqa xmm1, xmm0
punpcklbw xmm0, xmm0 // first 2
punpckhbw xmm1, xmm1 // next 2
pmulhuw xmm0, xmm2 // argb * value
pmulhuw xmm1, xmm2 // argb * value
psrlw xmm0, 8 // with pmulhuw's >> 16 this totals >> 24
psrlw xmm1, 8
packuswb xmm0, xmm1 // words back to bytes, 4 pixels in xmm0
sub ecx, 4 // sets the flags tested by jg below
movdqa [eax + edx], xmm0 // store 4 pixels to dst; does not change flags
lea eax, [eax + 16] // advance src (and dst via edx); does not change flags
jg convertloop // loop while pixels remain
ret
}
}
#endif // HAS_ARGBSHADE_SSE2
#endif // _M_IX86 #endif // _M_IX86
......
...@@ -331,7 +331,6 @@ TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4) ...@@ -331,7 +331,6 @@ TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4)
TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4) TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4)
TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4) TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4)
TEST_F(libyuvTest, TestAttenuate) { TEST_F(libyuvTest, TestAttenuate) {
SIMD_ALIGNED(uint8 orig_pixels[256][4]); SIMD_ALIGNED(uint8 orig_pixels[256][4]);
SIMD_ALIGNED(uint8 atten_pixels[256][4]); SIMD_ALIGNED(uint8 atten_pixels[256][4]);
...@@ -649,4 +648,56 @@ TEST_F(libyuvTest, TestARGBMirror) { ...@@ -649,4 +648,56 @@ TEST_F(libyuvTest, TestARGBMirror) {
} }
} }
// Checks that ARGBShade scales each channel of every pixel by the matching
// byte of the 32 bit shade value, then calls it repeatedly on a 256 pixel row.
TEST_F(libyuvTest, TestShade) {
  SIMD_ALIGNED(uint8 orig_pixels[256][4]);
  SIMD_ALIGNED(uint8 shade_pixels[256][4]);

  // Zero the whole source row first. Only the first 4 pixels receive test
  // values, but the repeated call at the end reads all 256 pixels and must
  // not read indeterminate stack bytes.
  for (int i = 0; i < 256; ++i) {
    for (int j = 0; j < 4; ++j) {
      orig_pixels[i][j] = 0u;
    }
  }

  // Pixel 0: distinct non-zero channels, so per-channel scaling is visible.
  orig_pixels[0][0] = 10u;
  orig_pixels[0][1] = 20u;
  orig_pixels[0][2] = 40u;
  orig_pixels[0][3] = 80u;
  // Pixel 1: zero color channels with byte 3 at maximum.
  orig_pixels[1][3] = 255u;
  // Pixels 2 and 3 stay all zero and must remain zero after shading.

  // Shade value with 0x80 in the top byte and 0xff elsewhere: bytes 0..2 of
  // each pixel are unchanged and byte 3 is halved.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80ffffff);
  EXPECT_EQ(10u, shade_pixels[0][0]);
  EXPECT_EQ(20u, shade_pixels[0][1]);
  EXPECT_EQ(40u, shade_pixels[0][2]);
  EXPECT_EQ(40u, shade_pixels[0][3]);
  EXPECT_EQ(0u, shade_pixels[1][0]);
  EXPECT_EQ(0u, shade_pixels[1][1]);
  EXPECT_EQ(0u, shade_pixels[1][2]);
  EXPECT_EQ(128u, shade_pixels[1][3]);
  EXPECT_EQ(0u, shade_pixels[2][0]);
  EXPECT_EQ(0u, shade_pixels[2][1]);
  EXPECT_EQ(0u, shade_pixels[2][2]);
  EXPECT_EQ(0u, shade_pixels[2][3]);
  EXPECT_EQ(0u, shade_pixels[3][0]);
  EXPECT_EQ(0u, shade_pixels[3][1]);
  EXPECT_EQ(0u, shade_pixels[3][2]);
  EXPECT_EQ(0u, shade_pixels[3][3]);

  // Shade value with 0x80 in every byte: all four channels are halved.
  ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 4, 1, 0x80808080);
  EXPECT_EQ(5u, shade_pixels[0][0]);
  EXPECT_EQ(10u, shade_pixels[0][1]);
  EXPECT_EQ(20u, shade_pixels[0][2]);
  EXPECT_EQ(40u, shade_pixels[0][3]);

  // Repeated calls on the full 256 pixel row; the iteration count equals
  // 1000 frames of 1280x720 pixels divided by the 256 pixels per call.
  // The results of these calls are not checked.
  for (int i = 0; i < 1000 * 1280 * 720 / 256; ++i) {
    ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 256, 1,
              0x80808080);
  }
}
} // namespace libyuv } // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment