Commit bdeb9ac5 authored by fbarchard@google.com's avatar fbarchard@google.com

switch from 8x8 to 4x4 matrix for dithering

BUG=407
TESTED=Dither unittests
R=brucedawson@google.com

Review URL: https://webrtc-codereview.appspot.com/46459004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1310 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0fe4abbc
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1309 Version: 1310
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -61,12 +61,15 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, ...@@ -61,12 +61,15 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height); int width, int height);
// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes). // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 255. 128 is best for no dither. // Values in dither matrix from 0 to 7 recommended.
// The dither matrix is stored row by row; the first byte is the upper left.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
// const uint8(*dither)[4][4];
LIBYUV_API LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither8x8, int width, int height); const uint8* dither4x4, int width, int height);
// Convert ARGB To ARGB1555. // Convert ARGB To ARGB1555.
LIBYUV_API LIBYUV_API
......
...@@ -910,11 +910,11 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); ...@@ -910,11 +910,11 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix); const uint32 dither4, int pix);
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix); const uint32 dither4, int pix);
void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix); const uint32 dither4, int pix);
void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
...@@ -1384,9 +1384,9 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); ...@@ -1384,9 +1384,9 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix); const uint32 dither4, int pix);
void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix); const uint32 dither4, int pix);
void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1309 #define LIBYUV_VERSION 1310
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -805,25 +805,21 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb, ...@@ -805,25 +805,21 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
} }
// Ordered 8x8 dither for 888 to 565. Values from 0 to 7. // Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
static const uint8 kDither565_8x8[64] = { static const uint8 kDither565_4x4[16] = {
0 >> 5, 128 >> 5, 32 >> 5, 160 >> 5, 8 >> 5, 136 >> 5, 40 >> 5, 168 >> 5, 0, 4, 1, 5,
192 >> 5, 64 >> 5, 224 >> 5, 96 >> 5, 200 >> 5, 72 >> 5, 232 >> 5, 104 >> 5, 6, 2, 7, 3,
48 >> 5, 176 >> 5, 16 >> 5, 144 >> 5, 56 >> 5, 184 >> 5, 24 >> 5, 152 >> 5, 1, 5, 0, 4,
240 >> 5, 112 >> 5, 208 >> 5, 80 >> 5, 248 >> 5, 120 >> 5, 216 >> 5, 88 >> 5, 7, 3, 6, 2,
12 >> 5, 140 >> 5, 44 >> 5, 172 >> 5, 4 >> 5, 132 >> 5, 36 >> 5, 164 >> 5,
204 >> 5, 76 >> 5, 236 >> 5, 108 >> 5, 196 >> 5, 68 >> 5, 228 >> 5, 100 >> 5,
60 >> 5, 188 >> 5, 28 >> 5, 156 >> 5, 52 >> 5, 180 >> 5, 20 >> 5, 148 >> 5,
252 >> 5, 124 >> 5, 220 >> 5, 92 >> 5, 244 >> 5, 116 >> 5, 212 >> 5, 84 >> 5,
}; };
// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes). // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
LIBYUV_API LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565, uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither8x8, int width, int height) { const uint8* dither4x4, int width, int height) {
int y; int y;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb, void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8x8, int pix) = ARGBToRGB565DitherRow_C; const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1; return -1;
} }
...@@ -832,13 +828,13 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, ...@@ -832,13 +828,13 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
if (!dither8x8) { if (!dither4x4) {
dither8x8 = kDither565_8x8; dither4x4 = kDither565_4x4;
} }
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2) #if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2; ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
} }
} }
...@@ -853,7 +849,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb, ...@@ -853,7 +849,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
#endif #endif
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565, ARGBToRGB565DitherRow(src_argb, dst_rgb565,
dither8x8 + ((y & 7) << 3), width); *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
src_argb += src_stride_argb; src_argb += src_stride_argb;
dst_rgb565 += dst_stride_rgb565; dst_rgb565 += dst_stride_rgb565;
} }
......
...@@ -227,17 +227,17 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, 2, 4, 7) ...@@ -227,17 +227,17 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, 2, 4, 7)
#define RGBDANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \ #define RGBDANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src, uint8* dst, \ void NAMEANY(const uint8* src, uint8* dst, \
const uint8* dither8x8, int width) { \ const uint32 dither4, int width) { \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ARGBTORGB_SIMD(src, dst, dither8x8, n); \ ARGBTORGB_SIMD(src, dst, dither4, n); \
} \ } \
ARGBTORGB_C(src + n * SBPP, dst + n * BPP, dither8x8, width & MASK); \ ARGBTORGB_C(src + n * SBPP, dst + n * BPP, dither4, width & MASK); \
} }
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2) #if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
RGBDANY(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, RGBDANY(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
ARGBToRGB565DitherRow_C, 4, 2, 7) ARGBToRGB565DitherRow_C, 4, 2, 3)
#endif #endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2) #if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
RGBDANY(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, RGBDANY(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
......
...@@ -199,12 +199,20 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { ...@@ -199,12 +199,20 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
} }
} }
// dither4 is a row of 4 values from the 4x4 dither matrix.
// The 4x4 matrix contains values that are added to RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of the 4x4 matrix is the upper left.
// The 4 values are passed as an int, then referenced as an array, so
// endianness does not affect the order of the original matrix. But dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8x8, int width) { const uint32 dither4, int width) {
int x; int x;
for (x = 0; x < width - 1; x += 2) { for (x = 0; x < width - 1; x += 2) {
int dither0 = dither8x8[x & 7]; int dither0 = ((unsigned char*)(&dither4))[x & 3];
int dither1 = dither8x8[(x + 1) & 7]; int dither1 = ((unsigned char*)(&dither4))[(x + 1) & 3];
uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
...@@ -217,7 +225,7 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb, ...@@ -217,7 +225,7 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
src_argb += 8; src_argb += 8;
} }
if (width & 1) { if (width & 1) {
int dither0 = dither8x8[(width - 1) & 7]; int dither0 = ((unsigned char*)(&dither4))[(width - 1) & 3];
uint8 b0 = clamp255(src_argb[0] + dither0) >> 3; uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
uint8 g0 = clamp255(src_argb[1] + dither0) >> 2; uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
uint8 r0 = clamp255(src_argb[2] + dither0) >> 3; uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
......
...@@ -626,18 +626,17 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { ...@@ -626,18 +626,17 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
// 8 pixels // 4 pixels
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix) { const uint32 dither4, int pix) {
__asm { __asm {
mov eax, [esp + 12] // dither8
movq xmm6, qword ptr [eax] // fetch 8 dither values
punpcklbw xmm6, xmm6
movdqa xmm7, xmm6
punpcklwd xmm6, xmm6
punpckhwd xmm7, xmm7
mov eax, [esp + 4] // src_argb mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb mov edx, [esp + 8] // dst_rgb
movd xmm6, [esp + 12] // dither4
mov ecx, [esp + 16] // pix mov ecx, [esp + 16] // pix
punpcklbw xmm6, xmm6 // make dither 16 bytes
movdqa xmm7, xmm6
punpcklwd xmm6, xmm6
punpckhwd xmm7, xmm7
pcmpeqb xmm3, xmm3 // generate mask 0x0000001f pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
psrld xmm3, 27 psrld xmm3, 27
pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
...@@ -648,7 +647,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, ...@@ -648,7 +647,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
convertloop: convertloop:
movdqu xmm0, [eax] // fetch 4 pixels of argb movdqu xmm0, [eax] // fetch 4 pixels of argb
paddusb xmm0, xmm6 paddusb xmm0, xmm6 // add dither
movdqa xmm1, xmm0 // B movdqa xmm1, xmm0 // B
movdqa xmm2, xmm0 // G movdqa xmm2, xmm0 // G
pslld xmm0, 8 // R pslld xmm0, 8 // R
...@@ -661,68 +660,46 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, ...@@ -661,68 +660,46 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
por xmm1, xmm2 // BG por xmm1, xmm2 // BG
por xmm0, xmm1 // BGR por xmm0, xmm1 // BGR
packssdw xmm0, xmm0 packssdw xmm0, xmm0
lea eax, [eax + 16]
movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
lea edx, [edx + 8]
movdqu xmm0, [eax + 16] // fetch 4 pixels of argb sub ecx, 4
paddusb xmm0, xmm7
movdqa xmm1, xmm0 // B
movdqa xmm2, xmm0 // G
pslld xmm0, 8 // R
psrld xmm1, 3 // B
psrld xmm2, 5 // G
psrad xmm0, 16 // R
pand xmm1, xmm3 // B
pand xmm2, xmm4 // G
pand xmm0, xmm5 // R
por xmm1, xmm2 // BG
por xmm0, xmm1 // BGR
packssdw xmm0, xmm0
movq qword ptr [edx + 8], xmm0 // store 4 pixels of RGB565
lea eax, [eax + 32]
lea edx, [edx + 16]
sub ecx, 8
jg convertloop jg convertloop
ret ret
} }
} }
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2 #ifdef HAS_ARGBTORGB565DITHERROW_AVX2
// TODO(fbarchard): Consider vpackusdw and remove vpsrad 16
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix) { const uint32 dither4, int pix) {
__asm { __asm {
mov eax, [esp + 12] // dither8
vmovq xmm6, qword ptr [eax] // fetch 8 dither values
vpunpcklbw xmm6, xmm6, xmm6
vpermq ymm6, ymm6, 0xd8
vpunpcklwd ymm6, ymm6, ymm6
mov eax, [esp + 4] // src_argb mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb mov edx, [esp + 8] // dst_rgb
vbroadcastss xmm6, [esp + 12] // dither4
mov ecx, [esp + 16] // pix mov ecx, [esp + 16] // pix
vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes
vpermq ymm6, ymm6, 0xd8
vpunpcklwd ymm6, ymm6, ymm6
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
vpsrld ymm3, ymm3, 27 vpsrld ymm3, ymm3, 27
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
vpsrld ymm4, ymm4, 26 vpsrld ymm4, ymm4, 26
vpslld ymm4, ymm4, 5 vpslld ymm4, ymm4, 5
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xfffff800 vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
vpslld ymm5, ymm5, 11
convertloop: convertloop:
vmovdqu ymm0, [eax] // fetch 8 pixels of argb vmovdqu ymm0, [eax] // fetch 8 pixels of argb
vpaddusb ymm0, ymm0, ymm6 vpaddusb ymm0, ymm0, ymm6 // add dither
vpsrld ymm2, ymm0, 5 // G vpsrld ymm2, ymm0, 5 // G
vpsrld ymm1, ymm0, 3 // B vpsrld ymm1, ymm0, 3 // B
vpslld ymm0, ymm0, 8 // R vpsrld ymm0, ymm0, 8 // R
vpand ymm2, ymm2, ymm4 // G vpand ymm2, ymm2, ymm4 // G
vpand ymm1, ymm1, ymm3 // B vpand ymm1, ymm1, ymm3 // B
vpsrad ymm0, ymm0, 16 // R
vpand ymm0, ymm0, ymm5 // R vpand ymm0, ymm0, ymm5 // R
vpor ymm1, ymm1, ymm2 // BG vpor ymm1, ymm1, ymm2 // BG
vpor ymm0, ymm0, ymm1 // BGR vpor ymm0, ymm0, ymm1 // BGR
vpackssdw ymm0, ymm0, ymm0 vpackusdw ymm0, ymm0, ymm0
vpermq ymm0, ymm0, 0xd8 vpermq ymm0, ymm0, 0xd8
lea eax, [eax + 32] lea eax, [eax + 32]
vmovdqu [edx], xmm0 // store 8 pixels of RGB565 vmovdqu [edx], xmm0 // store 8 pixels of RGB565
...@@ -807,7 +784,6 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { ...@@ -807,7 +784,6 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
} }
#ifdef HAS_ARGBTORGB565ROW_AVX2 #ifdef HAS_ARGBTORGB565ROW_AVX2
// TODO(fbarchard): Consider vpackusdw and remove vpsrad 16
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm { __asm {
...@@ -819,21 +795,19 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { ...@@ -819,21 +795,19 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
vpsrld ymm4, ymm4, 26 vpsrld ymm4, ymm4, 26
vpslld ymm4, ymm4, 5 vpslld ymm4, ymm4, 5
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xfffff800 vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
vpslld ymm5, ymm5, 11
convertloop: convertloop:
vmovdqu ymm0, [eax] // fetch 8 pixels of argb vmovdqu ymm0, [eax] // fetch 8 pixels of argb
vpsrld ymm2, ymm0, 5 // G vpsrld ymm2, ymm0, 5 // G
vpsrld ymm1, ymm0, 3 // B vpsrld ymm1, ymm0, 3 // B
vpslld ymm0, ymm0, 8 // R vpsrld ymm0, ymm0, 8 // R
vpand ymm2, ymm2, ymm4 // G vpand ymm2, ymm2, ymm4 // G
vpand ymm1, ymm1, ymm3 // B vpand ymm1, ymm1, ymm3 // B
vpsrad ymm0, ymm0, 16 // R
vpand ymm0, ymm0, ymm5 // R vpand ymm0, ymm0, ymm5 // R
vpor ymm1, ymm1, ymm2 // BG vpor ymm1, ymm1, ymm2 // BG
vpor ymm0, ymm0, ymm1 // BGR vpor ymm0, ymm0, ymm1 // BGR
vpackssdw ymm0, ymm0, ymm0 vpackusdw ymm0, ymm0, ymm0
vpermq ymm0, ymm0, 0xd8 vpermq ymm0, ymm0, 0xd8
lea eax, [eax + 32] lea eax, [eax + 32]
vmovdqu [edx], xmm0 // store 8 pixels of RGB565 vmovdqu [edx], xmm0 // store 8 pixels of RGB565
......
...@@ -1381,18 +1381,14 @@ TEST_F(libyuvTest, TestYToARGB) { ...@@ -1381,18 +1381,14 @@ TEST_F(libyuvTest, TestYToARGB) {
} }
} }
static const uint8 kNoDither8x8[64] = { static const uint8 kNoDither4x4[16] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
}; };
TEST_F(libyuvTest, TestDither) { TEST_F(libyuvTest, TestNoDither) {
align_buffer_64(src_argb, benchmark_width_ * benchmark_height_ * 4); align_buffer_64(src_argb, benchmark_width_ * benchmark_height_ * 4);
align_buffer_64(dst_rgb565, benchmark_width_ * benchmark_height_ * 2); align_buffer_64(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
align_buffer_64(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2); align_buffer_64(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2);
...@@ -1404,7 +1400,7 @@ TEST_F(libyuvTest, TestDither) { ...@@ -1404,7 +1400,7 @@ TEST_F(libyuvTest, TestDither) {
benchmark_width_, benchmark_height_); benchmark_width_, benchmark_height_);
ARGBToRGB565Dither(src_argb, benchmark_width_ * 4, ARGBToRGB565Dither(src_argb, benchmark_width_ * 4,
dst_rgb565dither, benchmark_width_ * 2, dst_rgb565dither, benchmark_width_ * 2,
kNoDither8x8, benchmark_width_, benchmark_height_); kNoDither4x4, benchmark_width_, benchmark_height_);
for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]);
} }
...@@ -1414,4 +1410,46 @@ TEST_F(libyuvTest, TestDither) { ...@@ -1414,4 +1410,46 @@ TEST_F(libyuvTest, TestDither) {
free_aligned_buffer_64(dst_rgb565dither); free_aligned_buffer_64(dst_rgb565dither);
} }
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
// 16 bytes laid out as 4 rows of 4 values; the first byte is the upper left.
// TestDither passes this table to ARGBToRGB565Dither as its dither4x4
// argument.
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Verifies that ARGB -> RGB565 with the 4x4 ordered dither stays close to the
// plain ARGB -> RGB565 conversion. Both RGB565 images are expanded back to
// ARGB and every byte of the two results must agree within 9.
TEST_F(libyuvTest, TestDither) {
  const int kArgbStride = benchmark_width_ * 4;
  const int kRgb565Stride = benchmark_width_ * 2;
  const int kArgbBytes = benchmark_width_ * benchmark_height_ * 4;
  const int kRgb565Bytes = benchmark_width_ * benchmark_height_ * 2;

  align_buffer_64(src_argb, kArgbBytes);
  align_buffer_64(dst_rgb565, kRgb565Bytes);
  align_buffer_64(dst_rgb565dither, kRgb565Bytes);
  align_buffer_64(dst_argb, kArgbBytes);
  align_buffer_64(dst_argbdither, kArgbBytes);

  // Fill the source and every destination with random bytes before
  // converting.
  MemRandomize(src_argb, kArgbBytes);
  MemRandomize(dst_rgb565, kRgb565Bytes);
  MemRandomize(dst_rgb565dither, kRgb565Bytes);
  MemRandomize(dst_argb, kArgbBytes);
  MemRandomize(dst_argbdither, kArgbBytes);

  // Reference path: no dither.
  ARGBToRGB565(src_argb, kArgbStride,
               dst_rgb565, kRgb565Stride,
               benchmark_width_, benchmark_height_);
  // Path under test: 4x4 ordered dither.
  ARGBToRGB565Dither(src_argb, kArgbStride,
                     dst_rgb565dither, kRgb565Stride,
                     kDither565_4x4, benchmark_width_, benchmark_height_);

  // Expand both results back to ARGB so they are compared per 8-bit channel.
  RGB565ToARGB(dst_rgb565, kRgb565Stride,
               dst_argb, kArgbStride,
               benchmark_width_, benchmark_height_);
  RGB565ToARGB(dst_rgb565dither, kRgb565Stride,
               dst_argbdither, kArgbStride,
               benchmark_width_, benchmark_height_);

  for (int i = 0; i < kArgbBytes; ++i) {
    EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9);
  }

  free_aligned_buffer_64(src_argb);
  free_aligned_buffer_64(dst_rgb565);
  free_aligned_buffer_64(dst_rgb565dither);
  free_aligned_buffer_64(dst_argb);
  free_aligned_buffer_64(dst_argbdither);
}
} // namespace libyuv } // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment