Commit bdeb9ac5 authored by fbarchard@google.com's avatar fbarchard@google.com

switch from 8x8 to 4x4 matrix for dithering

BUG=407
TESTED=Dither unittests
R=brucedawson@google.com

Review URL: https://webrtc-codereview.appspot.com/46459004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1310 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 0fe4abbc
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1309
Version: 1310
License: BSD
License File: LICENSE
......
......@@ -61,12 +61,15 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes).
// Values in dither matrix from 0 to 255. 128 is best for no dither.
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
// Values in dither matrix from 0 to 7 recommended.
// The dither matrix is stored row by row; the first byte is the upper left.
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
// const uint8(*dither)[4][4];
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither8x8, int width, int height);
const uint8* dither4x4, int width, int height);
// Convert ARGB To ARGB1555.
LIBYUV_API
......
......@@ -910,11 +910,11 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix);
const uint32 dither4, int pix);
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix);
const uint32 dither4, int pix);
void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix);
const uint32 dither4, int pix);
void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
......@@ -1384,9 +1384,9 @@ void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix);
const uint32 dither4, int pix);
void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix);
const uint32 dither4, int pix);
void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix);
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1309
#define LIBYUV_VERSION 1310
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -805,25 +805,21 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
}
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
static const uint8 kDither565_8x8[64] = {
0 >> 5, 128 >> 5, 32 >> 5, 160 >> 5, 8 >> 5, 136 >> 5, 40 >> 5, 168 >> 5,
192 >> 5, 64 >> 5, 224 >> 5, 96 >> 5, 200 >> 5, 72 >> 5, 232 >> 5, 104 >> 5,
48 >> 5, 176 >> 5, 16 >> 5, 144 >> 5, 56 >> 5, 184 >> 5, 24 >> 5, 152 >> 5,
240 >> 5, 112 >> 5, 208 >> 5, 80 >> 5, 248 >> 5, 120 >> 5, 216 >> 5, 88 >> 5,
12 >> 5, 140 >> 5, 44 >> 5, 172 >> 5, 4 >> 5, 132 >> 5, 36 >> 5, 164 >> 5,
204 >> 5, 76 >> 5, 236 >> 5, 108 >> 5, 196 >> 5, 68 >> 5, 228 >> 5, 100 >> 5,
60 >> 5, 188 >> 5, 28 >> 5, 156 >> 5, 52 >> 5, 180 >> 5, 20 >> 5, 148 >> 5,
252 >> 5, 124 >> 5, 220 >> 5, 92 >> 5, 244 >> 5, 116 >> 5, 212 >> 5, 84 >> 5,
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes).
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
LIBYUV_API
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
const uint8* dither8x8, int width, int height) {
const uint8* dither4x4, int width, int height) {
int y;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8x8, int pix) = ARGBToRGB565DitherRow_C;
const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1;
}
......@@ -832,13 +828,13 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
if (!dither8x8) {
dither8x8 = kDither565_8x8;
if (!dither4x4) {
dither4x4 = kDither565_4x4;
}
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2;
}
}
......@@ -853,7 +849,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
#endif
for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
dither8x8 + ((y & 7) << 3), width);
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
src_argb += src_stride_argb;
dst_rgb565 += dst_stride_rgb565;
}
......
......@@ -227,17 +227,17 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, 2, 4, 7)
#define RGBDANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src, uint8* dst, \
const uint8* dither8x8, int width) { \
const uint32 dither4, int width) { \
int n = width & ~MASK; \
if (n > 0) { \
ARGBTORGB_SIMD(src, dst, dither8x8, n); \
ARGBTORGB_SIMD(src, dst, dither4, n); \
} \
ARGBTORGB_C(src + n * SBPP, dst + n * BPP, dither8x8, width & MASK); \
ARGBTORGB_C(src + n * SBPP, dst + n * BPP, dither4, width & MASK); \
}
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
RGBDANY(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2,
ARGBToRGB565DitherRow_C, 4, 2, 7)
ARGBToRGB565DitherRow_C, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
RGBDANY(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2,
......
......@@ -199,12 +199,20 @@ void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
// dither4 is a row of 4 values from 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of the 4x4 matrix is the upper left value.
// The 4 values are passed as an int, then referenced as an array, so
// endianness will not affect the order of the original matrix. But dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8x8, int width) {
const uint32 dither4, int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
int dither0 = dither8x8[x & 7];
int dither1 = dither8x8[(x + 1) & 7];
int dither0 = ((unsigned char*)(&dither4))[x & 3];
int dither1 = ((unsigned char*)(&dither4))[(x + 1) & 3];
uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
......@@ -217,7 +225,7 @@ void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
src_argb += 8;
}
if (width & 1) {
int dither0 = dither8x8[(width - 1) & 7];
int dither0 = ((unsigned char*)(&dither4))[(width - 1) & 3];
uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
......
......@@ -626,18 +626,17 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
// 4 pixels per loop iteration.
__declspec(naked) __declspec(align(16))
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix) {
const uint32 dither4, int pix) {
__asm {
mov eax, [esp + 12] // dither8
movq xmm6, qword ptr [eax] // fetch 8 dither values
punpcklbw xmm6, xmm6
movdqa xmm7, xmm6
punpcklwd xmm6, xmm6
punpckhwd xmm7, xmm7
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
movd xmm6, [esp + 12] // dither4
mov ecx, [esp + 16] // pix
punpcklbw xmm6, xmm6 // make dither 16 bytes
movdqa xmm7, xmm6
punpcklwd xmm6, xmm6
punpckhwd xmm7, xmm7
pcmpeqb xmm3, xmm3 // generate mask 0x0000001f
psrld xmm3, 27
pcmpeqb xmm4, xmm4 // generate mask 0x000007e0
......@@ -648,7 +647,7 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
convertloop:
movdqu xmm0, [eax] // fetch 4 pixels of argb
paddusb xmm0, xmm6
paddusb xmm0, xmm6 // add dither
movdqa xmm1, xmm0 // B
movdqa xmm2, xmm0 // G
pslld xmm0, 8 // R
......@@ -661,68 +660,46 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
por xmm1, xmm2 // BG
por xmm0, xmm1 // BGR
packssdw xmm0, xmm0
lea eax, [eax + 16]
movq qword ptr [edx], xmm0 // store 4 pixels of RGB565
movdqu xmm0, [eax + 16] // fetch 4 pixels of argb
paddusb xmm0, xmm7
movdqa xmm1, xmm0 // B
movdqa xmm2, xmm0 // G
pslld xmm0, 8 // R
psrld xmm1, 3 // B
psrld xmm2, 5 // G
psrad xmm0, 16 // R
pand xmm1, xmm3 // B
pand xmm2, xmm4 // G
pand xmm0, xmm5 // R
por xmm1, xmm2 // BG
por xmm0, xmm1 // BGR
packssdw xmm0, xmm0
movq qword ptr [edx + 8], xmm0 // store 4 pixels of RGB565
lea eax, [eax + 32]
lea edx, [edx + 16]
sub ecx, 8
lea edx, [edx + 8]
sub ecx, 4
jg convertloop
ret
}
}
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
// TODO(fbarchard): Consider vpackusdw and remove vpsrad 16
__declspec(naked) __declspec(align(16))
void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
const uint8* dither8, int pix) {
const uint32 dither4, int pix) {
__asm {
mov eax, [esp + 12] // dither8
vmovq xmm6, qword ptr [eax] // fetch 8 dither values
vpunpcklbw xmm6, xmm6, xmm6
vpermq ymm6, ymm6, 0xd8
vpunpcklwd ymm6, ymm6, ymm6
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_rgb
vbroadcastss xmm6, [esp + 12] // dither4
mov ecx, [esp + 16] // pix
vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes
vpermq ymm6, ymm6, 0xd8
vpunpcklwd ymm6, ymm6, ymm6
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f
vpsrld ymm3, ymm3, 27
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
vpsrld ymm4, ymm4, 26
vpslld ymm4, ymm4, 5
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xfffff800
vpslld ymm5, ymm5, 11
vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
convertloop:
vmovdqu ymm0, [eax] // fetch 8 pixels of argb
vpaddusb ymm0, ymm0, ymm6
vpaddusb ymm0, ymm0, ymm6 // add dither
vpsrld ymm2, ymm0, 5 // G
vpsrld ymm1, ymm0, 3 // B
vpslld ymm0, ymm0, 8 // R
vpsrld ymm0, ymm0, 8 // R
vpand ymm2, ymm2, ymm4 // G
vpand ymm1, ymm1, ymm3 // B
vpsrad ymm0, ymm0, 16 // R
vpand ymm0, ymm0, ymm5 // R
vpor ymm1, ymm1, ymm2 // BG
vpor ymm0, ymm0, ymm1 // BGR
vpackssdw ymm0, ymm0, ymm0
vpackusdw ymm0, ymm0, ymm0
vpermq ymm0, ymm0, 0xd8
lea eax, [eax + 32]
vmovdqu [edx], xmm0 // store 8 pixels of RGB565
......@@ -807,7 +784,6 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) {
}
#ifdef HAS_ARGBTORGB565ROW_AVX2
// TODO(fbarchard): Consider vpackusdw and remove vpsrad 16
__declspec(naked) __declspec(align(16))
void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
__asm {
......@@ -819,21 +795,19 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) {
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0
vpsrld ymm4, ymm4, 26
vpslld ymm4, ymm4, 5
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xfffff800
vpslld ymm5, ymm5, 11
vpslld ymm5, ymm3, 11 // generate mask 0x0000f800
convertloop:
vmovdqu ymm0, [eax] // fetch 8 pixels of argb
vpsrld ymm2, ymm0, 5 // G
vpsrld ymm1, ymm0, 3 // B
vpslld ymm0, ymm0, 8 // R
vpsrld ymm0, ymm0, 8 // R
vpand ymm2, ymm2, ymm4 // G
vpand ymm1, ymm1, ymm3 // B
vpsrad ymm0, ymm0, 16 // R
vpand ymm0, ymm0, ymm5 // R
vpor ymm1, ymm1, ymm2 // BG
vpor ymm0, ymm0, ymm1 // BGR
vpackssdw ymm0, ymm0, ymm0
vpackusdw ymm0, ymm0, ymm0
vpermq ymm0, ymm0, 0xd8
lea eax, [eax + 32]
vmovdqu [edx], xmm0 // store 8 pixels of RGB565
......
......@@ -1381,18 +1381,14 @@ TEST_F(libyuvTest, TestYToARGB) {
}
}
static const uint8 kNoDither8x8[64] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
static const uint8 kNoDither4x4[16] = {
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
};
TEST_F(libyuvTest, TestDither) {
TEST_F(libyuvTest, TestNoDither) {
align_buffer_64(src_argb, benchmark_width_ * benchmark_height_ * 4);
align_buffer_64(dst_rgb565, benchmark_width_ * benchmark_height_ * 2);
align_buffer_64(dst_rgb565dither, benchmark_width_ * benchmark_height_ * 2);
......@@ -1404,7 +1400,7 @@ TEST_F(libyuvTest, TestDither) {
benchmark_width_, benchmark_height_);
ARGBToRGB565Dither(src_argb, benchmark_width_ * 4,
dst_rgb565dither, benchmark_width_ * 2,
kNoDither8x8, benchmark_width_, benchmark_height_);
kNoDither4x4, benchmark_width_, benchmark_height_);
for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]);
}
......@@ -1414,4 +1410,46 @@ TEST_F(libyuvTest, TestDither) {
free_aligned_buffer_64(dst_rgb565dither);
}
// Ordered 4x4 dither for 888 to 565. Values from 0 to 7.
// Laid out as 4 rows of 4 bytes (16 bytes total); the first byte is the
// upper left entry of the matrix.
// Passed explicitly to ARGBToRGB565Dither by TestDither.
static const uint8 kDither565_4x4[16] = {
0, 4, 1, 5,
6, 2, 7, 3,
1, 5, 0, 4,
7, 3, 6, 2,
};
// Converts the same random ARGB image to RGB565 twice, once without dither
// and once with the 4x4 dither table, expands both results back to ARGB and
// checks that every byte of the two expanded images differs by at most 9.
TEST_F(libyuvTest, TestDither) {
  // Sizes and strides shared by every buffer and conversion call below.
  const int pixel_count = benchmark_width_ * benchmark_height_;
  const int argb_stride = benchmark_width_ * 4;
  const int rgb565_stride = benchmark_width_ * 2;

  align_buffer_64(src_argb, pixel_count * 4);
  align_buffer_64(dst_rgb565, pixel_count * 2);
  align_buffer_64(dst_rgb565dither, pixel_count * 2);
  align_buffer_64(dst_argb, pixel_count * 4);
  align_buffer_64(dst_argbdither, pixel_count * 4);

  // Randomize the source and all destinations before converting.
  MemRandomize(src_argb, pixel_count * 4);
  MemRandomize(dst_rgb565, pixel_count * 2);
  MemRandomize(dst_rgb565dither, pixel_count * 2);
  MemRandomize(dst_argb, pixel_count * 4);
  MemRandomize(dst_argbdither, pixel_count * 4);

  // Reference path: plain ARGB -> RGB565.
  ARGBToRGB565(src_argb, argb_stride,
               dst_rgb565, rgb565_stride,
               benchmark_width_, benchmark_height_);
  // Path under test: ARGB -> RGB565 with the 4x4 dither table.
  ARGBToRGB565Dither(src_argb, argb_stride,
                     dst_rgb565dither, rgb565_stride,
                     kDither565_4x4, benchmark_width_, benchmark_height_);

  // Expand both 565 images back to ARGB so they can be compared per byte.
  RGB565ToARGB(dst_rgb565, rgb565_stride,
               dst_argb, argb_stride,
               benchmark_width_, benchmark_height_);
  RGB565ToARGB(dst_rgb565dither, rgb565_stride,
               dst_argbdither, argb_stride,
               benchmark_width_, benchmark_height_);

  const int argb_bytes = pixel_count * 4;
  for (int i = 0; i < argb_bytes; ++i) {
    EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9);
  }

  free_aligned_buffer_64(src_argb);
  free_aligned_buffer_64(dst_rgb565);
  free_aligned_buffer_64(dst_rgb565dither);
  free_aligned_buffer_64(dst_argb);
  free_aligned_buffer_64(dst_argbdither);
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment