Commit 81b804e3 authored by fbarchard@google.com

ARGBQuantize to do a posterizing effect. Added random resolution unittest.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/654005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@289 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent e442dc4c
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 288
Version: 289
License: BSD
License File: LICENSE
......
......@@ -216,8 +216,11 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int x, int y, int width, int height);
// Apply a 4x3 matrix rotation to each ARGB pixel.
// Apply a matrix rotation to each ARGB pixel.
// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1.
// The first 4 coefficients apply to B, G, R, A and produce B of the output.
// The next 4 coefficients apply to B, G, R, A and produce G of the output.
// The last 4 coefficients apply to B, G, R, A and produce R of the output.
int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
const int8* matrix_argb,
int x, int y, int width, int height);
......@@ -228,6 +231,14 @@ int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
const uint8* table_argb,
int x, int y, int width, int height);
// Quantize a rectangle of ARGB in place. Alpha unaffected.
// Each B, G and R value v is replaced by
//   (v * scale >> 16) * interval_size + interval_offset
// scale is a 16 bit fractional fixed point scaler between 0 and 65535.
// interval_size should be a value between 1 and 255.
// interval_offset should be a value between 0 and 255.
// x, y select the first pixel of the rectangle; width and height are its size
// in pixels.
// Returns 0 on success, or -1 if dst_argb is NULL, width or height is not
// positive, x or y is negative, or interval_size is outside 1 to 255.
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
int scale, int interval_size, int interval_offset,
int x, int y, int width, int height);
// Copy ARGB to ARGB.
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 288
#define LIBYUV_VERSION 289
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -1448,8 +1448,7 @@ int ARGBGray(uint8* dst_argb, int dst_stride_argb,
// Make a rectangle of ARGB Sepia tone.
int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
int dst_x, int dst_y,
int width, int height) {
int dst_x, int dst_y, int width, int height) {
if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
return -1;
}
......@@ -1513,6 +1512,37 @@ int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
}
return 0;
}
// ARGBQuantize is used to posterize art.
// e.g. rgb / qvalue * qvalue + qvalue / 2
// But the low levels implement efficiently with 3 parameters, and could be
// used for other high level operations.
// The divide is replaced with a multiply by a reciprocal in 16 bit fixed point.
// Caveat - although SSE2 saturates, the C function does not and should be used
// with care if doing anything but quantization.
//
// dst_argb / dst_stride_argb: ARGB image modified in place and its row stride
//   in bytes.
// scale, interval_size, interval_offset: each B, G, R value v becomes
//   (v * scale >> 16) * interval_size + interval_offset.
// dst_x, dst_y, width, height: rectangle to quantize, in pixels.
// Returns 0 on success, -1 if an argument is rejected by the check below.
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
                 int scale, int interval_size, int interval_offset,
                 int dst_x, int dst_y, int width, int height) {
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
      interval_size < 1 || interval_size > 255) {
    return -1;
  }
  // Address of the first pixel of the rectangle (4 bytes per ARGB pixel).
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
                          int interval_offset, int width) = ARGBQuantizeRow_C;
#if defined(HAS_ARGBQUANTIZEROW_SSE2)
  // The SSE2 row uses aligned 16 byte loads and stores on the pointer it is
  // given, so test the rectangle start (dst) rather than the image base:
  // dst_x * 4 is only a multiple of 16 when dst_x is a multiple of 4.
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
    dst += dst_stride_argb;
  }
  return 0;
}
#ifdef HAVE_JPEG
struct ARGBBuffers {
uint8* argb;
......
......@@ -76,6 +76,7 @@ extern "C" {
#define HAS_ARGBGRAYROW_SSSE3
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBCOLORMATRIXROW_SSSE3
#define HAS_ARGBQUANTIZEROW_SSE2
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
#endif
......@@ -85,7 +86,6 @@ extern "C" {
#define HAS_ARGBCOLORTABLEROW_X86
#endif
// The following are disabled when SSSE3 is available:
#if !defined(YUV_DISABLE_ASM) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
......@@ -494,8 +494,12 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
int width);
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
int width);
void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width);
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width);
// Used for blur.
void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
......
......@@ -363,6 +363,19 @@ void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
}
}
// Quantize one row of ARGB pixels in place (portable C version).
// For each of the B, G and R bytes the new value is
//   (value * scale >> 16) * interval_size + interval_offset
// stored back into a uint8 without saturation. The alpha byte (index 3) of
// every pixel is left as it was.
void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
                       int interval_offset, int width) {
  uint8* pixel = dst_argb;
  for (int remaining = width; remaining > 0; --remaining, pixel += 4) {
    // Channels 0..2 are B, G, R; channel 3 (alpha) is skipped.
    for (int channel = 0; channel < 3; ++channel) {
      const int level = pixel[channel] * scale >> 16;
      pixel[channel] = level * interval_size + interval_offset;
    }
  }
}
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
// Copy a Y to RGB.
for (int x = 0; x < width; ++x) {
......
......@@ -2589,7 +2589,7 @@ void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) {
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrld $0x8,%%xmm5 \n"
// 4 pixel loop
// 4 pixel loop.
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
......@@ -2644,7 +2644,7 @@ void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) {
"movdqa %3,%%xmm4 \n"
"movdqa %4,%%xmm5 \n"
// 4 pixel loop
// 4 pixel loop.
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
......@@ -2691,7 +2691,7 @@ void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
"pcmpeqb %%xmm4,%%xmm4 \n"
"pslld $0x18,%%xmm4 \n"
// 4 pixel loop
// 4 pixel loop.
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
......@@ -2745,7 +2745,8 @@ CONST vec8 kARGBToGray = {
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
asm volatile (
"movdqa %2,%%xmm4 \n"
// 8 pixel loop \n"
// 8 pixel loop.
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
......@@ -2806,7 +2807,8 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
"movdqa %2,%%xmm2 \n"
"movdqa %3,%%xmm3 \n"
"movdqa %4,%%xmm4 \n"
// 8 pixel loop \n"
// 8 pixel loop.
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
......@@ -2871,7 +2873,8 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
"pshufd $0x0,%%xmm2,%%xmm2 \n"
"pshufd $0x0,%%xmm3,%%xmm3 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
// 8 pixel loop \n"
// 8 pixel loop.
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
......@@ -2922,6 +2925,59 @@ void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb,
}
#endif // HAS_ARGBCOLORMATRIXROW_SSSE3
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize a row of ARGB pixels in place, 4 pixels (16 bytes) per iteration.
// Each B, G, R byte v becomes (v * scale >> 16) * interval_size +
// interval_offset, packed back to bytes with unsigned saturation. The alpha
// byte of every pixel is copied from the source unchanged.
// dst_argb is read and written with movdqa, so it must be aligned to 16 bytes.
// width is consumed 4 pixels at a time; the loop repeats while the remaining
// count is positive.
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) {
asm volatile (
// Load the three parameters into xmm2 (scale), xmm3 (interval_size) and
// xmm4 (interval_offset).
"movd %2,%%xmm2 \n"
"movd %3,%%xmm3 \n"
"movd %4,%%xmm4 \n"
// pshuflw 0x40 copies the low 16 bits of the parameter into word lanes 0-2
// (B, G, R) and its upper 16 bits into lane 3 (A); pshufd 0x44 then repeats
// that 64 bit pattern in the upper half so two pixels are covered.
"pshuflw $0x40,%%xmm2,%%xmm2 \n"
"pshufd $0x44,%%xmm2,%%xmm2 \n"
"pshuflw $0x40,%%xmm3,%%xmm3 \n"
"pshufd $0x44,%%xmm3,%%xmm3 \n"
"pshuflw $0x40,%%xmm4,%%xmm4 \n"
"pshufd $0x44,%%xmm4,%%xmm4 \n"
// xmm5 = 0, used to widen bytes to words.
"pxor %%xmm5,%%xmm5 \n"
// xmm6 = 0xff000000 in every dword: the alpha mask.
"pcmpeqb %%xmm6,%%xmm6 \n"
"pslld $0x18,%%xmm6 \n"
// 4 pixel loop.
// NOTE(review): the other loops in this file use .p2align 4 and the Visual C
// version of this function uses align 16; confirm that 2 is intended here.
".p2align 2 \n"
"1: \n"
// First 2 pixels widened to words, then high half of (pixel * scale).
"movdqa (%0),%%xmm0 \n"
"punpcklbw %%xmm5,%%xmm0 \n"
"pmulhuw %%xmm2,%%xmm0 \n"
// Next 2 pixels, same treatment.
"movdqa (%0),%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm1 \n"
"pmulhuw %%xmm2,%%xmm1 \n"
// Multiply by interval_size; xmm7 keeps only the original alpha bytes.
"pmullw %%xmm3,%%xmm0 \n"
"movdqa (%0),%%xmm7 \n"
"pmullw %%xmm3,%%xmm1 \n"
"pand %%xmm6,%%xmm7 \n"
// Add interval_offset, pack to bytes with saturation, merge alpha back in.
"paddw %%xmm4,%%xmm0 \n"
"paddw %%xmm4,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"por %%xmm7,%%xmm0 \n"
// The flags set by sub are still intact at jg: movdqa and lea do not
// modify them.
"sub $0x4,%1 \n"
"movdqa %%xmm0,(%0) \n"
"lea 0x10(%0),%0 \n"
"jg 1b \n"
: "+r"(dst_argb), // %0
"+r"(width) // %1
: "r"(scale), // %2
"r"(interval_size), // %3
"r"(interval_offset) // %4
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
);
}
#endif // HAS_ARGBQUANTIZEROW_SSE2
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values
// above and to the left of the value, inclusive of the value.
......
......@@ -2930,6 +2930,7 @@ void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
}
}
#endif // HAS_ARGBSEPIAROW_SSSE3
#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
// Transform 8 ARGB pixels (32 bytes) with color matrix.
// Same as Sepia except matrix is provided.
......@@ -3042,6 +3043,53 @@ void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
}
#endif // HAS_ARGBCOLORTABLEROW_X86
#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize a row of ARGB pixels in place, 4 pixels (16 bytes) per iteration.
// Each B, G, R byte v becomes (v * scale >> 16) * interval_size +
// interval_offset, packed back to bytes with unsigned saturation. The alpha
// byte of every pixel is copied from the source unchanged.
// dst_argb is read and written with movdqa, so it must be aligned to 16 bytes.
// The function is naked: arguments are read directly from the stack at
// [esp + 4] onwards and there is no compiler generated prolog or epilog.
__declspec(naked) __declspec(align(16))
void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
int interval_offset, int width) {
__asm {
mov eax, [esp + 4] /* dst_argb */
movd xmm2, [esp + 8] /* scale */
movd xmm3, [esp + 12] /* interval_size */
movd xmm4, [esp + 16] /* interval_offset */
mov ecx, [esp + 20] /* width */
// pshuflw 040h copies the low 16 bits of the parameter into word lanes 0-2
// (B, G, R) and its upper 16 bits into lane 3 (A); pshufd 044h repeats that
// 64 bit pattern in the upper half so two pixels are covered.
pshuflw xmm2, xmm2, 040h
pshufd xmm2, xmm2, 044h
pshuflw xmm3, xmm3, 040h
pshufd xmm3, xmm3, 044h
pshuflw xmm4, xmm4, 040h
pshufd xmm4, xmm4, 044h
pxor xmm5, xmm5 // constant 0
pcmpeqb xmm6, xmm6 // generate mask 0xff000000
pslld xmm6, 24
align 16
convertloop:
movdqa xmm0, [eax] // read 4 pixels
punpcklbw xmm0, xmm5 // first 2 pixels
pmulhuw xmm0, xmm2 // pixel * scale >> 16
movdqa xmm1, [eax] // read 4 pixels
punpckhbw xmm1, xmm5 // next 2 pixels
pmulhuw xmm1, xmm2
pmullw xmm0, xmm3 // * interval_size
movdqa xmm7, [eax] // read 4 pixels
pmullw xmm1, xmm3
pand xmm7, xmm6 // mask alpha
paddw xmm0, xmm4 // + interval_offset
paddw xmm1, xmm4
packuswb xmm0, xmm1 // words to bytes with unsigned saturation
por xmm0, xmm7 // restore original alpha
sub ecx, 4 // sets the flags tested by jg below
movdqa [eax], xmm0
lea eax, [eax + 16]
jg convertloop
ret
}
}
#endif // HAS_ARGBQUANTIZEROW_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2
// Consider float CumulativeSum.
// Consider calling CumulativeSum one row at time as needed.
......
This diff is collapsed.
......@@ -236,14 +236,14 @@ TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_OptVsC) { \
} \
MaskCpuFlags(kCpuInitialized); \
##FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
dst_argb_c, kWidth * BPP_B, \
kWidth, kHeight); \
dst_argb_c, kWidth * BPP_B, \
kWidth, kHeight); \
MaskCpuFlags(-1); \
const int runs = 1000; \
for (int i = 0; i < runs; ++i) { \
##FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
dst_argb_opt, kWidth * BPP_B, \
kWidth, kHeight); \
dst_argb_opt, kWidth * BPP_B, \
kWidth, kHeight); \
} \
int err = 0; \
for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \
......@@ -279,6 +279,58 @@ TESTATOB(YUY2, 2, 2, ARGB, 4)
TESTATOB(UYVY, 2, 2, ARGB, 4)
TESTATOB(M420, 3 / 2, 1, ARGB, 4)
// Defines a test named <FMT_A>To<FMT_B>_Random that runs the FMT_A to FMT_B
// conversion on 1000 images of random size (width 1 to 64, height 1 to 32)
// filled with random bytes.
// Each image is converted twice: once after MaskCpuFlags(kCpuInitialized)
// into dst_argb_c and once after MaskCpuFlags(-1) into dst_argb_opt
// (presumably C only versus all CPU optimizations - confirm against
// MaskCpuFlags). The test expects no output byte to differ by more than 2.
// BPP_A / BPP_B size the source and destination buffers; STRIDE_A is the
// per pixel factor used for the source stride.
// The seed comes from time(NULL), so a failing run is not reproducible.
#define TESTATOBRANDOM(FMT_A, BPP_A, STRIDE_A, FMT_B, BPP_B) \
TEST_F(libyuvTest, ##FMT_A##To##FMT_B##_Random) { \
srandom(time(NULL)); \
for (int times = 0; times < 1000; ++times) { \
const int kWidth = (random() & 63) + 1; \
const int kHeight = (random() & 31) + 1; \
align_buffer_page_end(src_argb, (kWidth * BPP_A) * kHeight); \
align_buffer_page_end(dst_argb_c, (kWidth * BPP_B) * kHeight); \
align_buffer_page_end(dst_argb_opt, (kWidth * BPP_B) * kHeight); \
for (int i = 0; i < kHeight * kWidth * BPP_A; ++i) { \
src_argb[i] = (random() & 0xff); \
} \
MaskCpuFlags(kCpuInitialized); \
##FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
dst_argb_c, kWidth * BPP_B, \
kWidth, kHeight); \
MaskCpuFlags(-1); \
##FMT_A##To##FMT_B(src_argb, kWidth * STRIDE_A, \
dst_argb_opt, kWidth * BPP_B, \
kWidth, kHeight); \
int err = 0; \
for (int i = 0; i < kHeight * kWidth * BPP_B; ++i) { \
int diff = static_cast<int>(dst_argb_c[i]) - \
static_cast<int>(dst_argb_opt[i]); \
if (abs(diff) > 2) \
err++; \
} \
EXPECT_EQ(err, 0); \
free_aligned_buffer_page_end(src_argb) \
free_aligned_buffer_page_end(dst_argb_c) \
free_aligned_buffer_page_end(dst_argb_opt) \
} \
}
TESTATOBRANDOM(ARGB, 4, 4, ARGB, 4)
TESTATOBRANDOM(ARGB, 4, 4, BGRA, 4)
TESTATOBRANDOM(ARGB, 4, 4, ABGR, 4)
TESTATOBRANDOM(ARGB, 4, 4, RAW, 3)
TESTATOBRANDOM(ARGB, 4, 4, RGB24, 3)
TESTATOBRANDOM(ARGB, 4, 4, RGB565, 2)
TESTATOBRANDOM(ARGB, 4, 4, ARGB1555, 2)
TESTATOBRANDOM(ARGB, 4, 4, ARGB4444, 2)
TESTATOBRANDOM(BGRA, 4, 4, ARGB, 4)
TESTATOBRANDOM(ABGR, 4, 4, ARGB, 4)
TESTATOBRANDOM(RAW, 3, 3, ARGB, 4)
TESTATOBRANDOM(RGB24, 3, 3, ARGB, 4)
TESTATOBRANDOM(RGB565, 2, 2, ARGB, 4)
TESTATOBRANDOM(ARGB1555, 2, 2, ARGB, 4)
TESTATOBRANDOM(ARGB4444, 2, 2, ARGB, 4)
TEST_F(libyuvTest, TestAttenuate) {
SIMD_ALIGNED(uint8 orig_pixels[256][4]);
SIMD_ALIGNED(uint8 atten_pixels[256][4]);
......@@ -549,4 +601,28 @@ TEST_F(libyuvTest, TestARGBColorMatrix) {
}
}
// Posterizes a 256 pixel ramp into intervals of 8 (offset 4) and checks that
// B, G and R land on the expected level while alpha is untouched. The final
// loop repeats the call many times as a timing run; its output is not checked.
TEST_F(libyuvTest, TestARGBQuantize) {
  const int kPixels = 256;
  const int kInterval = 8;
  const int kOffset = kInterval / 2;
  // 16 bit fixed point reciprocal of the interval.
  const int kScale = (65536 + kOffset) / kInterval;
  const int kBenchmarkRuns = 1000 * 1280 * 720 / 256;

  SIMD_ALIGNED(uint8 orig_pixels[kPixels][4]);
  for (int p = 0; p < kPixels; ++p) {
    uint8* pixel = orig_pixels[p];
    pixel[0] = p;
    pixel[1] = p / 2;
    pixel[2] = p / 3;
    pixel[3] = p;
  }

  uint8* const row = &orig_pixels[0][0];
  ARGBQuantize(row, 0, kScale, kInterval, kOffset, 0, 0, kPixels, 1);

  for (int p = 0; p < kPixels; ++p) {
    EXPECT_EQ(p / kInterval * kInterval + kOffset, orig_pixels[p][0]);
    EXPECT_EQ(p / 2 / kInterval * kInterval + kOffset, orig_pixels[p][1]);
    EXPECT_EQ(p / 3 / kInterval * kInterval + kOffset, orig_pixels[p][2]);
    EXPECT_EQ(p, orig_pixels[p][3]);
  }

  for (int run = 0; run < kBenchmarkRuns; ++run) {
    ARGBQuantize(row, 0, kScale, kInterval, kOffset, 0, 0, kPixels, 1);
  }
}
} // namespace libyuv
This diff is collapsed.
......@@ -16,16 +16,26 @@
#define align_buffer_16(var, size) \
uint8* var; \
uint8* var##_mem; \
var##_mem = reinterpret_cast<uint8*>(calloc((size) + 15, sizeof(uint8))); \
var##_mem = reinterpret_cast<uint8*>(malloc((size) + 15)); \
var = reinterpret_cast<uint8*> \
((reinterpret_cast<intptr_t>(var##_mem) + 15) & (~0x0f)); \
((reinterpret_cast<intptr_t>(var##_mem) + 15) & ~15);
#define free_aligned_buffer_16(var) \
free(var##_mem); \
var = 0;
#ifdef WIN32
// Declares uint8* var and uint8* var_mem. var_mem receives a malloc'd block
// whose size is `size` rounded up to a multiple of 4096, and var points
// (-(size) & 4095) bytes into it, so that var + size is exactly the end of
// the allocation. The memory is not initialized.
// The last statement has no trailing ';' - the caller supplies it.
// Release with free_aligned_buffer_page_end(var).
#define align_buffer_page_end(var, size) \
uint8* var; \
uint8* var##_mem; \
var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095) & ~4095)); \
var = var##_mem + (-(size) & 4095)
// Frees the block allocated by align_buffer_page_end (var_mem) and sets var
// to 0. Both statements end in ';', so the macro can be used without a
// trailing semicolon.
#define free_aligned_buffer_page_end(var) \
free(var##_mem); \
var = 0;
#ifdef WIN32
#include <windows.h>
static double get_time() {
LARGE_INTEGER t, f;
......@@ -47,7 +57,6 @@ static double get_time() {
gettimeofday(&t, &tzp);
return t.tv_sec + t.tv_usec * 1e-6;
}
#endif
class libyuvTest : public ::testing::Test {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment