Commit e8df16bd authored by fbarchard@google.com

Sobel: use the G channel for consistency on all CPUs, better performance, and the full range of 0 to 255.

BUG=201
TESTED=out\release\libyuv_unittest --gtest_filter=*Sobel*
Review URL: https://webrtc-codereview.appspot.com/1225004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@614 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 30a96ede
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 613
Version: 614
License: BSD
License File: LICENSE
......
......@@ -19,6 +19,11 @@ extern "C" {
#endif
// TODO(fbarchard): Remove kMaxStride.
// Functions should allocate a single row buffer of this size on the stack.
// Functions that allocate more than one row buffer may fail or trigger a stack
// probe.
// This size is the width of a retina Mac display (2880 pixels) in 32 bit ARGB.
// Functions may want less for 8 or 16 bit row buffers.
#define kMaxStride (2880 * 4)
#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 613
#define LIBYUV_VERSION 614
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -72,10 +72,10 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4 &&
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
......@@ -430,9 +430,9 @@ int I420ToBayer(const uint8* src_y, int src_stride_y,
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
......
......@@ -1823,13 +1823,12 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
}
// Sobel ARGB effect.
// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
LIBYUV_API
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb || !dst_argb ||
width <= 0 || height == 0 || width > kMaxStride) {
width <= 0 || height == 0 || width > (kMaxStride / 4)) {
return -1;
}
// Negative height means invert the image.
......@@ -1838,39 +1837,25 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
// Assumed row buffer aligned.
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
clear = true;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
// ARGBToBayerRow is used to select the G channel from ARGB.
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C;
#if defined(HAS_SOBELYROW_SSSE3)
......@@ -1896,18 +1881,18 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
#endif
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]);
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
SIMD_ALIGNED(uint8 row_y[(kMaxStride / 4 + kEdge) * 3 + kEdge]);
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride / 4]);
SIMD_ALIGNED(uint8 row_sobely[kMaxStride / 4]);
// Convert first row.
uint8* row_y0 = row_y + kEdge;
uint8* row_y1 = row_y0 + kMaxStride;
uint8* row_y2 = row_y1 + kMaxStride;
ARGBToYRow(src_argb, row_y0, width);
uint8* row_y1 = row_y0 + kMaxStride / 4;
uint8* row_y2 = row_y1 + kMaxStride / 4;
ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
row_y0[-1] = row_y0[0];
row_y0[width] = row_y0[width - 1];
ARGBToYRow(src_argb, row_y1, width);
ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
row_y1[-1] = row_y1[0];
row_y1[width] = row_y1[width - 1];
......@@ -1916,7 +1901,7 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
if (y < (height - 1)) {
src_argb += src_stride_argb;
}
ARGBToYRow(src_argb, row_y2, width);
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
row_y2[-1] = row_y2[0];
row_y2[width] = row_y2[width - 1];
......@@ -1932,23 +1917,17 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
dst_argb += dst_stride_argb;
}
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
if (clear) {
__asm vzeroupper;
}
#endif
return 0;
}
// SobelXY ARGB effect.
// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
LIBYUV_API
int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb || !dst_argb ||
width <= 0 || height == 0 || width > kMaxStride) {
width <= 0 || height == 0 || width > kMaxStride / 4) {
return -1;
}
// Negative height means invert the image.
......@@ -1957,35 +1936,22 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
// Assumed row buffer aligned.
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#endif
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
bool clear = false;
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
clear = true;
ARGBToYRow = ARGBToYRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToYRow = ARGBToYRow_AVX2;
// ARGBToBayerRow is used to select the G channel from ARGB.
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 4)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
......@@ -2015,18 +1981,18 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
#endif
const int kEdge = 16; // Extra pixels at start of row for extrude/align.
SIMD_ALIGNED(uint8 row_y[(kMaxStride + kEdge) * 3 + kEdge]);
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
SIMD_ALIGNED(uint8 row_y[(kMaxStride / 4 + kEdge) * 3 + kEdge]);
SIMD_ALIGNED(uint8 row_sobelx[kMaxStride / 4]);
SIMD_ALIGNED(uint8 row_sobely[kMaxStride / 4]);
// Convert first row.
uint8* row_y0 = row_y + kEdge;
uint8* row_y1 = row_y0 + kMaxStride;
uint8* row_y2 = row_y1 + kMaxStride;
ARGBToYRow(src_argb, row_y0, width);
uint8* row_y1 = row_y0 + kMaxStride / 4;
uint8* row_y2 = row_y1 + kMaxStride / 4;
ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
row_y0[-1] = row_y0[0];
row_y0[width] = row_y0[width - 1];
ARGBToYRow(src_argb, row_y1, width);
ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
row_y1[-1] = row_y1[0];
row_y1[width] = row_y1[width - 1];
......@@ -2035,7 +2001,7 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
if (y < (height - 1)) {
src_argb += src_stride_argb;
}
ARGBToYRow(src_argb, row_y2, width);
ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
row_y2[-1] = row_y2[0];
row_y2[width] = row_y2[width - 1];
......@@ -2051,11 +2017,6 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
dst_argb += dst_stride_argb;
}
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
if (clear) {
__asm vzeroupper;
}
#endif
return 0;
}
......
......@@ -186,7 +186,7 @@ RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C,
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C,
3, 4, 1)
7, 4, 1)
#endif
#if defined(HAS_ARGBTOBAYERROW_NEON)
BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C,
......
......@@ -4595,11 +4595,14 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
".p2align 4 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
"pshufb %%xmm5,%%xmm0 \n"
"sub $0x4,%2 \n"
"movd %%xmm0,(%1) \n"
"lea 0x4(%1),%1 \n"
"pshufb %%xmm5,%%xmm1 \n"
"punpckldq xmm1, xmm0 \n"
"sub $0x8,%2 \n"
"movq %%xmm0,(%1) \n"
"lea 0x8(%1),%1 \n"
"jg 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_bayer), // %1
......@@ -4607,7 +4610,7 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
: "g"(selector) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm5"
, "xmm0", "xmm1", "xmm5"
#endif
);
}
......
......@@ -5795,11 +5795,14 @@ void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
align 16
wloop:
movdqa xmm0, [eax]
lea eax, [eax + 16]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
pshufb xmm0, xmm5
sub ecx, 4
movd [edx], xmm0
lea edx, [edx + 4]
pshufb xmm1, xmm5
punpckldq xmm0, xmm1
sub ecx, 8
movq qword ptr [edx], xmm0
lea edx, [edx + 8]
jg wloop
ret
}
......
......@@ -1352,25 +1352,25 @@ static int TestSobel(int width, int height, int benchmark_iterations,
TEST_F(libyuvTest, ARGBSobel_Any) {
int max_diff = TestSobel(benchmark_width_ - 1, benchmark_height_,
benchmark_iterations_, +1, 0);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBSobel_Unaligned) {
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
benchmark_iterations_, +1, 1);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBSobel_Invert) {
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
benchmark_iterations_, -1, 0);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBSobel_Opt) {
int max_diff = TestSobel(benchmark_width_, benchmark_height_,
benchmark_iterations_, +1, 0);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
static int TestSobelXY(int width, int height, int benchmark_iterations,
......@@ -1415,25 +1415,25 @@ static int TestSobelXY(int width, int height, int benchmark_iterations,
TEST_F(libyuvTest, ARGBSobelXY_Any) {
int max_diff = TestSobelXY(benchmark_width_ - 1, benchmark_height_,
benchmark_iterations_, +1, 0);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBSobelXY_Unaligned) {
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
benchmark_iterations_, +1, 1);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBSobelXY_Invert) {
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
benchmark_iterations_, -1, 0);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
TEST_F(libyuvTest, ARGBSobelXY_Opt) {
int max_diff = TestSobelXY(benchmark_width_, benchmark_height_,
benchmark_iterations_, +1, 0);
EXPECT_LE(max_diff, 14);
EXPECT_EQ(0, max_diff);
}
} // namespace libyuv
......@@ -61,4 +61,3 @@ int main(int argc, char** argv) {
}
fclose(fin1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment