Commit 09209950 authored by fbarchard@google.com

Sobel: compute abs as max(x, -x) so the SobelX/SobelY row functions need only SSE2 instead of SSSE3 (pabsw)

BUG=none
TEST=none
R=ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/2769004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@824 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2f9868f1
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 823 Version: 824
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -60,9 +60,9 @@ extern "C" { ...@@ -60,9 +60,9 @@ extern "C" {
#define HAS_INTERPOLATEROW_SSSE3 #define HAS_INTERPOLATEROW_SSSE3
#define HAS_SOBELROW_SSE2 #define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSSE3 #define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2 #define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSSE3 #define HAS_SOBELYROW_SSE2
// Conversions: // Conversions:
#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOUVROW_SSSE3
...@@ -1564,14 +1564,14 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, ...@@ -1564,14 +1564,14 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
// Sobel images. // Sobel images.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
uint8* dst_sobelx, int width); uint8* dst_sobelx, int width);
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width); const uint8* src_y2, uint8* dst_sobelx, int width);
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width); const uint8* src_y2, uint8* dst_sobelx, int width);
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width); uint8* dst_sobely, int width);
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width); uint8* dst_sobely, int width);
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width); uint8* dst_sobely, int width);
void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 823 #define LIBYUV_VERSION 824
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -1867,9 +1867,9 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, ...@@ -1867,9 +1867,9 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
#endif #endif
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C; uint8* dst_sobely, int width) = SobelYRow_C;
#if defined(HAS_SOBELYROW_SSSE3) #if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelYRow = SobelYRow_SSSE3; SobelYRow = SobelYRow_SSE2;
} }
#endif #endif
#if defined(HAS_SOBELYROW_NEON) #if defined(HAS_SOBELYROW_NEON)
...@@ -1880,9 +1880,9 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, ...@@ -1880,9 +1880,9 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobely, int width) = const uint8* src_y2, uint8* dst_sobely, int width) =
SobelXRow_C; SobelXRow_C;
#if defined(HAS_SOBELXROW_SSSE3) #if defined(HAS_SOBELXROW_SSE2)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelXRow = SobelXRow_SSSE3; SobelXRow = SobelXRow_SSE2;
} }
#endif #endif
#if defined(HAS_SOBELXROW_NEON) #if defined(HAS_SOBELXROW_NEON)
......
...@@ -4590,13 +4590,13 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, ...@@ -4590,13 +4590,13 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
} }
#endif // HAS_ARGBSUBTRACTROW_SSE2 #endif // HAS_ARGBSUBTRACTROW_SSE2
#ifdef HAS_SOBELXROW_SSSE3 #ifdef HAS_SOBELXROW_SSE2
// SobelX as a matrix is // SobelX as a matrix is
// -1 0 1 // -1 0 1
// -2 0 2 // -2 0 2
// -1 0 1 // -1 0 1
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) { const uint8* src_y2, uint8* dst_sobelx, int width) {
asm volatile ( asm volatile (
"sub %0,%1 \n" "sub %0,%1 \n"
"sub %0,%2 \n" "sub %0,%2 \n"
...@@ -4627,7 +4627,9 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -4627,7 +4627,9 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
"paddw %%xmm2,%%xmm0 \n" "paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n"
"pabsw %%xmm0,%%xmm0 \n" "pxor %%xmm1,%%xmm1 \n"
"psubw %%xmm0,%%xmm1 \n"
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%4 \n" "sub $0x8,%4 \n"
BUNDLEALIGN BUNDLEALIGN
...@@ -4649,15 +4651,15 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -4649,15 +4651,15 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
#endif #endif
); );
} }
#endif // HAS_SOBELXROW_SSSE3 #endif // HAS_SOBELXROW_SSE2
#ifdef HAS_SOBELYROW_SSSE3 #ifdef HAS_SOBELYROW_SSE2
// SobelY as a matrix is // SobelY as a matrix is
// -1 -2 -1 // -1 -2 -1
// 0 0 0 // 0 0 0
// 1 2 1 // 1 2 1
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) { uint8* dst_sobely, int width) {
asm volatile ( asm volatile (
"sub %0,%1 \n" "sub %0,%1 \n"
"sub %0,%2 \n" "sub %0,%2 \n"
...@@ -4687,7 +4689,9 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -4687,7 +4689,9 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
"paddw %%xmm2,%%xmm0 \n" "paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n"
"pabsw %%xmm0,%%xmm0 \n" "pxor %%xmm1,%%xmm1 \n"
"psubw %%xmm0,%%xmm1 \n"
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%3 \n" "sub $0x8,%3 \n"
BUNDLEALIGN BUNDLEALIGN
...@@ -4708,7 +4712,7 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -4708,7 +4712,7 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
#endif #endif
); );
} }
#endif // HAS_SOBELYROW_SSSE3 #endif // HAS_SOBELYROW_SSE2
#ifdef HAS_SOBELROW_SSE2 #ifdef HAS_SOBELROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into ARGB. // Adds Sobel X and Sobel Y and stores Sobel into ARGB.
......
...@@ -5579,14 +5579,14 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, ...@@ -5579,14 +5579,14 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
} }
#endif // HAS_ARGBSUBTRACTROW_AVX2 #endif // HAS_ARGBSUBTRACTROW_AVX2
#ifdef HAS_SOBELXROW_SSSE3 #ifdef HAS_SOBELXROW_SSE2
// SobelX as a matrix is // SobelX as a matrix is
// -1 0 1 // -1 0 1
// -2 0 2 // -2 0 2
// -1 0 1 // -1 0 1
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) { const uint8* src_y2, uint8* dst_sobelx, int width) {
__asm { __asm {
push esi push esi
push edi push edi
...@@ -5620,7 +5620,9 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -5620,7 +5620,9 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
paddw xmm0, xmm2 paddw xmm0, xmm2
paddw xmm0, xmm1 paddw xmm0, xmm1
paddw xmm0, xmm1 paddw xmm0, xmm1
pabsw xmm0, xmm0 // SSSE3. Could use SSE2 psubusw twice instead. pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
psubw xmm1, xmm0
pmaxsw xmm0, xmm1
packuswb xmm0, xmm0 packuswb xmm0, xmm0
sub ecx, 8 sub ecx, 8
movq qword ptr [eax + edx], xmm0 movq qword ptr [eax + edx], xmm0
...@@ -5632,16 +5634,16 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -5632,16 +5634,16 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
ret ret
} }
} }
#endif // HAS_SOBELXROW_SSSE3 #endif // HAS_SOBELXROW_SSE2
#ifdef HAS_SOBELYROW_SSSE3 #ifdef HAS_SOBELYROW_SSE2
// SobelY as a matrix is // SobelY as a matrix is
// -1 -2 -1 // -1 -2 -1
// 0 0 0 // 0 0 0
// 1 2 1 // 1 2 1
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) { uint8* dst_sobely, int width) {
__asm { __asm {
push esi push esi
mov eax, [esp + 4 + 4] // src_y0 mov eax, [esp + 4 + 4] // src_y0
...@@ -5672,7 +5674,9 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -5672,7 +5674,9 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
paddw xmm0, xmm2 paddw xmm0, xmm2
paddw xmm0, xmm1 paddw xmm0, xmm1
paddw xmm0, xmm1 paddw xmm0, xmm1
pabsw xmm0, xmm0 // SSSE3. Could use SSE2 psubusw twice instead. pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
psubw xmm1, xmm0
pmaxsw xmm0, xmm1
packuswb xmm0, xmm0 packuswb xmm0, xmm0
sub ecx, 8 sub ecx, 8
movq qword ptr [eax + edx], xmm0 movq qword ptr [eax + edx], xmm0
...@@ -5683,7 +5687,7 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1, ...@@ -5683,7 +5687,7 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
ret ret
} }
} }
#endif // HAS_SOBELYROW_SSSE3 #endif // HAS_SOBELYROW_SSE2
#ifdef HAS_SOBELROW_SSE2 #ifdef HAS_SOBELROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into ARGB. // Adds Sobel X and Sobel Y and stores Sobel into ARGB.
......
...@@ -204,8 +204,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ...@@ -204,8 +204,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
#define HAS_SCALEARGBCOLS_SSE2 #define HAS_SCALEARGBCOLS_SSE2
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
__asm { __asm {
push esi push esi
push edi push edi
...@@ -553,8 +553,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, ...@@ -553,8 +553,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
#define HAS_SCALEARGBCOLS_SSE2 #define HAS_SCALEARGBCOLS_SSE2
// TODO(fbarchard): p2align 5 is for nacl branch targets. Reduce using // TODO(fbarchard): p2align 5 is for nacl branch targets. Reduce using
// pseudoop, bundle or macro. // pseudoop, bundle or macro.
static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
intptr_t x0 = 0, x1 = 0; intptr_t x0 = 0, x1 = 0;
asm volatile ( asm volatile (
"movd %5,%%xmm2 \n" "movd %5,%%xmm2 \n"
...@@ -1263,8 +1263,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, ...@@ -1263,8 +1263,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
// Scales a single row of pixels using point sampling. // Scales a single row of pixels using point sampling.
// Code is adapted from libyuv bilinear yuv scaling, but with bilinear // Code is adapted from libyuv bilinear yuv scaling, but with bilinear
// interpolation off, and argb pixels instead of yuv. // interpolation off, and argb pixels instead of yuv.
static void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) { int dst_width, int x, int dx) {
const uint32* src = reinterpret_cast<const uint32*>(src_argb); const uint32* src = reinterpret_cast<const uint32*>(src_argb);
uint32* dst = reinterpret_cast<uint32*>(dst_argb); uint32* dst = reinterpret_cast<uint32*>(dst_argb);
for (int j = 0; j < dst_width - 1; j += 2) { for (int j = 0; j < dst_width - 1; j += 2) {
......
...@@ -1120,9 +1120,9 @@ TEST_F(libyuvTest, TestSobelX) { ...@@ -1120,9 +1120,9 @@ TEST_F(libyuvTest, TestSobelX) {
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobely, int width) = const uint8* src_y2, uint8* dst_sobely, int width) =
SobelXRow_C; SobelXRow_C;
#if defined(HAS_SOBELXROW_SSSE3) #if defined(HAS_SOBELXROW_SSE2)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelXRow = SobelXRow_SSSE3; SobelXRow = SobelXRow_SSE2;
} }
#endif #endif
#if defined(HAS_SOBELXROW_NEON) #if defined(HAS_SOBELXROW_NEON)
...@@ -1157,9 +1157,9 @@ TEST_F(libyuvTest, TestSobelY) { ...@@ -1157,9 +1157,9 @@ TEST_F(libyuvTest, TestSobelY) {
EXPECT_EQ(0u, sobel_pixels_c[255]); EXPECT_EQ(0u, sobel_pixels_c[255]);
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C; uint8* dst_sobely, int width) = SobelYRow_C;
#if defined(HAS_SOBELYROW_SSSE3) #if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSSE3)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelYRow = SobelYRow_SSSE3; SobelYRow = SobelYRow_SSE2;
} }
#endif #endif
#if defined(HAS_SOBELYROW_NEON) #if defined(HAS_SOBELYROW_NEON)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment