Commit 09209950 authored by fbarchard@google.com

Sobel using max to get abs for SSE2

BUG=none
TEST=none
R=ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/2769004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@824 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2f9868f1
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 823
Version: 824
License: BSD
License File: LICENSE
......
......@@ -60,9 +60,9 @@ extern "C" {
#define HAS_INTERPOLATEROW_SSSE3
#define HAS_SOBELROW_SSE2
#define HAS_SOBELTOPLANEROW_SSE2
#define HAS_SOBELXROW_SSSE3
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSSE3
#define HAS_SOBELYROW_SSE2
// Conversions:
#define HAS_ABGRTOUVROW_SSSE3
......@@ -1564,14 +1564,14 @@ void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
// Sobel images.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
uint8* dst_sobelx, int width);
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width);
void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width);
void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width);
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width);
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width);
void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width);
void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width);
void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 823
#define LIBYUV_VERSION 824
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -1867,9 +1867,9 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
#endif
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C;
#if defined(HAS_SOBELYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SobelYRow = SobelYRow_SSSE3;
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelYRow = SobelYRow_SSE2;
}
#endif
#if defined(HAS_SOBELYROW_NEON)
......@@ -1880,9 +1880,9 @@ static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobely, int width) =
SobelXRow_C;
#if defined(HAS_SOBELXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SobelXRow = SobelXRow_SSSE3;
#if defined(HAS_SOBELXROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelXRow = SobelXRow_SSE2;
}
#endif
#if defined(HAS_SOBELXROW_NEON)
......
......@@ -4590,13 +4590,13 @@ void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
}
#endif // HAS_ARGBSUBTRACTROW_SSE2
#ifdef HAS_SOBELXROW_SSSE3
#ifdef HAS_SOBELXROW_SSE2
// SobelX as a matrix is
// -1 0 1
// -2 0 2
// -1 0 1
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
asm volatile (
"sub %0,%1 \n"
"sub %0,%2 \n"
......@@ -4627,7 +4627,9 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n"
"pabsw %%xmm0,%%xmm0 \n"
"pxor %%xmm1,%%xmm1 \n"
"psubw %%xmm0,%%xmm1 \n"
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%4 \n"
BUNDLEALIGN
......@@ -4649,15 +4651,15 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
#endif
);
}
#endif // HAS_SOBELXROW_SSSE3
#endif // HAS_SOBELXROW_SSE2
#ifdef HAS_SOBELYROW_SSSE3
#ifdef HAS_SOBELYROW_SSE2
// SobelY as a matrix is
// -1 -2 -1
// 0 0 0
// 1 2 1
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
asm volatile (
"sub %0,%1 \n"
"sub %0,%2 \n"
......@@ -4687,7 +4689,9 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
"paddw %%xmm2,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n"
"paddw %%xmm1,%%xmm0 \n"
"pabsw %%xmm0,%%xmm0 \n"
"pxor %%xmm1,%%xmm1 \n"
"psubw %%xmm0,%%xmm1 \n"
"pmaxsw %%xmm1,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"sub $0x8,%3 \n"
BUNDLEALIGN
......@@ -4708,7 +4712,7 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
#endif
);
}
#endif // HAS_SOBELYROW_SSSE3
#endif // HAS_SOBELYROW_SSE2
#ifdef HAS_SOBELROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
......
......@@ -5579,14 +5579,14 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
}
#endif // HAS_ARGBSUBTRACTROW_AVX2
#ifdef HAS_SOBELXROW_SSSE3
#ifdef HAS_SOBELXROW_SSE2
// SobelX as a matrix is
// -1 0 1
// -2 0 2
// -1 0 1
__declspec(naked) __declspec(align(16))
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
__asm {
push esi
push edi
......@@ -5620,7 +5620,9 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
paddw xmm0, xmm2
paddw xmm0, xmm1
paddw xmm0, xmm1
pabsw xmm0, xmm0 // SSSE3. Could use SSE2 psubusw twice instead.
pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
psubw xmm1, xmm0
pmaxsw xmm0, xmm1
packuswb xmm0, xmm0
sub ecx, 8
movq qword ptr [eax + edx], xmm0
......@@ -5632,16 +5634,16 @@ void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
ret
}
}
#endif // HAS_SOBELXROW_SSSE3
#endif // HAS_SOBELXROW_SSE2
#ifdef HAS_SOBELYROW_SSSE3
#ifdef HAS_SOBELYROW_SSE2
// SobelY as a matrix is
// -1 -2 -1
// 0 0 0
// 1 2 1
__declspec(naked) __declspec(align(16))
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
__asm {
push esi
mov eax, [esp + 4 + 4] // src_y0
......@@ -5672,7 +5674,9 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
paddw xmm0, xmm2
paddw xmm0, xmm1
paddw xmm0, xmm1
pabsw xmm0, xmm0 // SSSE3. Could use SSE2 psubusw twice instead.
pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
psubw xmm1, xmm0
pmaxsw xmm0, xmm1
packuswb xmm0, xmm0
sub ecx, 8
movq qword ptr [eax + edx], xmm0
......@@ -5683,7 +5687,7 @@ void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
ret
}
}
#endif // HAS_SOBELYROW_SSSE3
#endif // HAS_SOBELYROW_SSE2
#ifdef HAS_SOBELROW_SSE2
// Adds Sobel X and Sobel Y and stores Sobel into ARGB.
......
......@@ -204,8 +204,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
#define HAS_SCALEARGBCOLS_SSE2
__declspec(naked) __declspec(align(16))
static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
__asm {
push esi
push edi
......@@ -553,8 +553,8 @@ static void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
#define HAS_SCALEARGBCOLS_SSE2
// TODO(fbarchard): p2align 5 is for nacl branch targets. Reduce using
// pseudoop, bundle or macro.
static void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
intptr_t x0 = 0, x1 = 0;
asm volatile (
"movd %5,%%xmm2 \n"
......@@ -1263,8 +1263,8 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
// Scales a single row of pixels using point sampling.
// Code is adapted from libyuv bilinear yuv scaling, but with bilinear
// interpolation off, and argb pixels instead of yuv.
static void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) {
const uint32* src = reinterpret_cast<const uint32*>(src_argb);
uint32* dst = reinterpret_cast<uint32*>(dst_argb);
for (int j = 0; j < dst_width - 1; j += 2) {
......
......@@ -1120,9 +1120,9 @@ TEST_F(libyuvTest, TestSobelX) {
void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobely, int width) =
SobelXRow_C;
#if defined(HAS_SOBELXROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SobelXRow = SobelXRow_SSSE3;
#if defined(HAS_SOBELXROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelXRow = SobelXRow_SSE2;
}
#endif
#if defined(HAS_SOBELXROW_NEON)
......@@ -1157,9 +1157,9 @@ TEST_F(libyuvTest, TestSobelY) {
EXPECT_EQ(0u, sobel_pixels_c[255]);
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C;
#if defined(HAS_SOBELYROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
SobelYRow = SobelYRow_SSSE3;
#if defined(HAS_SOBELYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
SobelYRow = SobelYRow_SSE2;
}
#endif
#if defined(HAS_SOBELYROW_NEON)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment