Commit e1247eec authored by fbarchard@google.com

Sobel functions

BUG=201
TEST=none
Review URL: https://webrtc-codereview.appspot.com/1200004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@608 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 5ca144d2
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 607
Version: 608
License: BSD
License File: LICENSE
......
......@@ -120,6 +120,8 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
#define HAS_ARGBCOLORTABLEROW_X86
#define HAS_COPYROW_AVX2
#define HAS_SOBELXROW_SSSE3
#define HAS_SOBELYROW_SSSE3
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// TODO(fbarchard): Hook these up to all functions. e.g. format conversion.
......@@ -1419,6 +1421,16 @@ void ARGBInterpolateRow_NEON(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride_argb, int dst_width,
int source_y_fraction);
// Sobel images.
// SobelXRow: for each output pixel x, takes src[x] - src[x + 2] in each of the
// three source rows, adds them with the middle row (src_y1) counted twice, and
// stores the absolute value clamped to 255 in dst_sobelx[x].
// The C version reads width + 2 pixels from every source row.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
uint8* dst_sobelx, int width);
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width);
// SobelYRow: for each output pixel x, takes src_y0 - src_y1 at columns x,
// x + 1 and x + 2, adds them with the middle column counted twice, and stores
// the absolute value clamped to 255 in dst_sobely[x].
// The C version reads width + 2 pixels from both source rows.
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width);
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 607
#define LIBYUV_VERSION 608
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -712,6 +712,53 @@ void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
}
#undef SHADE
// Sobel functions which mimic the SSSE3 versions.
// Horizontal Sobel response for one output row.
// For every x in [0, width) the pixel two columns to the right is subtracted
// from the pixel at x in each of the three source rows. The three differences
// are weighted 1, 2, 1 (src_y1 is the doubled row) and the absolute value of
// the sum, clamped to 255, is written to dst_sobelx[x].
// Each source row must hold at least width + 2 pixels.
void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
                 uint8* dst_sobelx, int width) {
  for (int x = 0; x < width; ++x) {
    const int top = src_y0[x] - src_y0[x + 2];
    const int middle = src_y1[x] - src_y1[x + 2];
    const int bottom = src_y2[x] - src_y2[x + 2];
    const int gradient = top + middle * 2 + bottom;
    const int magnitude = (gradient < 0) ? -gradient : gradient;
    dst_sobelx[x] = static_cast<uint8>((magnitude > 255) ? 255 : magnitude);
  }
}
// Vertical Sobel response for one output row.
// For every x in [0, width) the src_y1 pixel is subtracted from the src_y0
// pixel at columns x, x + 1 and x + 2. The three differences are weighted
// 1, 2, 1 (column x + 1 is doubled) and the absolute value of the sum,
// clamped to 255, is written to dst_sobely[x].
// Both source rows must hold at least width + 2 pixels.
void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
                 uint8* dst_sobely, int width) {
  for (int x = 0; x < width; ++x) {
    const int left = src_y0[x] - src_y1[x];
    const int center = src_y0[x + 1] - src_y1[x + 1];
    const int right = src_y0[x + 2] - src_y1[x + 2];
    const int gradient = left + center * 2 + right;
    const int magnitude = (gradient < 0) ? -gradient : gradient;
    dst_sobely[x] = static_cast<uint8>((magnitude > 255) ? 255 : magnitude);
  }
}
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
// Copy a Y to RGB.
for (int x = 0; x < width; ++x) {
......
......@@ -5027,6 +5027,112 @@ void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
}
#endif // HAS_ARGBSUBTRACTROW_AVX2
#ifdef HAS_SOBELXROW_SSSE3
// SobelX as a matrix is
// -1 0 1
// -2 0 2
// -1 0 1
// Produces 8 output pixels per loop iteration. For each x the code computes
// (y0[x] - y0[x+2]) + 2 * (y1[x] - y1[x+2]) + (y2[x] - y2[x+2]), which is the
// matrix above with the sign flipped; the absolute value taken afterwards
// makes the sign irrelevant. The result is saturated to 255.
// Every iteration loads 8 bytes at x and at x + 2 from each source row and
// stores 8 bytes, and the loop body always executes at least once, so a width
// that is not a positive multiple of 8 reads and writes beyond width.
// Naked function: no prologue/epilogue is generated, so the arguments are
// fetched from the stack by hand and esi/edi are saved and restored here.
__declspec(naked) __declspec(align(16))
void SobelXRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
const uint8* src_y2, uint8* dst_sobelx, int width) {
__asm {
push esi
push edi
// Offsets: + 8 skips the two registers pushed above, the next + 4 skips the
// return address.
mov eax, [esp + 8 + 4] // src_y0
mov esi, [esp + 8 + 8] // src_y1
mov edi, [esp + 8 + 12] // src_y2
mov edx, [esp + 8 + 16] // dst_sobelx
mov ecx, [esp + 8 + 20] // width
// Turn the other pointers into offsets from src_y0 so that advancing eax
// alone steps through all four buffers.
sub esi, eax // esi = src_y1 - src_y0
sub edi, eax // edi = src_y2 - src_y0
sub edx, eax // edx = dst_sobelx - src_y0
pxor xmm5, xmm5 // constant 0
align 16
convertloop:
movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
punpcklbw xmm0, xmm5 // widen bytes to words by interleaving with 0
punpcklbw xmm1, xmm5
psubw xmm0, xmm1 // xmm0 = y0[x] - y0[x + 2]
movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
punpcklbw xmm1, xmm5
punpcklbw xmm2, xmm5
psubw xmm1, xmm2 // xmm1 = y1[x] - y1[x + 2]
movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0]
movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2]
punpcklbw xmm2, xmm5
punpcklbw xmm3, xmm5
psubw xmm2, xmm3 // xmm2 = y2[x] - y2[x + 2]
paddw xmm0, xmm2 // top row + bottom row
paddw xmm0, xmm1 // middle row is added twice for its weight of 2
paddw xmm0, xmm1
pabsw xmm0, xmm0 // SSSE3. Could use SSE2 psubusw twice instead.
packuswb xmm0, xmm0 // words to bytes with unsigned saturation (clamps at 255)
sub ecx, 8 // sets the flags for jg; movq and lea below do not change them
movq qword ptr [eax + edx], xmm0 // store 8 pixels to dst_sobelx
lea eax, [eax + 8]
jg convertloop // repeat while the remaining width is > 0
pop edi
pop esi
ret
}
}
#endif // HAS_SOBELXROW_SSSE3
#ifdef HAS_SOBELYROW_SSSE3
// SobelY as a matrix is
// -1 -2 -1
// 0 0 0
// 1 2 1
// Produces 8 output pixels per loop iteration. For each x the code computes
// (y0[x] - y1[x]) + 2 * (y0[x+1] - y1[x+1]) + (y0[x+2] - y1[x+2]), which is
// the matrix above with the sign flipped and src_y1 in the bottom row; the
// absolute value taken afterwards makes the sign irrelevant. The result is
// saturated to 255.
// Every iteration loads 8 bytes at x, x + 1 and x + 2 from both source rows
// and stores 8 bytes, and the loop body always executes at least once, so a
// width that is not a positive multiple of 8 reads and writes beyond width.
// Naked function: no prologue/epilogue is generated, so the arguments are
// fetched from the stack by hand and esi is saved and restored here.
__declspec(naked) __declspec(align(16))
void SobelYRow_SSSE3(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) {
__asm {
push esi
// Offsets: the first + 4 skips the register pushed above, the next + 4 skips
// the return address.
mov eax, [esp + 4 + 4] // src_y0
mov esi, [esp + 4 + 8] // src_y1
mov edx, [esp + 4 + 12] // dst_sobely
mov ecx, [esp + 4 + 16] // width
// Turn the other pointers into offsets from src_y0 so that advancing eax
// alone steps through all three buffers.
sub esi, eax // esi = src_y1 - src_y0
sub edx, eax // edx = dst_sobely - src_y0
pxor xmm5, xmm5 // constant 0
align 16
convertloop:
movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0]
movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
punpcklbw xmm0, xmm5 // widen bytes to words by interleaving with 0
punpcklbw xmm1, xmm5
psubw xmm0, xmm1 // xmm0 = y0[x] - y1[x]
movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1]
movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1]
punpcklbw xmm1, xmm5
punpcklbw xmm2, xmm5
psubw xmm1, xmm2 // xmm1 = y0[x + 1] - y1[x + 1]
movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2]
punpcklbw xmm2, xmm5
punpcklbw xmm3, xmm5
psubw xmm2, xmm3 // xmm2 = y0[x + 2] - y1[x + 2]
paddw xmm0, xmm2 // left column + right column
paddw xmm0, xmm1 // middle column is added twice for its weight of 2
paddw xmm0, xmm1
pabsw xmm0, xmm0 // SSSE3. Could use SSE2 psubusw twice instead.
packuswb xmm0, xmm0 // words to bytes with unsigned saturation (clamps at 255)
sub ecx, 8 // sets the flags for jg; movq and lea below do not change them
movq qword ptr [eax + edx], xmm0 // store 8 pixels to dst_sobely
lea eax, [eax + 8]
jg convertloop // repeat while the remaining width is > 0
pop esi
ret
}
}
#endif // HAS_SOBELYROW_SSSE3
#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
// Consider float CumulativeSum.
// Consider calling CumulativeSum one row at time as needed.
......
......@@ -20,6 +20,7 @@
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/row.h" // For Sobel
#include "../unit_test/unit_test.h"
#if defined(_MSC_VER)
......@@ -890,6 +891,77 @@ TEST_F(libyuvTest, TestAffine) {
#endif
}
// Checks SobelXRow_C against hand-computed values on wrapping ramp inputs.
// When the SSSE3 row function is compiled in, also checks that it (or the C
// fallback on CPUs without SSSE3) reproduces the C output for all 256 pixels.
TEST_F(libyuvTest, TestSobelX) {
  const int kWidth = 256;
  // Output pixel x reads input pixel x + 2, so each row has 2 extra pixels.
  SIMD_ALIGNED(uint8 row_top[kWidth + 2]);
  SIMD_ALIGNED(uint8 row_mid[kWidth + 2]);
  SIMD_ALIGNED(uint8 row_bot[kWidth + 2]);
  SIMD_ALIGNED(uint8 reference[kWidth]);

  // Ramps with slopes 1, 2 and 3; the values wrap when stored as uint8.
  for (int x = 0; x < kWidth + 2; ++x) {
    row_top[x] = x;
    row_mid[x] = x * 2;
    row_bot[x] = x * 3;
  }

  SobelXRow_C(row_top, row_mid, row_bot, reference, kWidth);

  EXPECT_EQ(16u, reference[0]);
  EXPECT_EQ(16u, reference[100]);
  EXPECT_EQ(255u, reference[255]);

#if defined(HAS_SOBELXROW_SSSE3)
  SIMD_ALIGNED(uint8 optimized[kWidth]);
  // The call is repeated benchmark_pixels_div256_ times so the test doubles
  // as a timing run.
  if (TestCpuFlag(kCpuHasSSSE3)) {
    for (int run = 0; run < benchmark_pixels_div256_; ++run) {
      SobelXRow_SSSE3(row_top, row_mid, row_bot, optimized, kWidth);
    }
  } else {
    for (int run = 0; run < benchmark_pixels_div256_; ++run) {
      SobelXRow_C(row_top, row_mid, row_bot, optimized, kWidth);
    }
  }
  for (int x = 0; x < kWidth; ++x) {
    EXPECT_EQ(optimized[x], reference[x]);
  }
#endif
}
// Checks SobelYRow_C against hand-computed values on wrapping ramp inputs.
// When the SSSE3 row function is compiled in, also checks that it (or the C
// fallback on CPUs without SSSE3) reproduces the C output for all 256 pixels.
TEST_F(libyuvTest, TestSobelY) {
  const int kWidth = 256;
  // Output pixel x reads input pixels up to x + 2, so each row has 2 extra
  // pixels.
  SIMD_ALIGNED(uint8 row_upper[kWidth + 2]);
  SIMD_ALIGNED(uint8 row_lower[kWidth + 2]);
  SIMD_ALIGNED(uint8 reference[kWidth]);

  // Ramps with slopes 1 and 2; the values wrap when stored as uint8.
  for (int x = 0; x < kWidth + 2; ++x) {
    row_upper[x] = x;
    row_lower[x] = x * 2;
  }

  SobelYRow_C(row_upper, row_lower, reference, kWidth);

  EXPECT_EQ(4u, reference[0]);
  EXPECT_EQ(255u, reference[100]);
  EXPECT_EQ(0u, reference[255]);

#if defined(HAS_SOBELYROW_SSSE3)
  SIMD_ALIGNED(uint8 optimized[kWidth]);
  // The call is repeated benchmark_pixels_div256_ times so the test doubles
  // as a timing run.
  if (TestCpuFlag(kCpuHasSSSE3)) {
    for (int run = 0; run < benchmark_pixels_div256_; ++run) {
      SobelYRow_SSSE3(row_upper, row_lower, optimized, kWidth);
    }
  } else {
    for (int run = 0; run < benchmark_pixels_div256_; ++run) {
      SobelYRow_C(row_upper, row_lower, optimized, kWidth);
    }
  }
  for (int x = 0; x < kWidth; ++x) {
    EXPECT_EQ(optimized[x], reference[x]);
  }
#endif
}
TEST_F(libyuvTest, TestCopyPlane) {
int err = 0;
int yw = benchmark_width_;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment