Commit 1e985bbc authored by fbarchard@google.com

ARGBSobel

BUG=201
TEST=Sobel*
Review URL: https://webrtc-codereview.appspot.com/1221005

git-svn-id: http://libyuv.googlecode.com/svn/trunk@609 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent e1247eec
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 608
Version: 609
License: BSD
License File: LICENSE
......
......@@ -347,6 +347,12 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_argb, int dst_stride_argb,
const uint8* shuffler, int width, int height);
// Sobel ARGB effect.
// Reads a width x height ARGB image from src_argb and writes the Sobel
// result as ARGB to dst_argb, one destination row per source row.
// src_stride_argb / dst_stride_argb are the byte offsets between rows.
// A negative height reads the source starting from its last row (inverted).
// Returns 0 on success, or -1 if either pointer is NULL, width <= 0,
// width > kMaxStride, or height == 0.
LIBYUV_API
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 608
#define LIBYUV_VERSION 609
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -71,7 +71,6 @@
}],
], # conditions
},
], # targets
}
......
......@@ -1822,6 +1822,159 @@ int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
return 0;
}
// Sobel ARGB effect.
// Each source row is converted to a row of Y bytes with ARGBToYRow.  A
// window of three Y rows (previous, current, next) is fed to SobelXRow and
// SobelYRow, the two results are combined with ARGBAddRow, and the combined
// row is expanded to ARGB with YToARGBRow.  The first and last rows, and the
// first and last pixel of every row, are replicated to form the border.
//
// src_argb / src_stride_argb: source image and its byte stride.
// dst_argb / dst_stride_argb: destination image and its byte stride.
// width: pixels per row; must be in 1..kMaxStride.
// height: number of rows; negative means read the source bottom-up.
// Returns 0 on success, -1 on invalid arguments.
// TODO(fbarchard): Enable AVX2. Mixing SSSE3 and AVX2 requires zeroupper.
LIBYUV_API
int ARGBSobel(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  if (!src_argb || !dst_argb ||
      width <= 0 || height == 0 || width > kMaxStride) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
      ARGBToYRow_C;
#if defined(HAS_ARGBTOYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
      // Assumed row buffer aligned.
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
        ARGBToYRow = ARGBToYRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED) || \
    defined(HAS_ARGBADDROW_AVX2_DISABLED)
  // Set when any AVX2 row function is selected so vzeroupper is issued before
  // returning.  Declared for either AVX2 path so that enabling only one of
  // them still compiles.
  bool clear = false;
#endif
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    clear = true;
    ARGBToYRow = ARGBToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYRow = ARGBToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_NEON;
    }
  }
#endif

  void (*YToARGBRow)(const uint8* y_buf,
                     uint8* rgb_buf,
                     int width) = YToARGBRow_C;
#if defined(HAS_YTOARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    YToARGBRow = YToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      YToARGBRow = YToARGBRow_SSE2;
    }
  }
#elif defined(HAS_YTOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    YToARGBRow = YToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      YToARGBRow = YToARGBRow_NEON;
    }
  }
#endif

  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
                    uint8* dst_sobely, int width) = SobelYRow_C;
#if defined(HAS_SOBELYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SobelYRow = SobelYRow_SSSE3;
  }
#endif
  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobelx, int width) =
      SobelXRow_C;
#if defined(HAS_SOBELXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SobelXRow = SobelXRow_SSSE3;
  }
#endif

  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
                     int width) = ARGBAddRow_C;
#if defined(HAS_ARGBADDROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2_DISABLED)
  if (TestCpuFlag(kCpuHasAVX2)) {
    clear = true;
    ARGBAddRow = ARGBAddRow_AVX2;
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAddRow = ARGBAddRow_NEON;
  }
#endif

  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  // Distance between consecutive Y rows.  Every row owns kEdge bytes of
  // padding, so the replicated edge pixels at [-1] and [width] can never land
  // inside a neighbouring row, even when width == kMaxStride.
  const int kRowStride = kMaxStride + kEdge;
  SIMD_ALIGNED(uint8 row_y[kRowStride * 3 + kEdge]);
  SIMD_ALIGNED(uint8 row_sobelx[kMaxStride]);
  SIMD_ALIGNED(uint8 row_sobely[kMaxStride]);
  SIMD_ALIGNED(uint8 row_sobel[kMaxStride]);

  // Convert first row.  It is used for both the "previous" and the "current"
  // row, which replicates the top edge of the image.
  uint8* row_y0 = row_y + kEdge;
  uint8* row_y1 = row_y0 + kRowStride;
  uint8* row_y2 = row_y1 + kRowStride;
  ARGBToYRow(src_argb, row_y0, width);
  row_y0[-1] = row_y0[0];
  row_y0[width] = row_y0[width - 1];
  ARGBToYRow(src_argb, row_y1, width);
  row_y1[-1] = row_y1[0];
  row_y1[width] = row_y1[width - 1];

  // Width handed to ARGBAddRow for the one-byte-per-pixel Sobel rows.
  // NOTE(review): presumably ARGBAddRow counts 4-byte ARGB pixels, so the
  // byte count is rounded up to whole pixels - confirm against the row
  // function.
  int awidth = (width + 3) >> 2;

  for (int y = 0; y < height; ++y) {
    // Convert next row of ARGB to Y.  On the last row the source pointer is
    // not advanced, so the bottom row is reused as the "next" row.
    if (y < (height - 1)) {
      src_argb += src_stride_argb;
    }
    ARGBToYRow(src_argb, row_y2, width);
    row_y2[-1] = row_y2[0];
    row_y2[width] = row_y2[width - 1];

    // The Sobel rows start one pixel to the left so the replicated edge pixel
    // is the first input.
    SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
    SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
    ARGBAddRow(row_sobelx, row_sobely, row_sobel, awidth);
    YToARGBRow(row_sobel, dst_argb, width);

    // Cycle thru circular queue of 3 row_y buffers.
    uint8* row_yt = row_y0;
    row_y0 = row_y1;
    row_y1 = row_y2;
    row_y2 = row_yt;

    dst_argb += dst_stride_argb;
  }
#if defined(HAS_ARGBTOYROW_AVX2_DISABLED) || \
    defined(HAS_ARGBADDROW_AVX2_DISABLED)
  if (clear) {
    __asm vzeroupper;
  }
#endif
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -1232,4 +1232,67 @@ TEST_F(libyuvTest, ARGBSubtract_Opt) {
EXPECT_LE(max_diff, 1);
}
static int TestSobel(int width, int height, int benchmark_iterations,
int invert, int off) {
const int kBpp = 4;
const int kStride = (width * kBpp + 15) & ~15;
align_buffer_64(src_argb_a, kStride * height + off);
align_buffer_64(dst_argb_c, kStride * height);
align_buffer_64(dst_argb_opt, kStride * height);
srandom(time(NULL));
for (int i = 0; i < kStride * height; ++i) {
src_argb_a[i + off] = (random() & 0xff);
}
memset(dst_argb_c, 0, kStride * height);
memset(dst_argb_opt, 0, kStride * height);
MaskCpuFlags(0);
ARGBSobel(src_argb_a + off, kStride,
dst_argb_c, kStride,
width, invert * height);
MaskCpuFlags(-1);
for (int i = 0; i < benchmark_iterations; ++i) {
ARGBSobel(src_argb_a + off, kStride,
dst_argb_opt, kStride,
width, invert * height);
}
int max_diff = 0;
for (int i = 0; i < kStride * height; ++i) {
int abs_diff =
abs(static_cast<int>(dst_argb_c[i]) -
static_cast<int>(dst_argb_opt[i]));
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
free_aligned_buffer_64(src_argb_a)
free_aligned_buffer_64(dst_argb_c)
free_aligned_buffer_64(dst_argb_opt)
return max_diff;
}
// Sobel with a width one pixel narrower than the benchmark width.
// NOTE(review): presumably chosen so the width is not a multiple of the SIMD
// step and the _Any row functions are exercised - confirm.
TEST_F(libyuvTest, ARGBSobel_Any) {
  const int kWidth = benchmark_width_ - 1;
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobel(kWidth, benchmark_height_,
                                 benchmark_iterations_, kInvert, kOffset);
  EXPECT_LE(max_diff, 14);
}
// Sobel with the source pointer offset by one byte from the aligned buffer.
TEST_F(libyuvTest, ARGBSobel_Unaligned) {
  const int kInvert = +1;
  const int kOffset = 1;
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, kInvert, kOffset);
  EXPECT_LE(max_diff, 14);
}
// Sobel with a negative height, which makes ARGBSobel read the source
// bottom-up.
TEST_F(libyuvTest, ARGBSobel_Invert) {
  const int kInvert = -1;
  const int kOffset = 0;
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, kInvert, kOffset);
  EXPECT_LE(max_diff, 14);
}
// Sobel at the full benchmark size with no source offset and no inversion.
TEST_F(libyuvTest, ARGBSobel_Opt) {
  const int kInvert = +1;
  const int kOffset = 0;
  const int max_diff = TestSobel(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, kInvert, kOffset);
  EXPECT_LE(max_diff, 14);
}
} // namespace libyuv
......@@ -39,8 +39,9 @@ libyuvTest::libyuvTest() : rotate_max_w_(128), rotate_max_h_(128),
if (height) {
benchmark_height_ = atoi(height); // NOLINT
}
benchmark_pixels_div256_ = (benchmark_iterations_ * benchmark_width_ *
benchmark_height_ + 255) / 256;
benchmark_pixels_div256_ = static_cast<int>(
(static_cast<double>(benchmark_width_ *
benchmark_height_) * benchmark_iterations_ + 255.0) / 256.0);
}
int main(int argc, char** argv) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment