Commit f38aefef authored by fbarchard@google.com

Blur: use a circular buffer of cumulative sums instead of the full frame for better cache coherency.

BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/646008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@283 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent f51e8791
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 282 Version: 283
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 282 #define LIBYUV_VERSION 283
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -1709,26 +1709,50 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -1709,26 +1709,50 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int32* dst_cumsum, int dst_stride32_cumsum, int32* dst_cumsum, int dst_stride32_cumsum,
int width, int height, int radius) { int width, int height, int radius) {
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft, void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C; int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2) #if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
CumulativeSumToAverage = CumulativeSumToAverage_SSE2; CumulativeSumToAverage = CumulativeSumToAverage_SSE2;
} }
#endif #endif
ARGBComputeCumulativeSum(src_argb, src_stride_argb, ARGBComputeCumulativeSum(src_argb, src_stride_argb,
dst_cumsum, dst_stride32_cumsum, dst_cumsum, dst_stride32_cumsum,
width, height); width, radius);
src_argb = src_argb + radius * src_stride_argb;
int32* cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
int32* max_cumsum_bot_row =
&dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
int32* cumsum_top_row = &dst_cumsum[0];
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
int32* cumsum_top_row = &dst_cumsum[top_y * dst_stride32_cumsum]; int area = radius * (bot_y - top_y);
int32* cumsum_bot_row = &dst_cumsum[bot_y * dst_stride32_cumsum];
if (top_y) {
cumsum_top_row += dst_stride32_cumsum;
if (cumsum_top_row >= max_cumsum_bot_row) {
cumsum_top_row = dst_cumsum;
}
}
if ((y + radius) < height) {
int32* prev_cumsum_bot_row = cumsum_bot_row;
cumsum_bot_row += dst_stride32_cumsum;
if (cumsum_bot_row >= max_cumsum_bot_row) {
cumsum_bot_row = dst_cumsum;
}
ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
width);
src_argb += src_stride_argb;
}
// Left clipped. // Left clipped.
int area = radius * (bot_y - top_y);
int boxwidth = radius * 4; int boxwidth = radius * 4;
int x; int x;
for (x = 0; x < radius + 1; ++x) { for (x = 0; x < radius + 1; ++x) {
......
...@@ -2989,8 +2989,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, ...@@ -2989,8 +2989,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
"10: \n" "10: \n"
"movd (%0),%%xmm2 \n" "movd (%0),%%xmm2 \n"
"lea 0x4(%0),%0 \n" "lea 0x4(%0),%0 \n"
"punpcklbw %%xmm4,%%xmm2 \n" "punpcklbw %%xmm1,%%xmm2 \n"
"punpcklwd %%xmm4,%%xmm2 \n" "punpcklwd %%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm0 \n" "paddd %%xmm2,%%xmm0 \n"
"movdqu (%1,%2,1),%%xmm2 \n" "movdqu (%1,%2,1),%%xmm2 \n"
"paddd %%xmm0,%%xmm2 \n" "paddd %%xmm0,%%xmm2 \n"
......
...@@ -3187,8 +3187,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, ...@@ -3187,8 +3187,8 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
l1: l1:
movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes.
lea eax, [eax + 4] lea eax, [eax + 4]
punpcklbw xmm2, xmm4 punpcklbw xmm2, xmm1
punpcklwd xmm2, xmm4 punpcklwd xmm2, xmm1
paddd xmm0, xmm2 paddd xmm0, xmm2
movdqu xmm2, [edx + esi] movdqu xmm2, [edx + esi]
paddd xmm2, xmm0 paddd xmm2, xmm0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment