Commit bd2d903e authored by fbarchard@google.com

odd width support for ARGBSobel functions. Improves performance for images that…

odd width support for ARGBSobel functions.  Improves performance for images that are not a multiple of 8 pixels.
BUG=444
TESTED=libyuvTest.ARGBSobel_Opt
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/54589004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1415 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent cfce47ef
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1416 Version: 1417
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -1819,6 +1819,18 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, ...@@ -1819,6 +1819,18 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width); uint8* dst_argb, int width);
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width); uint8* dst_argb, int width);
// Any-width variants of the Sobel row functions. They take the same
// arguments as the SSE2/NEON row functions and are instantiated with the
// MATHROW_ANY macro, which calls the SIMD row function on the leading
// (width & ~MASK) pixels and the C row function on the remaining
// (width & MASK) pixels.
// SobelRow / SobelXYRow: sources advance 1 byte per pixel and the destination
// 4 bytes per pixel in the remainder call.
void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
// SobelToPlaneRow: sources and destination all advance 1 byte per pixel.
void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width);
void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_y, int width);
void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width);
void ARGBPolynomialRow_C(const uint8* src_argb, void ARGBPolynomialRow_C(const uint8* src_argb,
uint8* dst_argb, const float* poly, uint8* dst_argb, const float* poly,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1416 #define LIBYUV_VERSION 1417
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -2095,14 +2095,20 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb, ...@@ -2095,14 +2095,20 @@ int ARGBSobel(const uint8* src_argb, int src_stride_argb,
void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) = SobelRow_C; uint8* dst_argb, int width) = SobelRow_C;
#if defined(HAS_SOBELROW_SSE2) #if defined(HAS_SOBELROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelRow = SobelRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
SobelRow = SobelRow_SSE2; SobelRow = SobelRow_SSE2;
} }
}
#endif #endif
#if defined(HAS_SOBELROW_NEON) #if defined(HAS_SOBELROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasNEON)) {
SobelRow = SobelRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
SobelRow = SobelRow_NEON; SobelRow = SobelRow_NEON;
} }
}
#endif #endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelRow); width, height, SobelRow);
...@@ -2116,14 +2122,20 @@ int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, ...@@ -2116,14 +2122,20 @@ int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_, int width) = SobelToPlaneRow_C; uint8* dst_, int width) = SobelToPlaneRow_C;
#if defined(HAS_SOBELTOPLANEROW_SSE2) #if defined(HAS_SOBELTOPLANEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
SobelToPlaneRow = SobelToPlaneRow_SSE2; SobelToPlaneRow = SobelToPlaneRow_SSE2;
} }
}
#endif #endif
#if defined(HAS_SOBELTOPLANEROW_NEON) #if defined(HAS_SOBELTOPLANEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasNEON)) {
SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
SobelToPlaneRow = SobelToPlaneRow_NEON; SobelToPlaneRow = SobelToPlaneRow_NEON;
} }
}
#endif #endif
return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
width, height, SobelToPlaneRow); width, height, SobelToPlaneRow);
...@@ -2138,14 +2150,20 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, ...@@ -2138,14 +2150,20 @@ int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width) = SobelXYRow_C; uint8* dst_argb, int width) = SobelXYRow_C;
#if defined(HAS_SOBELXYROW_SSE2) #if defined(HAS_SOBELXYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { if (TestCpuFlag(kCpuHasSSE2)) {
SobelXYRow = SobelXYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) {
SobelXYRow = SobelXYRow_SSE2; SobelXYRow = SobelXYRow_SSE2;
} }
}
#endif #endif
#if defined(HAS_SOBELXYROW_NEON) #if defined(HAS_SOBELXYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasNEON)) {
SobelXYRow = SobelXYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
SobelXYRow = SobelXYRow_NEON; SobelXYRow = SobelXYRow_NEON;
} }
}
#endif #endif
return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width, height, SobelXYRow); width, height, SobelXYRow);
......
...@@ -558,51 +558,71 @@ MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15) ...@@ -558,51 +558,71 @@ MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15)
#endif #endif
#undef MERGEUVROW_ANY #undef MERGEUVROW_ANY
#define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK) \ #define MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, SBPP, DBPP, MASK) \
void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \ void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \
uint8* dst_argb, int width) { \ uint8* dst_argb, int width) { \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \ ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \
} \ } \
ARGBMATH_C(src_argb0 + n * 4, \ ARGBMATH_C(src_argb0 + n * SBPP, \
src_argb1 + n * 4, \ src_argb1 + n * SBPP, \
dst_argb + n * 4, \ dst_argb + n * DBPP, \
width & MASK); \ width & MASK); \
} }
#ifdef HAS_ARGBMULTIPLYROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_SSE2
MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C, MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C,
3) 4, 4, 3)
#endif #endif
#ifdef HAS_ARGBADDROW_SSE2 #ifdef HAS_ARGBADDROW_SSE2
MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3) MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 4, 4, 3)
#endif #endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2 #ifdef HAS_ARGBSUBTRACTROW_SSE2
MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C, MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C,
3) 4, 4, 3)
#endif #endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2 #ifdef HAS_ARGBMULTIPLYROW_AVX2
MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C, MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C,
7) 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBADDROW_AVX2 #ifdef HAS_ARGBADDROW_AVX2
MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7) MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2 #ifdef HAS_ARGBSUBTRACTROW_AVX2
MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C, MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C,
7) 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBMULTIPLYROW_NEON #ifdef HAS_ARGBMULTIPLYROW_NEON
MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C, MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C,
7) 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBADDROW_NEON #ifdef HAS_ARGBADDROW_NEON
MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7) MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBSUBTRACTROW_NEON #ifdef HAS_ARGBSUBTRACTROW_NEON
MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C, MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C,
7) 4, 4, 7)
#endif
// Any-width SobelRow wrappers. SBPP = 1: each source advances 1 byte per
// pixel; DBPP = 4: the destination advances 4 bytes per pixel.
// MASK is one less than the width alignment ARGBSobel requires before it
// calls the SIMD row function directly (16 for SSE2, 8 for NEON).
#ifdef HAS_SOBELROW_SSE2
MATHROW_ANY(SobelRow_Any_SSE2, SobelRow_SSE2, SobelRow_C, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_NEON
MATHROW_ANY(SobelRow_Any_NEON, SobelRow_NEON, SobelRow_C, 1, 4, 7)
#endif
// Any-width SobelToPlaneRow wrappers. SBPP = 1 and DBPP = 1: both sources and
// the destination plane advance 1 byte per pixel.
// MASK must be one less than the width alignment ARGBSobelToPlane requires
// before it calls the SIMD row function directly. Both the SSE2 and the NEON
// paths require IS_ALIGNED(width, 16), so both wrappers use 15. A NEON mask
// of 7 would hand widths such as 8 or 24 to SobelToPlaneRow_NEON, which the
// dispatcher itself never does, and could write past the end of the row.
#ifdef HAS_SOBELTOPLANEROW_SSE2
MATHROW_ANY(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, SobelToPlaneRow_C,
            1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
MATHROW_ANY(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, SobelToPlaneRow_C,
            1, 1, 15)
#endif
// Any-width SobelXYRow wrapper for SSE2. SBPP = 1: each source advances
// 1 byte per pixel; DBPP = 4: the destination advances 4 bytes per pixel.
// MASK 15 matches the IS_ALIGNED(width, 16) check ARGBSobelXY uses before
// calling SobelXYRow_SSE2 directly.
#ifdef HAS_SOBELXYROW_SSE2
MATHROW_ANY(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, SobelXYRow_C, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
MATHROW_ANY(SobelXYRow_Any_NEON, SobelXYRow_NEON, SobelXYRow_C, 1, 4, 7)
#endif #endif
#undef MATHROW_ANY #undef MATHROW_ANY
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment