Commit 852f4854 authored by fbarchard@google.com

Neon version of new SetRow functions for rectangles.

BUG=387
TESTED=untested
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/39449004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1220 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8e3db2dc
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1219 Version: 1220
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -302,7 +302,7 @@ extern "C" { ...@@ -302,7 +302,7 @@ extern "C" {
#define HAS_RGB565TOYROW_NEON #define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON #define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON #define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON // #define HAS_SETROW_NEON
#define HAS_SPLITUVROW_NEON #define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUV422ROW_NEON
...@@ -332,7 +332,7 @@ extern "C" { ...@@ -332,7 +332,7 @@ extern "C" {
#define HAS_SOBELXYROW_NEON #define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON #define HAS_SOBELYROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBSETROWS_NEON // #define HAS_ARGBSETROWS_NEON
#define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON
#endif #endif
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1219 #define LIBYUV_VERSION 1220
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -845,30 +845,36 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { ...@@ -845,30 +845,36 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
); );
} }
// SetRow8 writes 'count' bytes using a 32 bit value repeated. // SetRow writes 'count' bytes using an 8 bit value repeated.
void SetRow_NEON(uint8* dst, uint32 v32, int count) { void SetRow_NEON(uint8* dst, uint32 v8, int count) {
asm volatile ( asm volatile (
"vdup.u32 q0, %2 \n" // duplicate 4 ints "vdup.8 q0, %2 \n" // duplicate 16 bytes
"1: \n" "1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop "subs %1, %1, #16 \n" // 16 bytes per loop
MEMACCESS(0) MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n" // store "vst1.8 {q0}, [%0]! \n" // store
"bgt 1b \n" "bgt 1b \n"
: "+r"(dst), // %0 : "+r"(dst), // %0
"+r"(count) // %1 "+r"(count) // %1
: "r"(v32) // %2 : "r"(v8) // %2
: "cc", "memory", "q0" : "cc", "memory", "q0"
); );
} }
// TODO(fbarchard): Make fully assembler // ARGBSetRow writes 'count' pixels using a 32 bit value repeated.
// SetRow32 writes 'count' words using a 32 bit value repeated. void ARGBSetRow_NEON(uint8* dst, uint32 v8, int count) {
void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, asm volatile (
int dst_stride, int height) { "vdup.u32 q0, %2 \n" // duplicate 4 ints
for (int y = 0; y < height; ++y) { "1: \n"
SetRow_NEON(dst, v32, width << 2); "subs %1, %1, #4 \n" // 4 pixels per loop
dst += dst_stride; MEMACCESS(0)
} "vst1.8 {q0}, [%0]! \n" // store
"bgt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
: "cc", "memory", "q0"
);
} }
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
......
...@@ -735,35 +735,36 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { ...@@ -735,35 +735,36 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
} }
#endif // HAS_COPYROW_NEON #endif // HAS_COPYROW_NEON
// SetRow8 writes 'count' bytes using a 32 bit value repeated. // SetRow writes 'count' bytes using an 8 bit value repeated.
#ifdef HAS_SETROW_NEON void SetRow_NEON(uint8* dst, uint32 v8, int count) {
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile ( asm volatile (
"dup v0.4s, %w2 \n" // duplicate 4 ints "dup v0.16b, %w2 \n" // duplicate 16 bytes
"1: \n" "1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop "subs %1, %1, #16 \n" // 16 bytes per loop
MEMACCESS(0) MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" // store "st1 {v0.16b}, [%0], #16 \n" // store
"b.gt 1b \n" "b.gt 1b \n"
: "+r"(dst), // %0 : "+r"(dst), // %0
"+r"(count) // %1 "+r"(count) // %1
: "r"(v32) // %2 : "r"(v8) // %2
: "cc", "memory", "v0" : "cc", "memory", "v0"
); );
} }
#endif // HAS_SETROW_NEON
// TODO(fbarchard): Make fully assembler void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
// SetRow32 writes 'count' words using a 32 bit value repeated. asm volatile (
#ifdef HAS_ARGBSETROWS_NEON "dup v0.4s, %w2 \n" // duplicate 4 ints
void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, "1: \n"
int dst_stride, int height) { "subs %1, %1, #4 \n" // 4 ints per loop
for (int y = 0; y < height; ++y) { MEMACCESS(0)
SetRow_NEON(dst, v32, width << 2); "st1 {v0.16b}, [%0], #16 \n" // store
dst += dst_stride; "b.gt 1b \n"
} : "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
: "cc", "memory", "v0"
);
} }
#endif // HAS_ARGBSETROWS_NEON
#ifdef HAS_MIRRORROW_NEON #ifdef HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment