Commit 852f4854 authored by fbarchard@google.com

Neon version of new SetRow functions for rectangles.

BUG=387
TESTED=untested
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/39449004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1220 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8e3db2dc
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1219
Version: 1220
License: BSD
License File: LICENSE
......
......@@ -302,7 +302,7 @@ extern "C" {
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
// #define HAS_SETROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
......@@ -332,7 +332,7 @@ extern "C" {
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBSETROWS_NEON
// #define HAS_ARGBSETROWS_NEON
#define HAS_ARGBSHUFFLEROW_NEON
#endif
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1219
#define LIBYUV_VERSION 1220
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -845,10 +845,10 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
);
}
// SetRow8 writes 'count' bytes using a 32 bit value repeated.
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
// SetRow writes 'count' bytes using an 8 bit value repeated.
void SetRow_NEON(uint8* dst, uint32 v8, int count) {
asm volatile (
"vdup.u32 q0, %2 \n" // duplicate 4 ints
"vdup.8 q0, %2 \n" // duplicate 16 bytes
"1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop
MEMACCESS(0)
......@@ -856,19 +856,25 @@ void SetRow_NEON(uint8* dst, uint32 v32, int count) {
"bgt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
: "r"(v8) // %2
: "cc", "memory", "q0"
);
}
// TODO(fbarchard): Make fully assembler
// SetRow32 writes 'count' words using a 32 bit value repeated.
void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
int dst_stride, int height) {
for (int y = 0; y < height; ++y) {
SetRow_NEON(dst, v32, width << 2);
dst += dst_stride;
}
// ARGBSetRow writes 'count' pixels using a 32 bit value repeated.
//   dst:   destination buffer; advanced by 16 bytes after every store.
//   v32:   32 bit pixel value, duplicated into all four lanes of q0.
//   count: number of pixels to write.
// Each iteration stores 16 bytes (4 pixels) and the remaining count is only
// tested after the store, so the loop body always runs at least once and
// 'count' is effectively rounded up to a multiple of 4.
void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
  asm volatile (
    "vdup.u32   q0, %2                         \n"  // duplicate 4 ints
  "1:                                          \n"
    "subs       %1, %1, #4                     \n"  // 4 pixels per loop
    MEMACCESS(0)
    "vst1.8     {q0}, [%0]!                    \n"  // store, post-increment dst
    "bgt        1b                             \n"
  : "+r"(dst),     // %0
    "+r"(count)    // %1
  : "r"(v32)       // %2 (parameter was named v8, leaving v32 undeclared)
  : "cc", "memory", "q0"
  );
}
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
......
......@@ -735,11 +735,10 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
}
#endif // HAS_COPYROW_NEON
// SetRow8 writes 'count' bytes using a 32 bit value repeated.
#ifdef HAS_SETROW_NEON
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
// SetRow writes 'count' bytes using an 8 bit value repeated.
void SetRow_NEON(uint8* dst, uint32 v8, int count) {
asm volatile (
"dup v0.4s, %w2 \n" // duplicate 4 ints
"dup v0.16b, %w2 \n" // duplicate 16 bytes
"1: \n"
"subs %1, %1, #16 \n" // 16 bytes per loop
MEMACCESS(0)
......@@ -747,23 +746,25 @@ void SetRow_NEON(uint8* dst, uint32 v32, int count) {
"b.gt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
: "r"(v8) // %2
: "cc", "memory", "v0"
);
}
#endif // HAS_SETROW_NEON
// TODO(fbarchard): Make fully assembler
// SetRow32 writes 'count' words using a 32 bit value repeated.
#ifdef HAS_ARGBSETROWS_NEON
void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
int dst_stride, int height) {
for (int y = 0; y < height; ++y) {
SetRow_NEON(dst, v32, width << 2);
dst += dst_stride;
}
// ARGBSetRow writes 'count' pixels using a 32 bit value repeated.
//   dst:   destination buffer; advanced by 16 bytes after every store.
//   v32:   32 bit pixel value, duplicated into all four 32 bit lanes of v0.
//   count: number of pixels to write.
// Each iteration stores 16 bytes (4 pixels) and the remaining count is only
// tested after the store, so the loop body always runs at least once and
// 'count' is effectively rounded up to a multiple of 4.
// NOTE(review): 'count' is an int but is referenced as %1 rather than %w1;
// confirm the register width used by 'subs' is the intended one.
void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile (
"dup v0.4s, %w2 \n" // duplicate 4 ints
"1: \n"
"subs %1, %1, #4 \n" // 4 ints per loop
MEMACCESS(0)
"st1 {v0.16b}, [%0], #16 \n" // store 16 bytes, post-increment dst
"b.gt 1b \n"
: "+r"(dst), // %0
"+r"(count) // %1
: "r"(v32) // %2
: "cc", "memory", "v0"
);
}
#endif // HAS_ARGBSETROWS_NEON
#ifdef HAS_MIRRORROW_NEON
void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment