Commit f7a5048f authored by fbarchard@google.com

align asm new line to column 48

BUG=none
TEST=builds
Review URL: http://webrtc-codereview.appspot.com/268008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@73 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2cb934c6
@@ -23,35 +23,30 @@ namespace libyuv {
 static uint32 SumSquareError_NEON(const uint8* src_a,
                                   const uint8* src_b, int count) {
   volatile uint32 sse;
-  asm volatile
-  (
-    "vmov.u8 q7, #0\n"
-    "vmov.u8 q9, #0\n"
-    "vmov.u8 q8, #0\n"
-    "vmov.u8 q10, #0\n"
-
-    "1:\n"
-    "vld1.u8 {q0}, [%0]!\n"
-    "vld1.u8 {q1}, [%1]!\n"
-
-    "vsubl.u8 q2, d0, d2\n"
-    "vsubl.u8 q3, d1, d3\n"
-
-    "vmlal.s16 q7, d4, d4\n"
-    "vmlal.s16 q8, d6, d6\n"
-    "vmlal.s16 q8, d5, d5\n"
-    "vmlal.s16 q10, d7, d7\n"
-
-    "subs %2, %2, #16\n"
-    "bhi 1b\n"
-
-    "vadd.u32 q7, q7, q8\n"
-    "vadd.u32 q9, q9, q10\n"
-    "vadd.u32 q10, q7, q9\n"
-    "vpaddl.u32 q1, q10\n"
-    "vadd.u64 d0, d2, d3\n"
-    "vmov.32 %3, d0[0]\n"
+  asm volatile (
+    "vmov.u8 q7, #0                            \n"
+    "vmov.u8 q9, #0                            \n"
+    "vmov.u8 q8, #0                            \n"
+    "vmov.u8 q10, #0                           \n"
+    "1:                                        \n"
+    "vld1.u8 {q0}, [%0]!                       \n"
+    "vld1.u8 {q1}, [%1]!                       \n"
+    "vsubl.u8 q2, d0, d2                       \n"
+    "vsubl.u8 q3, d1, d3                       \n"
+    "vmlal.s16 q7, d4, d4                      \n"
+    "vmlal.s16 q8, d6, d6                      \n"
+    "vmlal.s16 q8, d5, d5                      \n"
+    "vmlal.s16 q10, d7, d7                     \n"
+    "subs %2, %2, #16                          \n"
+    "bhi 1b                                    \n"
+    "vadd.u32 q7, q7, q8                       \n"
+    "vadd.u32 q9, q9, q10                      \n"
+    "vadd.u32 q10, q7, q9                      \n"
+    "vpaddl.u32 q1, q10                        \n"
+    "vadd.u64 d0, d2, d3                       \n"
+    "vmov.32 %3, d0[0]                         \n"
   : "+r"(src_a),
     "+r"(src_b),
     "+r"(count),
@@ -59,7 +54,6 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
   :
   : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"
   );
-
   return sse;
 }
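
For reference while reviewing the realigned block: the NEON code accumulates the same sum of squared byte differences as the portable SumSquareError_C path later in this file. A minimal scalar sketch of the computation (the function name is illustrative, not part of the diff):

    // Sum of squared differences over count bytes; the NEON version
    // above consumes 16 bytes per loop iteration (subs %2, %2, #16).
    static uint32 SumSquareError_Ref(const uint8* src_a,
                                     const uint8* src_b, int count) {
      uint32 sse = 0;
      for (int i = 0; i < count; ++i) {
        int diff = src_a[i] - src_b[i];
        sse += static_cast<uint32>(diff * diff);
      }
      return sse;
    }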
@@ -102,7 +96,6 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
     pshufd xmm1, xmm0, 01h
     paddd  xmm0, xmm1
     movd   eax, xmm0
-
     ret
   }
 }
@@ -112,11 +105,12 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
 // DISABLE
 //#define HAS_SUMSQUAREERROR_SSE2
 // DISABLE
+#if HAS_SUMSQUAREERROR_SSE2
 static uint32 SumSquareError_SSE2(const uint8* src_a,
                                   const uint8* src_b, int count) {
   volatile uint32 sse;
-  asm volatile(
-    "\n"
+  asm volatile (
+    "                                          \n"
   : "+r"(src_a),    // %0
     "+r"(src_b),    // %1
     "+r"(count),    // %2
@@ -131,6 +125,7 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
 }
 #endif
 #endif
+#endif

 static uint32 SumSquareError_C(const uint8* src_a,
                                const uint8* src_b, int count) {
@@ -148,7 +143,6 @@ uint64 ComputeSumSquareError(const uint8* src_a,
                              const uint8* src_b, int count) {
   uint32 (*SumSquareError)(const uint8* src_a,
                            const uint8* src_b, int count);
-
 #if defined(HAS_SUMSQUAREERROR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     SumSquareError = SumSquareError_NEON;
@@ -162,10 +156,8 @@ uint64 ComputeSumSquareError(const uint8* src_a,
   {
     SumSquareError = SumSquareError_C;
   }
-
   const int kBlockSize = 4096;
   uint64 diff = 0;
-
   while (count >= kBlockSize) {
     diff += SumSquareError(src_a, src_b, kBlockSize);
     src_a += kBlockSize;
@@ -179,7 +171,6 @@ uint64 ComputeSumSquareError(const uint8* src_a,
       diff += static_cast<uint64>(SumSquareError_C(src_a, src_b, count));
     }
   }
-
   return diff;
 }
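
A note on the blocking above: SumSquareError returns a uint32, and the SIMD implementations accumulate squared differences in 32-bit lanes, so ComputeSumSquareError feeds them at most kBlockSize = 4096 bytes per call. The worst case per block is 4096 * 255 * 255 = 266,342,400, safely below 2^32; the uint64 running total in the caller absorbs the rest.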
@@ -188,7 +179,6 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
                                   int width, int height) {
   uint32 (*SumSquareError)(const uint8* src_a,
                            const uint8* src_b, int count);
-
 #if defined(HAS_SUMSQUAREERROR_NEON)
   if (TestCpuFlag(kCpuHasNEON) &&
       (width % 16 == 0)) {
@@ -200,7 +190,6 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
   }
-
   uint64 sse = 0;
   for (int h = 0; h < height; ++h) {
     sse += static_cast<uint64>(SumSquareError(src_a, src_b, width));
     src_a += stride_a;
@@ -210,11 +199,10 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
   return sse;
 }

-double Sse2Psnr(double Samples, double Sse) {
+double Sse2Psnr(double samples, double sse) {
   double psnr;
-
-  if (Sse > 0.0)
-    psnr = 10.0 * log10(255.0 * 255.0 * Samples / Sse);
+  if (sse > 0.0)
+    psnr = 10.0 * log10(255.0 * 255.0 * samples / sse);
   else
     psnr = kMaxPsnr;      // Limit to prevent divide by 0
@@ -224,6 +212,21 @@ double Sse2Psnr(double Samples, double Sse) {
   return psnr;
 }

+double Sse2Psnr(uint64 samples, uint64 sse) {
+  double psnr;
+  if (sse > 0) {
+    double mse = static_cast<double>(samples) / static_cast<double>(sse);
+    psnr = 10.0 * log10(255.0 * 255.0 * mse);
+  } else {
+    psnr = kMaxPsnr;      // Limit to prevent divide by 0
+  }
+  if (psnr > kMaxPsnr)
+    psnr = kMaxPsnr;
+  return psnr;
+}
+
 double CalcFramePsnr(const uint8* src_a, int stride_a,
                      const uint8* src_b, int stride_b,
                      int width, int height) {
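
For reference, both Sse2Psnr overloads compute the standard PSNR definition. With MSE = sse / samples (mean squared error per sample):

    PSNR = 10 * log10(255^2 / MSE)
         = 10 * log10(255.0 * 255.0 * samples / sse)

Note that the local variable named mse in the new uint64 overload actually holds samples / sse, the reciprocal of the MSE, which is why it is multiplied by 255^2 rather than divided into it.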
@@ -233,7 +236,7 @@ double CalcFramePsnr(const uint8* src_a, int stride_a,
                                            src_b, stride_b,
                                            width, height);
-  return Sse2Psnr (samples, sse);
+  return Sse2Psnr(samples, sse);
 }

 double I420Psnr(const uint8* src_y_a, int stride_y_a,
......
@@ -22,9 +22,9 @@
 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
 static inline void __cpuid(int cpu_info[4], int info_type) {
   asm volatile (
-    "mov %%ebx, %%edi\n"
-    "cpuid\n"
-    "xchg %%edi, %%ebx\n"
+    "mov %%ebx, %%edi                          \n"
+    "cpuid                                     \n"
+    "xchg %%edi, %%ebx                         \n"
   : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
   : "a"(info_type)
   );
@@ -32,7 +32,7 @@ static inline void __cpuid(int cpu_info[4], int info_type) {
 #elif defined(__i386__) || defined(__x86_64__)
 static inline void __cpuid(int cpu_info[4], int info_type) {
   asm volatile (
-    "cpuid\n"
+    "cpuid                                     \n"
   : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
   : "a"(info_type)
   );
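
Background on the first variant: with position-independent code on 32-bit x86, %ebx holds the GOT pointer, so it is saved in %edi around cpuid instead of being listed as clobbered. A minimal usage sketch (the feature-bit positions are the standard CPUID leaf-1 flags, not something defined in this diff):

    int cpu_info[4];
    __cpuid(cpu_info, 1);                            // leaf 1: feature flags
    bool has_ssse3 = (cpu_info[2] & (1 << 9)) != 0;  // ECX bit 9: SSSE3
    bool has_sse2 = (cpu_info[3] & (1 << 26)) != 0;  // EDX bit 26: SSE2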
......
@@ -50,17 +50,17 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
 #define HAS_ARGBTOBAYERROW_SSSE3
 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                                  uint32 selector, int pix) {
-  asm volatile(
-    "movd %3,%%xmm5\n"
-    "pshufd $0x0,%%xmm5,%%xmm5\n"
-    "1:\n"
-    "movdqa (%0),%%xmm0\n"
-    "lea 0x10(%0),%0\n"
-    "pshufb %%xmm5,%%xmm0\n"
-    "movd %%xmm0,(%1)\n"
-    "lea 0x4(%1),%1\n"
-    "sub $0x4,%2\n"
-    "ja 1b\n"
+  asm volatile (
+    "movd %3,%%xmm5                            \n"
+    "pshufd $0x0,%%xmm5,%%xmm5                 \n"
+    "1:                                        \n"
+    "movdqa (%0),%%xmm0                        \n"
+    "lea 0x10(%0),%0                           \n"
+    "pshufb %%xmm5,%%xmm0                      \n"
+    "movd %%xmm0,(%1)                          \n"
+    "lea 0x4(%1),%1                            \n"
+    "sub $0x4,%2                               \n"
+    "ja 1b                                     \n"
   : "+r"(src_argb),   // %0
     "+r"(dst_bayer),  // %1
     "+r"(pix)         // %2
......
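
Reading the hunk above: movd/pshufd replicate the 4-byte selector across xmm5, and pshufb then extracts one byte per ARGB pixel from each 16-byte group of four pixels. A scalar sketch of that behavior (name and loop shape illustrative only):

    static void ARGBToBayerRow_Ref(const uint8* src_argb, uint8* dst_bayer,
                                   uint32 selector, int pix) {
      for (int x = 0; x < pix; x += 4) {  // four ARGB pixels = 16 bytes
        for (int i = 0; i < 4; ++i) {
          // Each selector byte is an index into the 16-byte group.
          int index = (selector >> (i * 8)) & 0xff;
          dst_bayer[x + i] = src_argb[x * 4 + index];
        }
      }
    }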
@@ -92,7 +92,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
     pcmpeqb    xmm5, xmm5       // generate mask 0xff000000
     pslld      xmm5, 24

-  wloop:
+  convertloop:
     movq       xmm0, qword ptr [eax]
     lea        eax, [eax + 8]
     punpcklbw  xmm0, xmm0
@@ -105,7 +105,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
     movdqa     [edx + 16], xmm1
     lea        edx, [edx + 32]
     sub        ecx, 8
-    ja         wloop
+    ja         convertloop
     ret
   }
 }
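
The relabeled convertloop expands each luma byte into a gray ARGB pixel: punpcklbw duplicates Y, and the 0xff000000 mask in xmm5 supplies opaque alpha. A scalar equivalent (a sketch; the name is illustrative):

    void I400ToARGBRow_Ref(const uint8* src_y, uint8* dst_argb, int pix) {
      for (int x = 0; x < pix; ++x) {
        uint8 y = src_y[x];
        dst_argb[x * 4 + 0] = y;     // B
        dst_argb[x * 4 + 1] = y;     // G
        dst_argb[x * 4 + 2] = y;     // R
        dst_argb[x * 4 + 3] = 0xff;  // A, from the 0xff000000 mask
      }
    }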
@@ -753,18 +753,18 @@ SIMD_ALIGNED(const int16 kUVBiasR[8]) = {
     __asm punpcklwd xmm0, xmm0              /* UVUV (upsample) */          \
     __asm movdqa    xmm1, xmm0                                             \
     __asm movdqa    xmm2, xmm0                                             \
     __asm pmaddubsw xmm0, kUVToB            /* scale B UV */               \
     __asm pmaddubsw xmm1, kUVToG            /* scale G UV */               \
     __asm pmaddubsw xmm2, kUVToR            /* scale R UV */               \
     __asm psubw     xmm0, kUVBiasB          /* unbias back to signed */    \
     __asm psubw     xmm1, kUVBiasG                                         \
     __asm psubw     xmm2, kUVBiasR                                         \
     /* Step 2: Find Y contribution to 8 R,G,B values */                    \
     __asm movq      xmm3, qword ptr [eax]                                  \
     __asm lea       eax, [eax + 8]                                         \
     __asm punpcklbw xmm3, xmm4                                             \
     __asm psubsw    xmm3, kYSub16                                          \
     __asm pmullw    xmm3, kYToRgb                                          \
     __asm paddw     xmm0, xmm3              /* B += Y */                   \
     __asm paddw     xmm1, xmm3              /* G += Y */                   \
     __asm paddw     xmm2, xmm3              /* R += Y */                   \
......
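
Context for the macro hunk: this is the UV and Y stage of the usual YUV-to-RGB conversion. pmaddubsw scales U/V into per-channel contributions, psubw removes the fixed-point bias, and the scaled luma is added in. In floating point, the video-range BT.601 conversion these fixed-point steps approximate is (standard coefficients, not taken from this diff):

    R = 1.164 * (Y - 16) + 1.596 * (V - 128)
    G = 1.164 * (Y - 16) - 0.813 * (V - 128) - 0.391 * (U - 128)
    B = 1.164 * (Y - 16) + 2.018 * (U - 128)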