Commit f7a5048f authored by fbarchard@google.com

align asm new line to column 48

BUG=none
TEST=builds
Review URL: http://webrtc-codereview.appspot.com/268008

git-svn-id: http://libyuv.googlecode.com/svn/trunk@73 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 2cb934c6
......@@ -23,35 +23,30 @@ namespace libyuv {
static uint32 SumSquareError_NEON(const uint8* src_a,
const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile
(
"vmov.u8 q7, #0\n"
"vmov.u8 q9, #0\n"
"vmov.u8 q8, #0\n"
"vmov.u8 q10, #0\n"
"1:\n"
"vld1.u8 {q0}, [%0]!\n"
"vld1.u8 {q1}, [%1]!\n"
"vsubl.u8 q2, d0, d2\n"
"vsubl.u8 q3, d1, d3\n"
"vmlal.s16 q7, d4, d4\n"
"vmlal.s16 q8, d6, d6\n"
"vmlal.s16 q8, d5, d5\n"
"vmlal.s16 q10, d7, d7\n"
"subs %2, %2, #16\n"
"bhi 1b\n"
"vadd.u32 q7, q7, q8\n"
"vadd.u32 q9, q9, q10\n"
"vadd.u32 q10, q7, q9\n"
"vpaddl.u32 q1, q10\n"
"vadd.u64 d0, d2, d3\n"
"vmov.32 %3, d0[0]\n"
asm volatile (
"vmov.u8 q7, #0 \n"
"vmov.u8 q9, #0 \n"
"vmov.u8 q8, #0 \n"
"vmov.u8 q10, #0 \n"
"1: \n"
"vld1.u8 {q0}, [%0]! \n"
"vld1.u8 {q1}, [%1]! \n"
"vsubl.u8 q2, d0, d2 \n"
"vsubl.u8 q3, d1, d3 \n"
"vmlal.s16 q7, d4, d4 \n"
"vmlal.s16 q8, d6, d6 \n"
"vmlal.s16 q8, d5, d5 \n"
"vmlal.s16 q10, d7, d7 \n"
"subs %2, %2, #16 \n"
"bhi 1b \n"
"vadd.u32 q7, q7, q8 \n"
"vadd.u32 q9, q9, q10 \n"
"vadd.u32 q10, q7, q9 \n"
"vpaddl.u32 q1, q10 \n"
"vadd.u64 d0, d2, d3 \n"
"vmov.32 %3, d0[0] \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
......@@ -59,7 +54,6 @@ static uint32 SumSquareError_NEON(const uint8* src_a,
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"
);
return sse;
}
......@@ -102,7 +96,6 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
pshufd xmm1, xmm0, 01h
paddd xmm0, xmm1
movd eax, xmm0
ret
}
}
......@@ -112,11 +105,12 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
// DISABLE
//#define HAS_SUMSQUAREERROR_SSE2
// DISABLE
#if HAS_SUMSQUAREERROR_SSE2
static uint32 SumSquareError_SSE2(const uint8* src_a,
const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile(
"\n"
asm volatile (
" \n"
: "+r"(src_a), // %0
"+r"(src_b), // %1
"+r"(count), // %2
......@@ -131,6 +125,7 @@ static uint32 SumSquareError_SSE2(const uint8* src_a,
}
#endif
#endif
#endif
static uint32 SumSquareError_C(const uint8* src_a,
const uint8* src_b, int count) {
......@@ -148,7 +143,6 @@ uint64 ComputeSumSquareError(const uint8* src_a,
const uint8* src_b, int count) {
uint32 (*SumSquareError)(const uint8* src_a,
const uint8* src_b, int count);
#if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SumSquareError = SumSquareError_NEON;
......@@ -162,10 +156,8 @@ uint64 ComputeSumSquareError(const uint8* src_a,
{
SumSquareError = SumSquareError_C;
}
const int kBlockSize = 4096;
uint64 diff = 0;
while (count >= kBlockSize) {
diff += SumSquareError(src_a, src_b, kBlockSize);
src_a += kBlockSize;
......@@ -179,7 +171,6 @@ uint64 ComputeSumSquareError(const uint8* src_a,
diff += static_cast<uint64>(SumSquareError_C(src_a, src_b, count));
}
}
return diff;
}
......@@ -188,7 +179,6 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
int width, int height) {
uint32 (*SumSquareError)(const uint8* src_a,
const uint8* src_b, int count);
#if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
(width % 16 == 0)) {
......@@ -200,7 +190,6 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
}
uint64 sse = 0;
for (int h = 0; h < height; ++h) {
sse += static_cast<uint64>(SumSquareError(src_a, src_b, width));
src_a += stride_a;
......@@ -210,11 +199,10 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
return sse;
}
double Sse2Psnr(double Samples, double Sse) {
double Sse2Psnr(double samples, double sse) {
double psnr;
if (Sse > 0.0)
psnr = 10.0 * log10(255.0 * 255.0 * Samples / Sse);
if (sse > 0.0)
psnr = 10.0 * log10(255.0 * 255.0 * samples / sse);
else
psnr = kMaxPsnr; // Limit to prevent divide by 0
......@@ -224,6 +212,21 @@ double Sse2Psnr(double Samples, double Sse) {
return psnr;
}
// Converts a sum of squared errors (sse) accumulated over `samples` samples
// into a PSNR value: 10 * log10(255^2 * samples / sse).
// Returns kMaxPsnr when sse is not positive (no division is attempted), and
// never returns more than kMaxPsnr.
double Sse2Psnr(uint64 samples, uint64 sse) {
  if (sse > 0) {
    // samples / sse is the reciprocal of the mean squared error.
    const double inv_mse =
        static_cast<double>(samples) / static_cast<double>(sse);
    const double psnr_db = 10.0 * log10(255.0 * 255.0 * inv_mse);
    // Clamp to the configured ceiling.
    return (psnr_db > kMaxPsnr) ? kMaxPsnr : psnr_db;
  }
  return kMaxPsnr;  // Limit to prevent divide by 0
}
double CalcFramePsnr(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height) {
......@@ -233,7 +236,7 @@ double CalcFramePsnr(const uint8* src_a, int stride_a,
src_b, stride_b,
width, height);
return Sse2Psnr (samples, sse);
return Sse2Psnr(samples, sse);
}
double I420Psnr(const uint8* src_y_a, int stride_y_a,
......
......@@ -22,9 +22,9 @@
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static inline void __cpuid(int cpu_info[4], int info_type) {
asm volatile (
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
"mov %%ebx, %%edi \n"
"cpuid \n"
"xchg %%edi, %%ebx \n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
);
......@@ -32,7 +32,7 @@ static inline void __cpuid(int cpu_info[4], int info_type) {
#elif defined(__i386__) || defined(__x86_64__)
static inline void __cpuid(int cpu_info[4], int info_type) {
asm volatile (
"cpuid\n"
"cpuid \n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type)
);
......
......@@ -50,17 +50,17 @@ static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
#define HAS_ARGBTOBAYERROW_SSSE3
static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) {
asm volatile(
"movd %3,%%xmm5\n"
"pshufd $0x0,%%xmm5,%%xmm5\n"
"1:\n"
"movdqa (%0),%%xmm0\n"
"lea 0x10(%0),%0\n"
"pshufb %%xmm5,%%xmm0\n"
"movd %%xmm0,(%1)\n"
"lea 0x4(%1),%1\n"
"sub $0x4,%2\n"
"ja 1b\n"
asm volatile (
"movd %3,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"pshufb %%xmm5,%%xmm0 \n"
"movd %%xmm0,(%1) \n"
"lea 0x4(%1),%1 \n"
"sub $0x4,%2 \n"
"ja 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_bayer), // %1
"+r"(pix) // %2
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -92,7 +92,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
pcmpeqb xmm5, xmm5 // generate mask 0xff000000
pslld xmm5, 24
wloop:
convertloop:
movq xmm0, qword ptr [eax]
lea eax, [eax + 8]
punpcklbw xmm0, xmm0
......@@ -105,7 +105,7 @@ void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
movdqa [edx + 16], xmm1
lea edx, [edx + 32]
sub ecx, 8
ja wloop
ja convertloop
ret
}
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment