Commit 5dba58cb authored by fbarchard@google.com's avatar fbarchard@google.com

FixedDiv1 using a single 64/32 divide. Removes size restriction from slope.

BUG=302
TESTED=libyuv scale tests
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/6489004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@940 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 27737872
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 939
Version: 941
License: BSD
License File: LICENSE
......
......@@ -26,6 +26,7 @@
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#include "libyuv/version.h"
#include "libyuv/video_common.h"
......
......@@ -104,7 +104,6 @@ extern "C" {
#define HAS_COPYROW_ERMS
#define HAS_COPYROW_SSE2
#define HAS_COPYROW_X86
#define HAS_FIXEDDIV_X86
#define HAS_HALFROW_SSE2
#define HAS_I400TOARGBROW_SSE2
#define HAS_I411TOARGBROW_SSSE3
......@@ -1684,15 +1683,6 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
int width, const uint8* luma,
const uint32 lumacoeff);
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#else
#define FixedDiv FixedDiv_C
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -33,6 +33,8 @@ extern "C" {
#define HAS_SCALEARGBCOLS_SSE2
#define HAS_SCALEARGBFILTERCOLS_SSSE3
#define HAS_SCALEARGBCOLSUP2_SSE2
#define HAS_FIXEDDIV_X86
#define HAS_FIXEDDIV1_X86
#endif
// The following are available on Neon platforms:
......@@ -61,17 +63,31 @@ void ScalePlaneVertical(int src_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int y, int dy,
int bpp, FilterMode filtering);
int bpp, enum FilterMode filtering);
// Simplify the filtering based on scale factors.
FilterMode ScaleFilterReduce(int src_width, int src_height,
int dst_width, int dst_height,
FilterMode filtering);
enum FilterMode ScaleFilterReduce(int src_width, int src_height,
int dst_width, int dst_height,
enum FilterMode filtering);
// Divide num by div and return as 16.16 fixed point result.
// FixedDiv_C is the plain C++ version; FixedDiv_X86 is the x86 assembly version.
int FixedDiv_C(int num, int div);
int FixedDiv_X86(int num, int div);
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
// The divisor actually used is div - 1, so div must not be 1.
int FixedDiv1_C(int num, int div);
int FixedDiv1_X86(int num, int div);
// Bind the generic names FixedDiv / FixedDiv1 to one implementation each.
// Both aliases are selected by HAS_FIXEDDIV_X86 alone; HAS_FIXEDDIV1_X86 is
// not tested here.
#ifdef HAS_FIXEDDIV_X86
#define FixedDiv FixedDiv_X86
#define FixedDiv1 FixedDiv1_X86
#else
#define FixedDiv FixedDiv_C
#define FixedDiv1 FixedDiv1_C
#endif
// Compute slope values for stepping.
void ScaleSlope(int src_width, int src_height,
int dst_width, int dst_height,
FilterMode filtering,
enum FilterMode filtering,
int* x, int* y, int* dx, int* dy);
void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t /* src_stride */,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 939
#define LIBYUV_VERSION 941
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
......@@ -59,11 +59,6 @@ static __inline uint32 Abs(int32 v) {
}
#endif // USE_BRANCHLESS
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div) {
return static_cast<int>((static_cast<int64>(num) << 16) / div);
}
#ifdef LIBYUV_LITTLE_ENDIAN
#define WRITEWORD(p, v) *reinterpret_cast<uint32*>(p) = v
#else
......
......@@ -6170,23 +6170,6 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
}
#endif // HAS_I422TOUYVYROW_SSE2
#ifdef HAS_FIXEDDIV_X86
// Divide num by div and return as 16.16 fixed point result.
// Builds the 64 bit value num << 16 in edx:eax and divides it by div with a
// single signed idiv. idiv raises a divide error when div is 0 or when the
// quotient does not fit in 32 bits, so callers must keep the result in range.
int FixedDiv_X86(int num, int div) {
asm volatile (
// Sign extend num (eax) into edx:eax.
"cdq \n"
// Shift edx:eax left by 16: edx receives the top 16 bits of eax first, then
// eax itself is shifted.
"shld $0x10,%%eax,%%edx \n"
"shl $0x10,%%eax \n"
// Signed divide of edx:eax by div; quotient goes to eax, remainder to edx.
"idiv %1 \n"
// %0 is constrained to eax, so this move leaves the quotient unchanged.
"mov %0, %%eax \n"
: "+a"(num) // %0: num in eax on entry, quotient on exit
: "c"(div) // %1: div in ecx
: "memory", "cc", "edx"
);
return num;
}
#endif // HAS_FIXEDDIV_X86
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
uint8* dst_argb, const float* poly,
......
......@@ -7009,21 +7009,6 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
}
}
#ifdef HAS_FIXEDDIV_X86
// Divide num by div and return as 16.16 fixed point result.
// Naked function: there is no compiler generated prologue, so the arguments
// are read directly from the stack and the quotient is left in eax for the
// caller. idiv raises a divide error when div is 0 or when the quotient does
// not fit in 32 bits.
__declspec(naked) __declspec(align(16))
int FixedDiv_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
cdq // sign extend num into edx:eax (64 bits)
shld edx, eax, 16 // edx takes the top 16 bits of eax: high half of num << 16
shl eax, 16 // low half of num << 16
idiv dword ptr [esp + 8] // edx:eax / div; quotient in eax
ret
}
}
#endif // HAS_FIXEDDIV_X86
#ifdef HAS_ARGBPOLYNOMIALROW_SSE2
__declspec(naked) __declspec(align(16))
void ARGBPolynomialRow_SSE2(const uint8* src_argb,
......
......@@ -584,9 +584,18 @@ FilterMode ScaleFilterReduce(int src_width, int src_height,
return filtering;
}
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div) {
return static_cast<int>((static_cast<int64>(num) << 16) / div);
}
// Divide num by div and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
return static_cast<int>(((static_cast<int64>(num) << 16) - 0x00010001) /
(div - 1));
}
// Starting offset for a step of dx: half of dx plus the bias s, with the whole
// value negated when dx is negative.
// Every argument and the full expansion are parenthesized so the macro can be
// used inside larger expressions and with compound arguments.
// Note that dx is evaluated more than once.
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
// Macro form of the "divide src - 1 by dst - 1" slope computation: it passes
// (src << 16) - 0x00010001 and (dst << 16) - 0x00010000 to FixedDiv.
// NOTE: both operands are shifted left by 16 before FixedDiv shifts the
// numerator again, which restricts how large src and dst may be.
// NOTE: the expansion ends with its own ';' and the arguments are not
// parenthesized, so it is only safe as a full statement with simple arguments.
#define FIXEDDIV1(src, dst) FixedDiv((src << 16) - 0x00010001, \
(dst << 16) - 0x00010000);
// Compute slope values for stepping.
void ScaleSlope(int src_width, int src_height,
......@@ -613,14 +622,14 @@ void ScaleSlope(int src_width, int src_height,
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
} else if (dst_width > 1) {
*dx = FIXEDDIV1(Abs(src_width), dst_width);
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}
if (dst_height <= src_height) {
*dy = FixedDiv(src_height, dst_height);
*y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
} else if (dst_height > 1) {
*dy = FIXEDDIV1(src_height, dst_height);
*dy = FixedDiv1(src_height, dst_height);
*y = 0;
}
} else if (filtering == kFilterLinear) {
......@@ -629,7 +638,7 @@ void ScaleSlope(int src_width, int src_height,
*dx = FixedDiv(Abs(src_width), dst_width);
*x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
} else if (dst_width > 1) {
*dx = FIXEDDIV1(Abs(src_width), dst_width);
*dx = FixedDiv1(Abs(src_width), dst_width);
*x = 0;
}
*dy = FixedDiv(src_height, dst_height);
......@@ -649,7 +658,6 @@ void ScaleSlope(int src_width, int src_height,
}
}
#undef CENTERSTART
#undef FIXEDDIV1
#ifdef __cplusplus
} // extern "C"
......
......@@ -1274,6 +1274,39 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
);
}
// Divide num by div and return as 16.16 fixed point result.
// Builds the 64 bit value num << 16 in edx:eax and divides it by div with a
// single signed idiv. idiv raises a divide error when div is 0 or when the
// quotient does not fit in 32 bits, so callers must keep the result in range.
int FixedDiv_X86(int num, int div) {
asm volatile (
// Sign extend num (eax) into edx:eax.
"cdq \n"
// Shift edx:eax left by 16: edx receives the top 16 bits of eax first, then
// eax itself is shifted.
"shld $0x10,%%eax,%%edx \n"
"shl $0x10,%%eax \n"
// Signed divide of edx:eax by div; quotient goes to eax, remainder to edx.
"idiv %1 \n"
// %0 is constrained to eax, so this move leaves the quotient unchanged.
"mov %0, %%eax \n"
: "+a"(num) // %0: num in eax on entry, quotient on exit
: "c"(div) // %1: div in ecx
: "memory", "cc", "edx"
);
return num;
}
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
// Builds (num << 16) - 0x10001 as a 64 bit value in edx:eax and divides it by
// div - 1 with a single signed idiv. idiv raises a divide error when div is 1
// (divisor of 0) or when the quotient does not fit in 32 bits.
int FixedDiv1_X86(int num, int div) {
  asm volatile (
    // Sign extend num (eax) into edx:eax, then shift the pair left by 16.
    "cdq \n"
    "shld $0x10,%%eax,%%edx \n"
    "shl $0x10,%%eax \n"
    // 64 bit subtract of 0x10001: low half first, then borrow into edx.
    "sub $0x10001,%%eax \n"
    "sbb $0x0,%%edx \n"
    // Divisor becomes div - 1. This writes to the div operand, so it is
    // declared read-write below rather than input-only.
    "sub $0x1,%1 \n"
    // Signed divide of edx:eax; quotient goes to eax, remainder to edx.
    "idiv %1 \n"
    // %0 is constrained to eax, so this move leaves the quotient unchanged.
    "mov %0, %%eax \n"
  : "+a"(num),  // %0: num in eax on entry, quotient on exit
    "+c"(div)   // %1: div in ecx, decremented by the asm
  :
  : "memory", "cc", "edx"
  );
  return num;
}
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
......
......@@ -1281,6 +1281,36 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
}
}
// Divide num by div and return as 16.16 fixed point result.
// Naked function: there is no compiler generated prologue, so the arguments
// are read directly from the stack and the quotient is left in eax for the
// caller. idiv raises a divide error when div is 0 or when the quotient does
// not fit in 32 bits.
__declspec(naked) __declspec(align(16))
int FixedDiv_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
cdq // sign extend num into edx:eax (64 bits)
shld edx, eax, 16 // edx takes the top 16 bits of eax: high half of num << 16
shl eax, 16 // low half of num << 16
idiv dword ptr [esp + 8] // edx:eax / div; quotient in eax
ret
}
}
// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
// Naked function: there is no compiler generated prologue, so the arguments
// are read directly from the stack and the quotient is left in eax for the
// caller. The divisor used is div - 1, so idiv raises a divide error when div
// is 1 or when the quotient does not fit in 32 bits.
__declspec(naked) __declspec(align(16))
int FixedDiv1_X86(int num, int div) {
__asm {
mov eax, [esp + 4] // num
mov ecx, [esp + 8] // denom
cdq // sign extend num into edx:eax (64 bits)
shld edx, eax, 16 // edx takes the top 16 bits of eax: high half of num << 16
shl eax, 16 // low half of num << 16
sub eax, 0x00010001 // 64 bit subtract of 0x10001, low half
sbb edx, 0 // propagate the borrow into the high half
sub ecx, 1 // divisor = div - 1
idiv ecx // edx:eax / (div - 1); quotient in eax
ret
}
}
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#ifdef __cplusplus
......
......@@ -14,6 +14,8 @@
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#include "libyuv/scale.h"
#include "libyuv/scale_row.h"
#include "../unit_test/unit_test.h"
namespace libyuv {
......@@ -27,7 +29,7 @@ TEST_F(libyuvTest, TestFixedDiv) {
EXPECT_EQ(0x10000, libyuv::FixedDiv(1, 1));
EXPECT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1));
// TODO(fbarchard): Avoid the following that throw exceptions.
// EXPECT_EQ(0x10000, libyuv::FixedDiv(0x10000, 1));
// EXPECT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1));
// EXPECT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1));
EXPECT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640));
......@@ -118,4 +120,39 @@ TEST_F(libyuvTest, TestFixedDiv_Opt) {
}
}
// Compares the FixedDiv1 selected at build time against FixedDiv1_C on 1280
// random numerator / divisor pairs, allowing a difference of at most 1.
// The optimized path is repeated benchmark_pixels_div1280_ times.
TEST_F(libyuvTest, TestFixedDiv1_Opt) {
  const int kCount = 1280;
  int num[kCount];
  int div[kCount];
  int result_opt[kCount];
  int result_c[kCount];

  // Fill both operand tables with random bytes.
  srandom(time(NULL));
  MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num));
  MemRandomize(reinterpret_cast<uint8*>(&div[0]), sizeof(div));

  // Keep operands in 0..4095 and replace divisors of 0 or 1, because
  // FixedDiv1 divides by div - 1.
  for (int k = 0; k < kCount; ++k) {
    num[k] &= 4095;
    div[k] &= 4095;
    if (div[k] <= 1) {
      div[k] = 1280;
    }
  }

  const int has_x86 = TestCpuFlag(kCpuHasX86);
  for (int pass = 0; pass < benchmark_pixels_div1280_; ++pass) {
    if (!has_x86) {
      // CPU flag is off: exercise the C version instead.
      for (int k = 0; k < kCount; ++k) {
        result_opt[k] = libyuv::FixedDiv1_C(num[k], div[k]);
      }
    } else {
      for (int k = 0; k < kCount; ++k) {
        result_opt[k] = libyuv::FixedDiv1(num[k], div[k]);
      }
    }
  }

  // Reference results from the C version, checked pair by pair.
  for (int k = 0; k < kCount; ++k) {
    result_c[k] = libyuv::FixedDiv1_C(num[k], div[k]);
    EXPECT_NEAR(result_c[k], result_opt[k], 1);
  }
}
} // namespace libyuv
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment