Commit 747ceb9f authored by fbarchard@google.com's avatar fbarchard@google.com

FixedDiv using integers

BUG=250
TEST=fixed div unittest
R=dingkai@google.com, ryanpetrie@google.com

Review URL: https://webrtc-codereview.appspot.com/1681004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@732 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 11404545
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 731 Version: 732
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -130,6 +130,7 @@ extern "C" { ...@@ -130,6 +130,7 @@ extern "C" {
// TODO(fbarchard): Port to gcc. // TODO(fbarchard): Port to gcc.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_ARGBCOLORTABLEROW_X86 #define HAS_ARGBCOLORTABLEROW_X86
#define HAS_FIXEDDIV
// Visual C 2012 required for AVX2. // Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700 #if _MSC_VER >= 1700
#define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2
...@@ -1516,16 +1517,13 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, ...@@ -1516,16 +1517,13 @@ void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width); uint8* dst_argb, int width);
void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
uint8* dst_argb, int width); uint8* dst_argb, int width);
extern const float kRecipTable[4097];
// Divide num by div and return value as 16.16 fixed point. // Divide num by div and return as 16.16 fixed point result.
#ifdef __cplusplus int FixedDiv_C(int num, int div);
static __inline int FixedDiv(int num, int div) { #ifdef HAS_FIXEDDIV
if (static_cast<unsigned int>(div) <= 4096u) { int FixedDiv(int num, int div);
return static_cast<int>(num * kRecipTable[div]); #else
} #define FixedDiv FixedDiv_C
return static_cast<int>((static_cast<int64>(num) << 16) / div);
}
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 731 #define LIBYUV_VERSION 732
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
This diff is collapsed.
...@@ -6602,6 +6602,30 @@ void I422ToUYVYRow_SSE2(const uint8* src_y, ...@@ -6602,6 +6602,30 @@ void I422ToUYVYRow_SSE2(const uint8* src_y,
ret ret
} }
} }
// Fixed point 0.32 reciprocal table.
extern const uint32 kRecipTable[4097];
// Divide num by div and return as 16.16 fixed point result.
//
// Two paths, selected by an unsigned compare of div against 4096:
//   * div in [0, 4096]: multiply num by the 0.32 fixed point reciprocal
//     kRecipTable[div] and keep bits 16..47 of the 64 bit product.
//   * anything else (div > 4096, or negative div, which compares as a large
//     unsigned value): compute ((int64)num << 16) / div with a signed divide,
//     mirroring FixedDiv_C.
//
// NOTE(review): the table path uses the unsigned `mul`, so a negative num is
// treated as a large unsigned value there - confirm callers only pass
// non-negative num when div <= 4096.
// NOTE(review): `idiv` raises a divide error when the quotient does not fit
// in 32 bits; FixedDiv_C would silently truncate instead - confirm callers
// stay within range.
__declspec(naked) __declspec(align(16))
int FixedDiv(int num, int div) {
  __asm {
    mov        eax, [esp + 4]    // num
    mov        ecx, [esp + 8]    // div
    cmp        ecx, 4096
    ja         largediv          // unsigned: also taken for negative div
    mul        dword ptr kRecipTable[ecx * 4]  // edx:eax = num * (1 / div)
    shrd       eax, edx, 16      // 32.32 product -> 16.16 result in eax
    ret

  largediv:
    // Sign extend all 32 bits of num into edx:eax. This must be cdq, not cwd:
    // cwd only extends ax into dx, so the sign of the 64 bit dividend would
    // be taken from bit 15 of num instead of bit 31.
    cdq
    shld       edx, eax, 16      // edx:eax = (int64)num << 16, high half
    shl        eax, 16           // low half
    idiv       ecx               // eax = quotient
    ret
  }
}
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -12,16 +12,12 @@ ...@@ -12,16 +12,12 @@
#include <string.h> #include <string.h>
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h" #include "libyuv/row.h"
#include "../unit_test/unit_test.h" #include "../unit_test/unit_test.h"
namespace libyuv { namespace libyuv {
// Divide num by div and return value as 16.16 fixed point.
static int FixedDiv_C(int num, int div) {
return static_cast<int>((static_cast<int64>(num) << 16) / div);
}
TEST_F(libyuvTest, TestFixedDiv) { TEST_F(libyuvTest, TestFixedDiv) {
int num[256]; int num[256];
int div[256]; int div[256];
...@@ -43,7 +39,7 @@ TEST_F(libyuvTest, TestFixedDiv) { ...@@ -43,7 +39,7 @@ TEST_F(libyuvTest, TestFixedDiv) {
} }
for (int j = 0; j < 256; ++j) { for (int j = 0; j < 256; ++j) {
result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); result_c[j] = libyuv::FixedDiv_C(num[j], div[j]);
EXPECT_NEAR(result_c[j], result_opt[j], 3); EXPECT_NEAR(result_c[j], result_opt[j], 1);
} }
} }
...@@ -64,8 +60,7 @@ TEST_F(libyuvTest, TestFixedDiv_Opt) { ...@@ -64,8 +60,7 @@ TEST_F(libyuvTest, TestFixedDiv_Opt) {
EXPECT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960)); EXPECT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960));
EXPECT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640)); EXPECT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640));
EXPECT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640)); EXPECT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640));
// TODO(fbarchard): Improve accuracy for divides should be exact. EXPECT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080));
EXPECT_NEAR(0x20000, libyuv::FixedDiv(1080 * 2, 1080), 1);
srandom(time(NULL)); srandom(time(NULL));
MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num)); MemRandomize(reinterpret_cast<uint8*>(&num[0]), sizeof(num));
...@@ -77,14 +72,22 @@ TEST_F(libyuvTest, TestFixedDiv_Opt) { ...@@ -77,14 +72,22 @@ TEST_F(libyuvTest, TestFixedDiv_Opt) {
div[j] = 1280; div[j] = 1280;
} }
} }
int has_x86 = TestCpuFlag(kCpuHasX86);
for (int i = 0; i < benchmark_pixels_div256_; ++i) { for (int i = 0; i < benchmark_pixels_div256_; ++i) {
for (int j = 0; j < 256; ++j) { if (has_x86) {
result_opt[j] = libyuv::FixedDiv(num[j], div[j]); for (int j = 0; j < 256; ++j) {
result_opt[j] = libyuv::FixedDiv(num[j], div[j]);
}
} else {
for (int j = 0; j < 256; ++j) {
result_opt[j] = libyuv::FixedDiv_C(num[j], div[j]);
}
} }
} }
for (int j = 0; j < 256; ++j) { for (int j = 0; j < 256; ++j) {
result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); result_c[j] = libyuv::FixedDiv_C(num[j], div[j]);
EXPECT_NEAR(result_c[j], result_opt[j], 3); EXPECT_NEAR(result_c[j], result_opt[j], 1);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment