Commit 8676ad70 authored by Frank Barchard

scale float samples and return max value

BUG=libyuv:717
TEST=ScaleSum unittest to compare C vs Arm implementation
TBR=kjellander@chromium.org

Change-Id: Iaa7af5547d979aad4722f868d31b405340115748
Reviewed-on: https://chromium-review.googlesource.com/600534
Reviewed-by: Cheng Wang <wangcheng@google.com>
parent 27036e33
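For context, here is a small standalone C++ sketch (not part of the commit) of what the new kernels compute: each float sample is multiplied by a scale factor and stored, and the Sum variant also returns the maximum scaled value. The function and variable names below are illustrative; the actual C reference and NEON implementations added by this commit appear in the diff that follows.

#include <cstdio>

// Mirrors the behaviour of the ScaleSumSamples_C reference added below:
// scale each sample, store it, and track the running maximum.
static float ScaleSumSamplesRef(const float* src, float* dst, float scale,
                                int width) {
  float fmax = 0.f;
  for (int i = 0; i < width; ++i) {
    float v = src[i] * scale;
    dst[i] = v;
    fmax = (v > fmax) ? v : fmax;
  }
  return fmax;
}

int main() {
  float src[8] = {0.5f, -1.0f, 2.0f, 3.0f, 0.0f, 1.5f, -2.0f, 4.0f};
  float dst[8];
  float peak = ScaleSumSamplesRef(src, dst, 1.2f, 8);
  printf("max scaled sample = %f\n", peak);  // expected 4.8 (4.0 * 1.2)
  return 0;
}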
Name: libyuv
URL: http://code.google.com/p/libyuv/
-Version: 1663
+Version: 1664
License: BSD
License File: LICENSE
...
@@ -359,6 +359,11 @@ extern "C" {
#define HAS_SOBELYROW_NEON
#endif

// The following are available on AArch64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_SCALESUMSAMPLES_NEON
#endif

// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_DSPR2) && defined(__mips__) && \
    (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
@@ -3152,6 +3157,14 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb,
                                 const uint8* luma,
                                 uint32 lumacoeff);

float ScaleSumSamples_C(const float* src, float* dst, float scale, int width);
float ScaleSumSamples_NEON(const float* src,
                           float* dst,
                           float scale,
                           int width);
void ScaleSamples_C(const float* src, float* dst, float scale, int width);
void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
...
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1663
+#define LIBYUV_VERSION 1664
#endif  // INCLUDE_LIBYUV_VERSION_H_
@@ -26,7 +26,7 @@ extern "C" {
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff;
-  asm volatile (
+  asm volatile(
      "vmov.u16 q4, #0 \n"  // accumulator
      "1: \n"
@@ -46,10 +46,7 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
      "vpadd.u32 d0, d0, d0 \n"
      "vmov.32 %3, d0[0] \n"
-      : "+r"(src_a),
-        "+r"(src_b),
-        "+r"(count),
-        "=r"(diff)
+      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
      :
      : "cc", "q0", "q1", "q2", "q3", "q4");
  return diff;
@@ -57,7 +54,7 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
  uint32 sse;
-  asm volatile (
+  asm volatile(
      "vmov.u8 q8, #0 \n"
      "vmov.u8 q10, #0 \n"
      "vmov.u8 q9, #0 \n"
@@ -81,10 +78,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
      "vpaddl.u32 q1, q11 \n"
      "vadd.u64 d0, d2, d3 \n"
      "vmov.32 %3, d0[0] \n"
-      : "+r"(src_a),
-        "+r"(src_b),
-        "+r"(count),
-        "=r"(sse)
+      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
      :
      : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
  return sse;
...
@@ -24,7 +24,7 @@ extern "C" {
// uses short accumulator which restricts count to 131 KB
uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
  uint32 diff;
-  asm volatile (
+  asm volatile(
      "movi v4.8h, #0 \n"
      "1: \n"
@@ -41,10 +41,7 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
      "uaddlv s4, v4.8h \n"
      "fmov %w3, s4 \n"
-      : "+r"(src_a),
-        "+r"(src_b),
-        "+r"(count),
-        "=r"(diff)
+      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
      :
      : "cc", "v0", "v1", "v2", "v3", "v4");
  return diff;
@@ -52,7 +49,7 @@ uint32 HammingDistance_NEON(const uint8* src_a, const uint8* src_b, int count) {
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
  uint32 sse;
-  asm volatile (
+  asm volatile(
      "eor v16.16b, v16.16b, v16.16b \n"
      "eor v18.16b, v18.16b, v18.16b \n"
      "eor v17.16b, v17.16b, v17.16b \n"
@@ -75,10 +72,7 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
      "add v19.4s, v16.4s, v18.4s \n"
      "addv s0, v19.4s \n"
      "fmov %w3, s0 \n"
-      : "+r"(src_a),
-        "+r"(src_b),
-        "+r"(count),
-        "=r"(sse)
+      : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
      :
      : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
  return sse;
...
@@ -30,7 +30,7 @@ void TransposeWx8_NEON(const uint8* src,
                       int dst_stride,
                       int width) {
  const uint8* src_temp;
-  asm volatile (
+  asm volatile(
      // loops are on blocks of 8. loop will stop when
      // counter gets to or below 0. starting the counter
      // at w-8 allow for this
@@ -193,8 +193,7 @@ void TransposeWx8_NEON(const uint8* src,
        "r"(static_cast<ptrdiff_t>(src_stride)),  // %5
        "r"(static_cast<ptrdiff_t>(dst_stride))   // %6
      : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
-       "v17", "v18", "v19", "v20", "v21", "v22", "v23"
-  );
+       "v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
static uint8 kVTbl4x4TransposeDi[32] = {
@@ -209,7 +208,7 @@ void TransposeUVWx8_NEON(const uint8* src,
                         int dst_stride_b,
                         int width) {
  const uint8* src_temp;
-  asm volatile (
+  asm volatile(
      // loops are on blocks of 8. loop will stop when
      // counter gets to or below 0. starting the counter
      // at w-8 allow for this
@@ -278,8 +277,10 @@ void TransposeUVWx8_NEON(const uint8* src,
      "st1 {v23.d}[1], [%0] \n"
      "add %1, %1, #16 \n"  // src += 8*2
-     "add %2, %2, %6, lsl #3 \n"  // dst_a += 8 * dst_stride_a
-     "add %3, %3, %7, lsl #3 \n"  // dst_b += 8 * dst_stride_b
+     "add %2, %2, %6, lsl #3 \n"  // dst_a += 8 *
+                                  // dst_stride_a
+     "add %3, %3, %7, lsl #3 \n"  // dst_b += 8 *
+                                  // dst_stride_b
      "subs %w4, %w4, #8 \n"  // w -= 8
      "b.ge 1b \n"
@@ -342,8 +343,10 @@ void TransposeUVWx8_NEON(const uint8* src,
      "st1 {v19.s}[3], [%0] \n"
      "add %1, %1, #8 \n"  // src += 4 * 2
-     "add %2, %2, %6, lsl #2 \n"  // dst_a += 4 * dst_stride_a
-     "add %3, %3, %7, lsl #2 \n"  // dst_b += 4 * dst_stride_b
+     "add %2, %2, %6, lsl #2 \n"  // dst_a += 4 *
+                                  // dst_stride_a
+     "add %3, %3, %7, lsl #2 \n"  // dst_b += 4 *
+                                  // dst_stride_b
      "subs %w4, %w4, #4 \n"  // w -= 4
      "b.eq 4f \n"
@@ -380,8 +383,10 @@ void TransposeUVWx8_NEON(const uint8* src,
      "st1 {v7.d}[0], [%0] \n"
      "add %1, %1, #4 \n"  // src += 2 * 2
-     "add %2, %2, %6, lsl #1 \n"  // dst_a += 2 * dst_stride_a
-     "add %3, %3, %7, lsl #1 \n"  // dst_b += 2 * dst_stride_b
+     "add %2, %2, %6, lsl #1 \n"  // dst_a += 2 *
+                                  // dst_stride_a
+     "add %3, %3, %7, lsl #1 \n"  // dst_b += 2 *
+                                  // dst_stride_b
      "subs %w4, %w4, #2 \n"  // w -= 2
      "b.eq 4f \n"
@@ -410,11 +415,8 @@ void TransposeUVWx8_NEON(const uint8* src,
        "r"(static_cast<ptrdiff_t>(dst_stride_a)),  // %6
        "r"(static_cast<ptrdiff_t>(dst_stride_b)),  // %7
        "r"(&kVTbl4x4TransposeDi)                   // %8
-      : "memory", "cc",
-        "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
-        "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
-        "v30", "v31"
-  );
+      : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
+        "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}
#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
...
@@ -2639,6 +2639,25 @@ void NV12ToRGB565Row_AVX2(const uint8* src_y,
}
#endif

float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
float fmax = 0.f;
int i;
for (i = 0; i < width; ++i) {
float v = *src++ * scale;
*dst++ = v;
fmax = (v > fmax) ? v : fmax;
}
return fmax;
}
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
int i;
for (i = 0; i < width; ++i) {
float v = *src++ * scale;
*dst++ = v;
}
}
#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
...
@@ -2612,6 +2612,53 @@ void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
  : "cc", "memory", "v1", "v2", "v3");
}

float ScaleSumSamples_NEON(const float* src,
float* dst,
float scale,
int width) {
float fmax;
asm volatile(
"movi v3.4s, #0 \n" // max
"movi v4.4s, #0 \n" // max
"1: \n"
"ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples
"subs %w2, %w2, #8 \n" // 8 processed per loop
"fmul v1.4s, v1.4s, %4.s[0] \n" // scale
"fmul v2.4s, v2.4s, %4.s[0] \n" // scale
"st1 {v1.4s, v2.4s}, [%1], #32 \n" // store 8 samples
"fmax v3.4s, v3.4s, v1.4s \n" // max
"fmax v4.4s, v4.4s, v2.4s \n"
"b.gt 1b \n"
"fmax v3.4s, v3.4s, v4.4s \n" // max
"fmaxv %s3, v3.4s \n" // signed max acculator
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width), // %2
"=w"(fmax) // %3
: "w"(scale) // %4
: "cc", "memory", "v1", "v2", "v3", "v4");
return fmax;
}
void ScaleSamples_NEON(const float* src, float* dst, float scale, int width) {
asm volatile(
"1: \n"
"ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples
"subs %w2, %w2, #8 \n" // 8 processed per loop
"fmul v1.4s, v1.4s, %3.s[0] \n" // scale
"fmul v2.4s, v2.4s, %3.s[0] \n" // scale
"st1 {v1.4s, v2.4s}, [%1], #32 \n" // store 8 samples
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(dst), // %1
"+r"(width) // %2
: "w"(scale) // %3
: "cc", "memory", "v1", "v2");
}
#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

#ifdef __cplusplus
...
@@ -11,6 +11,9 @@
#include <stdlib.h>
#include <time.h>

// row.h defines SIMD_ALIGNED, overriding unit_test.h
#include "libyuv/row.h" /* For ScaleSumSamples_Neon */

#include "../unit_test/unit_test.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
@@ -2518,4 +2521,146 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) {
  free_aligned_buffer_page_end(dst_pixels_c);
}

float TestScaleSumSamples(int benchmark_width,
int benchmark_height,
int benchmark_iterations,
float scale,
bool opt) {
int i, j;
float max_c, max_opt;
const int y_plane_size = benchmark_width * benchmark_height * 4;
align_buffer_page_end(orig_y, y_plane_size * 3);
uint8* dst_opt = orig_y + y_plane_size;
uint8* dst_c = orig_y + y_plane_size * 2;
// Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, y_plane_size);
for (i = 0; i < y_plane_size / 4; ++i) {
(reinterpret_cast<float*>(orig_y))[i] = (i - y_plane_size / 8) * 3.1415f;
}
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 1, y_plane_size);
// Disable all optimizations.
max_c = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_c), scale,
benchmark_width * benchmark_height);
// Enable optimizations.
for (j = 0; j < benchmark_iterations; j++) {
#ifdef HAS_SCALESUMSAMPLES_NEON
if (opt) {
max_opt = ScaleSumSamples_NEON(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
benchmark_width * benchmark_height);
} else {
max_opt = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
benchmark_width * benchmark_height);
}
#else
max_opt = ScaleSumSamples_C(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
benchmark_width * benchmark_height);
#endif
}
float max_diff = 0;
for (i = 0; i < y_plane_size / 4; ++i) {
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
(reinterpret_cast<float*>(dst_opt)[i]));
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
free_aligned_buffer_page_end(orig_y);
return max_diff;
}
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) {
float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
benchmark_iterations_, 1.2f, false);
EXPECT_EQ(0, diff);
}
TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) {
float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_,
benchmark_iterations_, 1.2f, true);
EXPECT_EQ(0, diff);
}
float TestScaleSamples(int benchmark_width,
int benchmark_height,
int benchmark_iterations,
float scale,
bool opt) {
int i, j;
const int y_plane_size = benchmark_width * benchmark_height * 4;
align_buffer_page_end(orig_y, y_plane_size * 3);
uint8* dst_opt = orig_y + y_plane_size;
uint8* dst_c = orig_y + y_plane_size * 2;
// Randomize works but may contain some denormals affecting performance.
// MemRandomize(orig_y, y_plane_size);
for (i = 0; i < y_plane_size / 4; ++i) {
(reinterpret_cast<float*>(orig_y))[i] = (i - y_plane_size / 8) * 3.1415f;
}
memset(dst_c, 0, y_plane_size);
memset(dst_opt, 1, y_plane_size);
// Disable all optimizations.
ScaleSamples_C(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_c), scale,
benchmark_width * benchmark_height);
// Enable optimizations.
for (j = 0; j < benchmark_iterations; j++) {
#ifdef HAS_SCALESAMPLES_NEON
if (opt) {
ScaleSamples_NEON(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
benchmark_width * benchmark_height);
} else {
ScaleSamples_C(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
benchmark_width * benchmark_height);
}
#else
ScaleSamples_C(reinterpret_cast<float*>(orig_y),
reinterpret_cast<float*>(dst_opt), scale,
benchmark_width * benchmark_height);
#endif
}
float max_diff = 0;
for (i = 0; i < y_plane_size / 4; ++i) {
float abs_diff = FAbs((reinterpret_cast<float*>(dst_c)[i]) -
(reinterpret_cast<float*>(dst_opt)[i]));
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
}
free_aligned_buffer_page_end(orig_y);
return max_diff;
}
TEST_F(LibYUVPlanarTest, TestScaleSamples_C) {
float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
benchmark_iterations_, 1.2f, false);
EXPECT_EQ(0, diff);
}
TEST_F(LibYUVPlanarTest, TestScaleSamples_Opt) {
float diff = TestScaleSamples(benchmark_width_, benchmark_height_,
benchmark_iterations_, 1.2f, true);
EXPECT_EQ(0, diff);
}
}  // namespace libyuv
@@ -36,6 +36,9 @@ static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

static __inline float FAbs(float v) {
  return v >= 0 ? v : -v;
}

#define OFFBY 0

// Scaling uses 16.16 fixed point to step thru the source image, so a
@@ -70,8 +73,11 @@ static inline bool SizeValid(int src_width,
  uint8* var;                                                                  \
  uint8* var##_mem;                                                            \
  var##_mem = reinterpret_cast<uint8*>(malloc(((size) + 4095 + 63) & ~4095)); \
-  var = (uint8*)((intptr_t)(var##_mem + (((size) + 4095 + 63) & /* NOLINT */ \
-                 ~4095) - (size)) & ~63);
+  var = (uint8*)((intptr_t)(var##_mem +                  \
+                            (((size) + 4095 + 63) & /* NOLINT */ \
+                             ~4095) -                    \
+                            (size)) &                    \
+                 ~63);

#define free_aligned_buffer_page_end(var) \
  free(var##_mem);                        \
...