Commit f08ac6bb authored by fbarchard@google.com

Rename row functions so they are all SomethingRow_CPU

BUG=133
TEST=still builds
Review URL: https://webrtc-codereview.appspot.com/939020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@491 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d985cf1f
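This commit standardizes every per-row kernel name to the pattern SomethingRow_CPU, where CPU is C, SSE2, AVX2, NEON, MIPS_DSPR2, or X86: SplitUV_SSE2 becomes SplitUVRow_SSE2, MergeUV_C becomes MergeUVRow_C, SetRow8_NEON becomes SetRow_NEON, SetRows32_X86 becomes ARGBSetRows_X86, and CumulativeSumToAverage_SSE2 becomes CumulativeSumToAverageRow_SSE2. The names feed the runtime-dispatch idiom used throughout the diff below; a minimal self-contained sketch of that idiom (InvertRow and TestFakeCpuFlag are hypothetical stand-ins, not libyuv API):

#include <stdint.h>
#include <stdio.h>

// Hypothetical row operation following the SomethingRow_CPU convention.
static void InvertRow_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = 255 - src[x];
}

// Stand-in for a SIMD kernel such as InvertRow_SSE2 or InvertRow_NEON.
static void InvertRow_FakeSIMD(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = 255 - src[x];
}

static int TestFakeCpuFlag(void) { return 1; }  // stand-in for TestCpuFlag()

int main(void) {
  // Pick the kernel once, then call it per row, as the hunks below do.
  void (*InvertRow)(const uint8_t*, uint8_t*, int) = InvertRow_C;
  if (TestFakeCpuFlag()) InvertRow = InvertRow_FakeSIMD;
  uint8_t src[4] = {0, 1, 254, 255}, dst[4];
  InvertRow(src, dst, 4);
  printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  // 255 254 1 0
  return 0;
}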
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 490
Version: 491
License: BSD
License File: LICENSE
This diff is collapsed.
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 490
#define LIBYUV_VERSION 491
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
@@ -365,56 +365,56 @@ static int X420ToI420(const uint8* src_y,
}
int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
SplitUV_C;
#if defined(HAS_SPLITUV_SSE2)
void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
SplitUVRow_C;
#if defined(HAS_SPLITUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
SplitUV = SplitUV_Any_SSE2;
SplitUVRow = SplitUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_Unaligned_SSE2;
SplitUVRow = SplitUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUV = SplitUV_SSE2;
SplitUVRow = SplitUVRow_SSE2;
}
}
}
#endif
#if defined(HAS_SPLITUV_AVX2)
#if defined(HAS_SPLITUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
SplitUV = SplitUV_Any_AVX2;
SplitUVRow = SplitUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
SplitUV = SplitUV_Unaligned_AVX2;
SplitUVRow = SplitUVRow_Unaligned_AVX2;
if (IS_ALIGNED(src_uv, 32) && IS_ALIGNED(src_stride_uv, 32) &&
IS_ALIGNED(dst_u, 32) && IS_ALIGNED(dst_stride_u, 32) &&
IS_ALIGNED(dst_v, 32) && IS_ALIGNED(dst_stride_v, 32)) {
SplitUV = SplitUV_AVX2;
SplitUVRow = SplitUVRow_AVX2;
}
}
}
#endif
#if defined(HAS_SPLITUV_NEON)
#if defined(HAS_SPLITUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
SplitUV = SplitUV_Any_NEON;
SplitUVRow = SplitUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_Unaligned_NEON;
SplitUVRow = SplitUVRow_Unaligned_NEON;
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUV = SplitUV_NEON;
SplitUVRow = SplitUVRow_NEON;
}
}
}
#endif
#if defined(HAS_SPLITUV_MIPS_DSPR2)
#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
SplitUV = SplitUV_Any_MIPS_DSPR2;
SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_Unaligned_MIPS_DSPR2;
SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
SplitUV = SplitUV_MIPS_DSPR2;
SplitUVRow = SplitUVRow_MIPS_DSPR2;
}
}
}
@@ -428,7 +428,7 @@ static int X420ToI420(const uint8* src_y,
int halfheight = (height + 1) >> 1;
for (int y = 0; y < halfheight; ++y) {
// Copy a row of UV.
SplitUV(src_uv, dst_u, dst_v, halfwidth);
SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_uv += src_stride_uv;
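For context, a SplitUVRow function de-interleaves one row of packed UV into separate U and V rows; the ladder above picks, in order of preference, the aligned SIMD kernel, the unaligned one, the _Any_ wrapper, or the C fallback. A standalone sketch of the row semantics (SplitUVRowRef is a hypothetical stand-in; the diff's SplitUVRow_C processes two pixels per iteration and also handles odd widths):

#include <stdint.h>
#include <stdio.h>

static void SplitUVRowRef(const uint8_t* src_uv, uint8_t* dst_u,
                          uint8_t* dst_v, int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];  // even bytes are U
    dst_v[x] = src_uv[2 * x + 1];  // odd bytes are V
  }
}

int main(void) {
  const uint8_t uv[8] = {1, 2, 3, 4, 5, 6, 7, 8};  // interleaved U,V pairs
  uint8_t u[4], v[4];
  SplitUVRowRef(uv, u, v, 4);
  printf("U: %d %d %d %d  V: %d %d %d %d\n",
         u[0], u[1], u[2], u[3], v[0], v[1], v[2], v[3]);  // 1 3 5 7 / 2 4 6 8
  return 0;
}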
@@ -519,43 +519,43 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
}
int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C;
#if defined(HAS_MERGEUV_SSE2)
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2;
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_SSE2;
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_SSE2;
MergeUVRow_ = MergeUVRow_SSE2;
}
}
}
#endif
#if defined(HAS_MERGEUV_AVX2)
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2;
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUV = MergeUV_Unaligned_AVX2;
MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
if (IS_ALIGNED(src_u, 32) && IS_ALIGNED(src_stride_u, 32) &&
IS_ALIGNED(src_v, 32) && IS_ALIGNED(src_stride_v, 32) &&
IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
MergeUV = MergeUV_AVX2;
MergeUVRow_ = MergeUVRow_AVX2;
}
}
}
#endif
#if defined(HAS_MERGEUV_NEON)
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON;
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_NEON;
MergeUVRow_ = MergeUVRow_Unaligned_NEON;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_NEON;
MergeUVRow_ = MergeUVRow_NEON;
}
}
}
@@ -565,7 +565,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
int halfheight = (height + 1) >> 1;
for (int y = 0; y < halfheight; ++y) {
// Merge a row of U and V into a row of UV.
MergeUV(src_u, src_v, dst_uv, halfwidth);
MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
src_u += src_stride_u;
src_v += src_stride_v;
dst_uv += dst_stride_uv;
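MergeUVRow_ is the inverse operation: it interleaves one U row and one V row into the packed UV plane that NV12 expects. A standalone sketch (MergeUVRowRef is a hypothetical stand-in for MergeUVRow_C); note that ARGBToNV21 further down reuses the same kernel with the U and V arguments swapped to get NV21's VU byte order:

#include <stdint.h>
#include <stdio.h>

static void MergeUVRowRef(const uint8_t* src_u, const uint8_t* src_v,
                          uint8_t* dst_uv, int width) {
  for (int x = 0; x < width; ++x) {
    dst_uv[2 * x + 0] = src_u[x];  // even bytes: U
    dst_uv[2 * x + 1] = src_v[x];  // odd bytes: V
  }
}

int main(void) {
  const uint8_t row_u[2] = {10, 11}, row_v[2] = {20, 21};
  uint8_t nv12[4], nv21[4];
  MergeUVRowRef(row_u, row_v, nv12, 2);  // NV12: U,V,U,V
  MergeUVRowRef(row_v, row_u, nv21, 2);  // NV21: V,U,V,U (swapped arguments)
  printf("NV12: %d %d %d %d\n", nv12[0], nv12[1], nv12[2], nv12[3]);
  printf("NV21: %d %d %d %d\n", nv21[0], nv21[1], nv21[2], nv21[3]);
  return 0;
}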
@@ -248,37 +248,37 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
#endif
int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C;
#if defined(HAS_MERGEUV_SSE2)
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2;
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_SSE2;
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_SSE2;
MergeUVRow_ = MergeUVRow_SSE2;
}
}
}
#endif
#if defined(HAS_MERGEUV_AVX2)
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2;
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUV = MergeUV_Unaligned_AVX2;
MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
MergeUV = MergeUV_AVX2;
MergeUVRow_ = MergeUVRow_AVX2;
}
}
}
#endif
#if defined(HAS_MERGEUV_NEON)
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON;
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_NEON;
MergeUVRow_ = MergeUVRow_Unaligned_NEON;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_NEON;
MergeUVRow_ = MergeUVRow_NEON;
}
}
}
@@ -289,7 +289,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
for (int y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
MergeUV(row_u, row_v, dst_uv, halfwidth);
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2;
@@ -298,7 +298,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
MergeUV(row_u, row_v, dst_uv, halfwidth);
MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width);
}
@@ -357,37 +357,37 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
#endif
int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C;
#if defined(HAS_MERGEUV_SSE2)
void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2;
MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_SSE2;
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_SSE2;
MergeUVRow_ = MergeUVRow_SSE2;
}
}
}
#endif
#if defined(HAS_MERGEUV_AVX2)
#if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2;
MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) {
MergeUV = MergeUV_Unaligned_AVX2;
MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
MergeUV = MergeUV_AVX2;
MergeUVRow_ = MergeUVRow_AVX2;
}
}
}
#endif
#if defined(HAS_MERGEUV_NEON)
#if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON;
MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_NEON;
MergeUVRow_ = MergeUVRow_Unaligned_NEON;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_NEON;
MergeUVRow_ = MergeUVRow_NEON;
}
}
}
@@ -398,7 +398,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
for (int y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
MergeUV(row_v, row_u, dst_uv, halfwidth);
MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2;
@@ -407,7 +407,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
}
if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width);
MergeUV(row_v, row_u, dst_uv, halfwidth);
MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width);
}
@@ -656,17 +656,17 @@ LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value) {
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C;
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
#if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
SetRow = SetRow8_NEON;
SetRow = SetRow_NEON;
}
#endif
#if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
SetRow = SetRow8_X86;
SetRow = SetRow_X86;
}
#endif
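SetRow (formerly SetRow8) fills 'count' bytes of one row with a byte value, and SetPlane applies it row by row using the stride. A standalone sketch under that assumption (SetRowRef and SetPlaneRef are hypothetical names):

#include <stdint.h>
#include <stdio.h>

static void SetRowRef(uint8_t* dst, uint32_t v8, int count) {
  for (int x = 0; x < count; ++x) dst[x] = (uint8_t)v8;  // byte fill
}

static void SetPlaneRef(uint8_t* dst_y, int dst_stride_y,
                        int width, int height, uint32_t value) {
  for (int y = 0; y < height; ++y) {
    SetRowRef(dst_y, value, width);
    dst_y += dst_stride_y;  // stride is in bytes
  }
}

int main(void) {
  uint8_t plane[4 * 4] = {0};
  SetPlaneRef(plane, 4, 3, 2, 0x80);  // fill a 3x2 region of a 4-wide plane
  printf("%d %d %d %d\n", plane[0], plane[2], plane[3], plane[4]);
  // prints 128 128 0 128
  return 0;
}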
@@ -721,17 +721,17 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
#if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
SetRows32_NEON(dst, value, width, dst_stride_argb, height);
ARGBSetRows_NEON(dst, value, width, dst_stride_argb, height);
return 0;
}
#endif
#if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
SetRows32_X86(dst, value, width, dst_stride_argb, height);
ARGBSetRows_X86(dst, value, width, dst_stride_argb, height);
return 0;
}
#endif
SetRows32_C(dst, value, width, dst_stride_argb, height);
ARGBSetRows_C(dst, value, width, dst_stride_argb, height);
return 0;
}
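ARGBSetRows (formerly SetRows32) stamps a 32 bit ARGB value across 'width' pixels per row for 'height' rows, honoring the byte stride, which is exactly what ARGBRect needs. A standalone sketch mirroring the ARGBSetRows_C shown later in this diff (ARGBSetRowsRef is a hypothetical name):

#include <stdint.h>
#include <stdio.h>

static void ARGBSetRowsRef(uint8_t* dst, uint32_t v32, int width,
                           int dst_stride, int height) {
  for (int y = 0; y < height; ++y) {
    uint32_t* d = (uint32_t*)dst;  // one uint32 per ARGB pixel
    for (int x = 0; x < width; ++x) d[x] = v32;
    dst += dst_stride;             // stride is in bytes
  }
}

int main(void) {
  uint32_t image[4 * 4] = {0};     // 4x4 ARGB image, stride 16 bytes
  ARGBSetRowsRef((uint8_t*)image, 0xFF0000FFu, 2, 16, 2);  // 2x2 blue rect
  printf("0x%08X 0x%08X 0x%08X\n", (unsigned)image[0], (unsigned)image[2],
         (unsigned)image[4]);  // 0xFF0000FF 0x00000000 0xFF0000FF
  return 0;
}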
@@ -985,7 +985,7 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
}
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
}
@@ -1015,12 +1015,12 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
}
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
CumulativeSumToAverage = CumulativeSumToAverage_SSE2;
CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
}
#endif
// Compute enough CumulativeSum for first row to be blurred. After this
@@ -1065,7 +1065,7 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
int boxwidth = radius * 4;
int x;
for (x = 0; x < radius + 1; ++x) {
CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
boxwidth, area, &dst_argb[x * 4], 1);
area += (bot_y - top_y);
boxwidth += 4;
@@ -1073,14 +1073,14 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
// Middle unclipped.
int n = (width - 1) - radius - x + 1;
CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
boxwidth, area, &dst_argb[x * 4], n);
// Right clipped.
for (x += n; x <= width - 1; ++x) {
area -= (bot_y - top_y);
boxwidth -= 4;
CumulativeSumToAverage(cumsum_top_row + (x - radius - 1) * 4,
CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
cumsum_bot_row + (x - radius - 1) * 4,
boxwidth, area, &dst_argb[x * 4], 1);
}
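The blur above rests on a summed-area table: ComputeCumulativeSumRow builds running sums, and CumulativeSumToAverageRow averages any box with four lookups, bl[w] + tl[0] - bl[0] - tl[w], scaled by 1/area (see its _C version later in this diff). A single-channel worked sketch of that identity (the libyuv versions keep 4 interleaved sums per ARGB pixel):

#include <stdio.h>

int main(void) {
  const int w = 4, h = 4;
  int img[4][4], cumsum[5][5] = {{0}};
  for (int y = 0; y < h; ++y)
    for (int x = 0; x < w; ++x)
      img[y][x] = 1;  // constant image: every box average must be 1
  // cumsum[y][x] = sum of all pixels above and left of (y, x).
  for (int y = 0; y < h; ++y)
    for (int x = 0; x < w; ++x)
      cumsum[y + 1][x + 1] = img[y][x] + cumsum[y][x + 1] +
                             cumsum[y + 1][x] - cumsum[y][x];
  // Box covering rows 1..2, cols 1..3: botright + topleft - topright - botleft.
  int top = 1, bot = 3, left = 1, right = 4;  // half-open on bottom/right
  int sum = cumsum[bot][right] + cumsum[top][left] -
            cumsum[top][right] - cumsum[bot][left];
  int area = (bot - top) * (right - left);
  printf("avg = %d\n", sum / area);  // prints 1
  return 0;
}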
@@ -1104,7 +1104,7 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
}
void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
int width, uint32 value) = ARGBShadeRow_C;
#if defined(HAS_ARGBSHADE_SSE2)
#if defined(HAS_ARGBSHADEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
@@ -1057,7 +1057,7 @@ void RotateUV180(const uint8* src, int src_stride,
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRowUV = MirrorUVRow_SSSE3;
}
#elif defined(HAS_MirrorUVRow_MIPS_DSPR2)
#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
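MirrorUVRow both de-interleaves and reverses a UV row, which is what RotateUV180 needs. A standalone sketch (MirrorUVRowRef is a hypothetical stand-in for the C reference):

#include <stdint.h>
#include <stdio.h>

static void MirrorUVRowRef(const uint8_t* src_uv, uint8_t* dst_u,
                           uint8_t* dst_v, int width) {
  src_uv += (width - 1) * 2;       // start at the last UV pair
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[0];
    dst_v[x] = src_uv[1];
    src_uv -= 2;                   // walk backwards
  }
}

int main(void) {
  const uint8_t uv[6] = {1, 2, 3, 4, 5, 6};  // (U,V) pairs
  uint8_t u[3], v[3];
  MirrorUVRowRef(uv, u, v, 3);
  printf("U: %d %d %d  V: %d %d %d\n", u[0], u[1], u[2], v[0], v[1], v[2]);
  // prints U: 5 3 1  V: 6 4 2
  return 0;
}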
@@ -294,7 +294,7 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
#endif
#undef UV422ANY
#define SPLITUVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
#define SplitUVRowANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
void NAMEANY(const uint8* src_uv, \
uint8* dst_u, uint8* dst_v, int width) { \
int n = width & ~MASK; \
@@ -305,21 +305,21 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
width & MASK); \
}
#ifdef HAS_SPLITUV_SSE2
SPLITUVANY(SplitUV_Any_SSE2, SplitUV_Unaligned_SSE2, SplitUV_C, 15)
#ifdef HAS_SPLITUVROW_SSE2
SplitUVRowANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
#endif
#ifdef HAS_SPLITUV_AVX2
SPLITUVANY(SplitUV_Any_AVX2, SplitUV_Unaligned_AVX2, SplitUV_C, 31)
#ifdef HAS_SPLITUVROW_AVX2
SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31)
#endif
#ifdef HAS_SPLITUV_NEON
SPLITUVANY(SplitUV_Any_NEON, SplitUV_Unaligned_NEON, SplitUV_C, 15)
#ifdef HAS_SPLITUVROW_NEON
SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_Unaligned_NEON, SplitUVRow_C, 15)
#endif
#ifdef HAS_SPLITUV_MIPS_DSPR2
SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15)
#ifdef HAS_SPLITUVROW_MIPS_DSPR2
SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2, SplitUVRow_C, 15)
#endif
#undef SPLITUVANY
#undef SplitUVRowANY
#define MERGEUVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
#define MergeUVRow_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
void NAMEANY(const uint8* src_u, const uint8* src_v, \
uint8* dst_uv, int width) { \
int n = width & ~MASK; \
@@ -330,16 +330,16 @@ SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15)
width & MASK); \
}
#ifdef HAS_MERGEUV_SSE2
MERGEUVANY(MergeUV_Any_SSE2, MergeUV_Unaligned_SSE2, MergeUV_C, 15)
#ifdef HAS_MERGEUVROW_SSE2
MergeUVRow_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
#endif
#ifdef HAS_MERGEUV_AVX2
MERGEUVANY(MergeUV_Any_AVX2, MergeUV_Unaligned_AVX2, MergeUV_C, 31)
#ifdef HAS_MERGEUVROW_AVX2
MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31)
#endif
#ifdef HAS_MERGEUV_NEON
MERGEUVANY(MergeUV_Any_NEON, MergeUV_Unaligned_NEON, MergeUV_C, 15)
#ifdef HAS_MERGEUVROW_NEON
MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_Unaligned_NEON, MergeUVRow_C, 15)
#endif
#undef MERGEUVANY
#undef MergeUVRow_ANY
#ifdef __cplusplus
} // extern "C"
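The _Any_ wrappers generated above run the SIMD kernel on the largest multiple of the vector width (width & ~MASK) and let the C function finish the tail, so callers can use SIMD on arbitrary widths. A standalone sketch of the same pattern with hypothetical names:

#include <stdint.h>
#include <stdio.h>

static void AddOneRow_C(const uint8_t* src, uint8_t* dst, int width) {
  for (int x = 0; x < width; ++x) dst[x] = src[x] + 1;
}

// Stand-in for an unaligned SIMD kernel requiring width % 16 == 0.
static void AddOneRow_Unaligned_SIMD(const uint8_t* src, uint8_t* dst,
                                     int width) {
  AddOneRow_C(src, dst, width);
}

static void AddOneRow_Any_SIMD(const uint8_t* src, uint8_t* dst, int width) {
  int n = width & ~15;                        // SIMD-sized prefix
  AddOneRow_Unaligned_SIMD(src, dst, n);
  AddOneRow_C(src + n, dst + n, width & 15);  // scalar tail
}

int main(void) {
  uint8_t src[21], dst[21];
  for (int i = 0; i < 21; ++i) src[i] = (uint8_t)i;
  AddOneRow_Any_SIMD(src, dst, 21);           // width not a multiple of 16
  printf("%d %d\n", dst[0], dst[20]);         // prints 1 21
  return 0;
}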
@@ -1187,7 +1187,7 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
}
}
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_u[x] = src_uv[0];
dst_u[x + 1] = src_uv[2];
@@ -1201,8 +1201,8 @@ void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
}
}
void MergeUV_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_uv[0] = src_u[x];
dst_uv[1] = src_v[x];
@@ -1220,7 +1220,7 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) {
memcpy(dst, src, count);
}
void SetRow8_C(uint8* dst, uint32 v8, int count) {
void SetRow_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER
// VC will generate rep stosb.
for (int x = 0; x < count; ++x) {
@@ -1231,7 +1231,7 @@ void SetRow8_C(uint8* dst, uint32 v8, int count) {
#endif
}
void SetRows32_C(uint8* dst, uint32 v32, int width,
void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
int dst_stride, int height) {
for (int y = 0; y < height; ++y) {
uint32* d = reinterpret_cast<uint32*>(dst);
@@ -1498,8 +1498,8 @@ void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
}
}
void CumulativeSumToAverage_C(const int32* tl, const int32* bl,
int w, int area, uint8* dst, int count) {
void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
int w, int area, uint8* dst, int count) {
float ooa = 1.0f / area;
for (int i = 0; i < count; ++i) {
dst[0] = static_cast<uint8>((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
@@ -23,9 +23,9 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
}
#endif
#ifdef HAS_SPLITUV_MIPS_DSPR2
void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
#ifdef HAS_SPLITUVROW_MIPS_DSPR2
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -90,8 +90,8 @@ void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
);
}
void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int width) {
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
@@ -171,7 +171,7 @@ void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
"t4", "t5", "t6", "t7", "t8", "t9"
);
}
#endif // HAS_SPLITUV_MIPS_DSPR2
#endif // HAS_SPLITUVROW_MIPS_DSPR2
#ifdef HAS_MIRRORROW_MIPS_DSPR2
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
@@ -225,7 +225,7 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_MIRRORROW_MIPS_DSPR2
#ifdef HAS_MirrorUVRow_MIPS_DSPR2
#ifdef HAS_MIRRORUVROW_MIPS_DSPR2
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
int x = 0;
@@ -315,9 +315,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"t5", "t7", "t8", "t9"
);
}
#endif // HAS_MirrorUVRow_MIPS_DSPR2
#endif // HAS_MIRRORUVROW_MIPS_DSPR2
// Convert (4 Y and 2 VU) I422 and arrange RGB values into
// t5 = | 0 | B0 | 0 | b0 |
@@ -747,10 +747,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
}
#endif // HAS_UYVYTOARGBROW_NEON
#ifdef HAS_SPLITUV_NEON
#ifdef HAS_SPLITUVROW_NEON
// Reads 16 pairs of UV and writes even values to dst_u and odd values to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
".p2align 2 \n"
"1: \n"
@@ -770,8 +771,8 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
// Reads 16 pairs of UV and writes even values to dst_u and odd values to dst_v
// Alignment requirement: Multiple of 16 pixels, pointers unaligned.
void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile (
".p2align 2 \n"
"1: \n"
@@ -788,13 +789,13 @@ void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#endif // HAS_SPLITUV_NEON
#endif // HAS_SPLITUVROW_NEON
#ifdef HAS_MERGEUV_NEON
#ifdef HAS_MERGEUVROW_NEON
// Reads 16 U's and V's and writes out 16 pairs of UV.
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
asm volatile (
".p2align 2 \n"
"1: \n"
@@ -814,7 +815,7 @@ void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) {
asm volatile (
".p2align 2 \n"
@@ -833,7 +834,7 @@ void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#endif // HAS_MERGEUV_NEON
#endif // HAS_MERGEUVROW_NEON
#ifdef HAS_COPYROW_NEON
// Copy multiples of 32. vld4.u8 allows unaligned access and is fastest on A15.
void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
@@ -855,7 +856,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
#ifdef HAS_SETROW_NEON
// SetRow writes 'count' bytes using a 32 bit value repeated.
void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
void SetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile (
"vdup.u32 q0, %2 \n" // duplicate 4 ints
"1: \n"
@@ -871,10 +872,10 @@ void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
// TODO(fbarchard): Convert fully to assembler.
// ARGBSetRows writes 'width' words using a 32 bit value repeated, once per row for 'height' rows.
void SetRows32_NEON(uint8* dst, uint32 v32, int width,
void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
int dst_stride, int height) {
for (int y = 0; y < height; ++y) {
SetRow8_NEON(dst, v32, width << 2);
SetRow_NEON(dst, v32, width << 2);
dst += dst_stride;
}
}
@@ -2621,8 +2621,8 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
}
#endif // HAS_ARGBMIRRORROW_SSSE3
#ifdef HAS_SPLITUV_SSE2
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
#ifdef HAS_SPLITUVROW_SSE2
void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
@@ -2657,8 +2657,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
);
}
void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix) {
void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
@@ -2692,11 +2692,11 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
#endif
);
}
#endif // HAS_SPLITUV_SSE2
#endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_MERGEUV_SSE2
void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
#ifdef HAS_MERGEUVROW_SSE2
void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
asm volatile (
"sub %0,%1 \n"
".p2align 4 \n"
@@ -2724,8 +2724,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
);
}
void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) {
void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) {
asm volatile (
"sub %0,%1 \n"
".p2align 4 \n"
@@ -2752,7 +2752,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
#endif
);
}
#endif // HAS_MERGEUV_SSE2
#endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_COPYROW_SSE2
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
@@ -2795,7 +2795,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
#endif // HAS_COPYROW_X86
#ifdef HAS_SETROW_X86
void SetRow8_X86(uint8* dst, uint32 v32, int width) {
void SetRow_X86(uint8* dst, uint32 v32, int width) {
size_t width_tmp = static_cast<size_t>(width);
asm volatile (
"shr $0x2,%1 \n"
@@ -2806,7 +2806,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int width) {
: "memory", "cc");
}
void SetRows32_X86(uint8* dst, uint32 v32, int width,
void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
int dst_stride, int height) {
for (int y = 0; y < height; ++y) {
size_t width_tmp = static_cast<size_t>(width);
@@ -4002,9 +4002,10 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
}
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2
void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) {
#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst,
int count) {
asm volatile (
"movd %5,%%xmm4 \n"
"cvtdq2ps %%xmm4,%%xmm4 \n"
@@ -4089,8 +4090,8 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
#endif
);
}
#endif // HAS_CUMULATIVESUMTOAVERAGE_SSE2
#ifdef HAS_ARGBSHADE_SSE2
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
// Aligned to 16 bytes.
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
@@ -4127,7 +4128,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
#endif
);
}
#endif // HAS_ARGBSHADE_SSE2
#endif // HAS_ARGBSHADEROW_SSE2
#ifdef HAS_ARGBAFFINEROW_SSE2
// TODO(fbarchard): Find 64 bit way to avoid masking.
@@ -2767,9 +2767,9 @@ __asm {
}
#endif // HAS_ARGBMIRRORROW_SSSE3
#ifdef HAS_SPLITUV_SSE2
#ifdef HAS_SPLITUVROW_SSE2
__declspec(naked) __declspec(align(16))
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
@@ -2805,8 +2805,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
}
__declspec(naked) __declspec(align(16))
void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix) {
void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
@@ -2840,12 +2840,12 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
ret
}
}
#endif // HAS_SPLITUV_SSE2
#endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_MERGEUV_SSE2
#ifdef HAS_MERGEUVROW_SSE2
__declspec(naked) __declspec(align(16))
void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_u
@@ -2874,8 +2874,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
}
__declspec(naked) __declspec(align(16))
void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) {
void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_u
@@ -2902,7 +2902,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
ret
}
}
#endif // HAS_MERGEUV_SSE2
#endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_COPYROW_SSE2
// CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time.
@@ -2949,7 +2949,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) {
#ifdef HAS_SETROW_X86
// SetRow writes 'count' bytes using a 32 bit value repeated.
__declspec(naked) __declspec(align(16))
void SetRow8_X86(uint8* dst, uint32 v32, int count) {
void SetRow_X86(uint8* dst, uint32 v32, int count) {
__asm {
mov edx, edi
mov edi, [esp + 4] // dst
@@ -2964,7 +2964,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int count) {
// ARGBSetRows writes 'width' words using a 32 bit value repeated, once per row for 'height' rows.
__declspec(naked) __declspec(align(16))
void SetRows32_X86(uint8* dst, uint32 v32, int width,
void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
int dst_stride, int height) {
__asm {
push esi
@@ -4125,7 +4125,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
}
#endif // HAS_ARGBQUANTIZEROW_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
// Consider float CumulativeSum.
// Consider calling CumulativeSum one row at a time as needed.
// Consider circular CumulativeSum buffer of radius * 2 + 1 height.
@@ -4139,8 +4139,9 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
// count is number of averaged pixels to produce.
// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
// aligned.
void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) {
void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst,
int count) {
__asm {
mov eax, topleft // eax topleft
mov esi, botleft // esi botleft
@@ -4228,7 +4229,7 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
l1b:
}
}
#endif // HAS_CUMULATIVESUMTOAVERAGE_SSE2
#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values
@@ -4315,7 +4316,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
}
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
#ifdef HAS_ARGBSHADE_SSE2
#ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value.
// Aligned to 16 bytes.
__declspec(naked) __declspec(align(16))
@@ -4349,7 +4350,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
ret
}
}
#endif // HAS_ARGBSHADE_SSE2
#endif // HAS_ARGBSHADEROW_SSE2
#ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination.
@@ -65,10 +65,10 @@ YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
; void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
%macro SPLITUV 1-2
cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
%macro SplitUVRow 1-2
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
psrlw m4, m4, 8
sub dst_vq, dst_uq
@@ -95,20 +95,20 @@ cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
%endmacro
INIT_MMX MMX
SPLITUV a,
SPLITUV u,_Unaligned
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_XMM SSE2
SPLITUV a,
SPLITUV u,_Unaligned
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_YMM AVX2
SPLITUV a,
SPLITUV u,_Unaligned
SplitUVRow a,
SplitUVRow u,_Unaligned
; void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
%macro MergeUV 1-2
cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix
%macro MergeUVRow_ 1-2
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
sub src_vq, src_uq
ALIGN 16
@@ -128,12 +128,12 @@ cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix
%endmacro
INIT_MMX MMX
MERGEUV a,
MERGEUV u,_Unaligned
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_XMM SSE2
MERGEUV a,
MERGEUV u,_Unaligned
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_YMM AVX2
MERGEUV a,
MERGEUV u,_Unaligned
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned