Commit f08ac6bb authored by fbarchard@google.com

Rename row functions so they are all SomethingRow_CPU

BUG=133
TEST=still builds
Review URL: https://webrtc-codereview.appspot.com/939020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@491 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent d985cf1f
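
The rename standardizes every low-level kernel on the pattern <Operation>Row_<Variant>: _C is the portable reference, a plain CPU suffix (_SSE2, _AVX2, _NEON, _MIPS_DSPR2) is the aligned SIMD kernel, _Unaligned_<CPU> drops the pointer-alignment requirement, and _Any_<CPU> handles arbitrary widths by finishing the tail in C. Callers keep a function pointer that starts at the _C version and is narrowed at run time, as the hunks below show for SplitUVRow and MergeUVRow. The following sketch illustrates that selection ladder with a hypothetical kernel named FooRow and simplified stand-ins for TestCpuFlag and IS_ALIGNED; none of these helper names are libyuv's.

// Illustrative sketch only: "FooRow" is a hypothetical kernel (a byte copy)
// used to show the SomethingRow_<Variant> tiers; it is not part of libyuv.
#include <cstdint>
#include <cstring>

typedef void (*FooRowFn)(const uint8_t* src, uint8_t* dst, int width);

static void FooRow_C(const uint8_t* src, uint8_t* dst, int width) {
  memcpy(dst, src, static_cast<size_t>(width));  // portable reference version
}
// Stand-ins for the SIMD tiers; a real build would supply SSE2/NEON bodies.
static void FooRow_Any_SSE2(const uint8_t* s, uint8_t* d, int w) { FooRow_C(s, d, w); }
static void FooRow_Unaligned_SSE2(const uint8_t* s, uint8_t* d, int w) { FooRow_C(s, d, w); }
static void FooRow_SSE2(const uint8_t* s, uint8_t* d, int w) { FooRow_C(s, d, w); }

static bool HasSSE2() { return true; }  // stands in for TestCpuFlag(kCpuHasSSE2)
static bool IsAligned16(const void* p) {
  return (reinterpret_cast<uintptr_t>(p) & 15) == 0;
}

// The same narrowing ladder this commit applies to SplitUVRow, MergeUVRow,
// SetRow, ARGBSetRows, CumulativeSumToAverageRow and friends.
static FooRowFn ChooseFooRow(const uint8_t* src, uint8_t* dst, int width) {
  FooRowFn fn = FooRow_C;                  // always-correct fallback
  if (HasSSE2() && width >= 16) {
    fn = FooRow_Any_SSE2;                  // any width: SIMD bulk + C tail
    if ((width & 15) == 0) {
      fn = FooRow_Unaligned_SSE2;          // exact multiple of 16, any alignment
      if (IsAligned16(src) && IsAligned16(dst)) {
        fn = FooRow_SSE2;                  // aligned pointers: fastest path
      }
    }
  }
  return fn;
}

In the real code the same ladder is repeated at each call site, which keeps every converter self-contained at the cost of some duplication, as the hunks below illustrate.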
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 490
+Version: 491
 License: BSD
 License File: LICENSE
......
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 490
+#define LIBYUV_VERSION 491
 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
...@@ -365,56 +365,56 @@ static int X420ToI420(const uint8* src_y, ...@@ -365,56 +365,56 @@ static int X420ToI420(const uint8* src_y,
} }
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
SplitUV_C; SplitUVRow_C;
#if defined(HAS_SPLITUV_SSE2) #if defined(HAS_SPLITUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
SplitUV = SplitUV_Any_SSE2; SplitUVRow = SplitUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_Unaligned_SSE2; SplitUVRow = SplitUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUV = SplitUV_SSE2; SplitUVRow = SplitUVRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_SPLITUV_AVX2) #if defined(HAS_SPLITUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
SplitUV = SplitUV_Any_AVX2; SplitUVRow = SplitUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
SplitUV = SplitUV_Unaligned_AVX2; SplitUVRow = SplitUVRow_Unaligned_AVX2;
if (IS_ALIGNED(src_uv, 32) && IS_ALIGNED(src_stride_uv, 32) && if (IS_ALIGNED(src_uv, 32) && IS_ALIGNED(src_stride_uv, 32) &&
IS_ALIGNED(dst_u, 32) && IS_ALIGNED(dst_stride_u, 32) && IS_ALIGNED(dst_u, 32) && IS_ALIGNED(dst_stride_u, 32) &&
IS_ALIGNED(dst_v, 32) && IS_ALIGNED(dst_stride_v, 32)) { IS_ALIGNED(dst_v, 32) && IS_ALIGNED(dst_stride_v, 32)) {
SplitUV = SplitUV_AVX2; SplitUVRow = SplitUVRow_AVX2;
} }
} }
} }
#endif #endif
#if defined(HAS_SPLITUV_NEON) #if defined(HAS_SPLITUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
SplitUV = SplitUV_Any_NEON; SplitUVRow = SplitUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_Unaligned_NEON; SplitUVRow = SplitUVRow_Unaligned_NEON;
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUV = SplitUV_NEON; SplitUVRow = SplitUVRow_NEON;
} }
} }
} }
#endif #endif
#if defined(HAS_SPLITUV_MIPS_DSPR2) #if defined(HAS_SPLITUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
SplitUV = SplitUV_Any_MIPS_DSPR2; SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_Unaligned_MIPS_DSPR2; SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) && if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) { IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
SplitUV = SplitUV_MIPS_DSPR2; SplitUVRow = SplitUVRow_MIPS_DSPR2;
} }
} }
} }
...@@ -428,7 +428,7 @@ static int X420ToI420(const uint8* src_y, ...@@ -428,7 +428,7 @@ static int X420ToI420(const uint8* src_y,
int halfheight = (height + 1) >> 1; int halfheight = (height + 1) >> 1;
for (int y = 0; y < halfheight; ++y) { for (int y = 0; y < halfheight; ++y) {
// Copy a row of UV. // Copy a row of UV.
SplitUV(src_uv, dst_u, dst_v, halfwidth); SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
dst_u += dst_stride_u; dst_u += dst_stride_u;
dst_v += dst_stride_v; dst_v += dst_stride_v;
src_uv += src_stride_uv; src_uv += src_stride_uv;
......
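
For context on the kernel being dispatched above: SplitUVRow takes one interleaved UV row (the second plane of NV12/NV21) and writes separate U and V rows; the portable SplitUVRow_C later in this commit does exactly this, two pixels per iteration. A simplified per-pixel equivalent with a worked example (a sketch, not libyuv's code):

#include <cassert>
#include <cstdint>

// Deinterleave one row: {U0,V0,U1,V1,...} -> {U0,U1,...} and {V0,V1,...}.
// Simplified per-pixel equivalent of SplitUVRow_C.
static void SplitUVRowSimple(const uint8_t* src_uv, uint8_t* dst_u,
                             uint8_t* dst_v, int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];
    dst_v[x] = src_uv[2 * x + 1];
  }
}

int main() {
  const uint8_t uv[8] = {10, 200, 11, 201, 12, 202, 13, 203};  // U,V pairs
  uint8_t u[4], v[4];
  SplitUVRowSimple(uv, u, v, 4);
  assert(u[0] == 10 && u[3] == 13);    // even bytes -> U plane
  assert(v[0] == 200 && v[3] == 203);  // odd bytes  -> V plane
  return 0;
}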
...@@ -519,43 +519,43 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, ...@@ -519,43 +519,43 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
} }
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C; int width) = MergeUVRow_C;
#if defined(HAS_MERGEUV_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_SSE2; MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_MERGEUV_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2; MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
MergeUV = MergeUV_Unaligned_AVX2; MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
if (IS_ALIGNED(src_u, 32) && IS_ALIGNED(src_stride_u, 32) && if (IS_ALIGNED(src_u, 32) && IS_ALIGNED(src_stride_u, 32) &&
IS_ALIGNED(src_v, 32) && IS_ALIGNED(src_stride_v, 32) && IS_ALIGNED(src_v, 32) && IS_ALIGNED(src_stride_v, 32) &&
IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
MergeUV = MergeUV_AVX2; MergeUVRow_ = MergeUVRow_AVX2;
} }
} }
} }
#endif #endif
#if defined(HAS_MERGEUV_NEON) #if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_NEON; MergeUVRow_ = MergeUVRow_Unaligned_NEON;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_NEON; MergeUVRow_ = MergeUVRow_NEON;
} }
} }
} }
...@@ -565,7 +565,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y, ...@@ -565,7 +565,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
int halfheight = (height + 1) >> 1; int halfheight = (height + 1) >> 1;
for (int y = 0; y < halfheight; ++y) { for (int y = 0; y < halfheight; ++y) {
// Merge a row of U and V into a row of UV. // Merge a row of U and V into a row of UV.
MergeUV(src_u, src_v, dst_uv, halfwidth); MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
src_u += src_stride_u; src_u += src_stride_u;
src_v += src_stride_v; src_v += src_stride_v;
dst_uv += dst_stride_uv; dst_uv += dst_stride_uv;
......
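
MergeUVRow is the inverse operation: it interleaves a U row and a V row into the packed UV row that NV12 stores (the callers spell their local pointer MergeUVRow_ with a trailing underscore, apparently only to keep the variable visually distinct from the kernel family name). A simplified per-pixel equivalent (sketch, not libyuv's code):

#include <cassert>
#include <cstdint>

// Interleave one row: {U0,U1,...} + {V0,V1,...} -> {U0,V0,U1,V1,...}.
// Simplified per-pixel equivalent of MergeUVRow_C.
static void MergeUVRowSimple(const uint8_t* src_u, const uint8_t* src_v,
                             uint8_t* dst_uv, int width) {
  for (int x = 0; x < width; ++x) {
    dst_uv[2 * x + 0] = src_u[x];
    dst_uv[2 * x + 1] = src_v[x];
  }
}

int main() {
  const uint8_t u[4] = {10, 11, 12, 13};
  const uint8_t v[4] = {200, 201, 202, 203};
  uint8_t uv[8];
  MergeUVRowSimple(u, v, uv, 4);
  assert(uv[0] == 10 && uv[1] == 200);  // U then V per pixel
  assert(uv[6] == 13 && uv[7] == 203);
  return 0;
}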
...@@ -248,37 +248,37 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -248,37 +248,37 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C; int width) = MergeUVRow_C;
#if defined(HAS_MERGEUV_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_SSE2; MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_MERGEUV_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2; MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
MergeUV = MergeUV_Unaligned_AVX2; MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
MergeUV = MergeUV_AVX2; MergeUVRow_ = MergeUVRow_AVX2;
} }
} }
} }
#endif #endif
#if defined(HAS_MERGEUV_NEON) #if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_NEON; MergeUVRow_ = MergeUVRow_Unaligned_NEON;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_NEON; MergeUVRow_ = MergeUVRow_NEON;
} }
} }
} }
...@@ -289,7 +289,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -289,7 +289,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
for (int y = 0; y < height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
MergeUV(row_u, row_v, dst_uv, halfwidth); MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2; src_argb += src_stride_argb * 2;
...@@ -298,7 +298,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb, ...@@ -298,7 +298,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
} }
if (height & 1) { if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width); ARGBToUVRow(src_argb, 0, row_u, row_v, width);
MergeUV(row_u, row_v, dst_uv, halfwidth); MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width); ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width);
} }
...@@ -357,37 +357,37 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -357,37 +357,37 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
} }
#endif #endif
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) = MergeUV_C; int width) = MergeUVRow_C;
#if defined(HAS_MERGEUV_SSE2) #if defined(HAS_MERGEUVROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUV = MergeUV_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_SSE2; MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
} }
#endif #endif
#if defined(HAS_MERGEUV_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
MergeUV = MergeUV_Any_AVX2; MergeUVRow_ = MergeUVRow_Any_AVX2;
if (IS_ALIGNED(halfwidth, 32)) { if (IS_ALIGNED(halfwidth, 32)) {
MergeUV = MergeUV_Unaligned_AVX2; MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) { if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
MergeUV = MergeUV_AVX2; MergeUVRow_ = MergeUVRow_AVX2;
} }
} }
} }
#endif #endif
#if defined(HAS_MERGEUV_NEON) #if defined(HAS_MERGEUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
MergeUV = MergeUV_Any_NEON; MergeUVRow_ = MergeUVRow_Any_NEON;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUV = MergeUV_Unaligned_NEON; MergeUVRow_ = MergeUVRow_Unaligned_NEON;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUV = MergeUV_NEON; MergeUVRow_ = MergeUVRow_NEON;
} }
} }
} }
...@@ -398,7 +398,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -398,7 +398,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
for (int y = 0; y < height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
MergeUV(row_v, row_u, dst_uv, halfwidth); MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
src_argb += src_stride_argb * 2; src_argb += src_stride_argb * 2;
...@@ -407,7 +407,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb, ...@@ -407,7 +407,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
} }
if (height & 1) { if (height & 1) {
ARGBToUVRow(src_argb, 0, row_u, row_v, width); ARGBToUVRow(src_argb, 0, row_u, row_v, width);
MergeUV(row_v, row_u, dst_uv, halfwidth); MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width); ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width);
} }
......
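
ARGBToNV12 and ARGBToNV21 share the loop structure visible above: one ARGBToUVRow call covers a pair of source rows, two ARGBToYRow calls produce the matching luma rows, and the merge step packs the half-width chroma; the visible difference between the two converters is the order in which the U and V buffers are passed to the merge function (UV for NV12, VU for NV21). The schematic below uses hypothetical do-nothing stubs in place of the real kernels so only the loop shape is shown:

#include <cstdint>

// Hypothetical stand-ins for ARGBToUVRow, ARGBToYRow and MergeUVRow kernels,
// kept trivial so the loop structure is the focus of this sketch.
static void ARGBToUVRowStub(const uint8_t*, int, uint8_t*, uint8_t*, int) {}
static void ARGBToYRowStub(const uint8_t*, uint8_t*, int) {}
static void MergeUVRowStub(const uint8_t*, const uint8_t*, uint8_t*, int) {}

// to_nv21 == false packs UV (NV12); true packs VU (NV21).
static void ARGBToNV12or21Sketch(const uint8_t* src_argb, int src_stride_argb,
                                 uint8_t* dst_y, int dst_stride_y,
                                 uint8_t* dst_uv, int dst_stride_uv,
                                 uint8_t* row_u, uint8_t* row_v,
                                 int width, int height, bool to_nv21) {
  const int halfwidth = (width + 1) >> 1;
  for (int y = 0; y < height - 1; y += 2) {
    ARGBToUVRowStub(src_argb, src_stride_argb, row_u, row_v, width);  // 2 rows -> 1 chroma row
    if (to_nv21) {
      MergeUVRowStub(row_v, row_u, dst_uv, halfwidth);  // VU order
    } else {
      MergeUVRowStub(row_u, row_v, dst_uv, halfwidth);  // UV order
    }
    ARGBToYRowStub(src_argb, dst_y, width);
    ARGBToYRowStub(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
    src_argb += src_stride_argb * 2;
    dst_y += dst_stride_y * 2;
    dst_uv += dst_stride_uv;
  }
  // An odd final row is handled in a separate tail, as in the real converters.
}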
...@@ -656,17 +656,17 @@ LIBYUV_API ...@@ -656,17 +656,17 @@ LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y, void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height, int width, int height,
uint32 value) { uint32 value) {
void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C; void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
#if defined(HAS_SETROW_NEON) #if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
SetRow = SetRow8_NEON; SetRow = SetRow_NEON;
} }
#endif #endif
#if defined(HAS_SETROW_X86) #if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
SetRow = SetRow8_X86; SetRow = SetRow_X86;
} }
#endif #endif
...@@ -721,17 +721,17 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, ...@@ -721,17 +721,17 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
#if defined(HAS_SETROW_NEON) #if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
SetRows32_NEON(dst, value, width, dst_stride_argb, height); ARGBSetRows_NEON(dst, value, width, dst_stride_argb, height);
return 0; return 0;
} }
#endif #endif
#if defined(HAS_SETROW_X86) #if defined(HAS_SETROW_X86)
if (TestCpuFlag(kCpuHasX86)) { if (TestCpuFlag(kCpuHasX86)) {
SetRows32_X86(dst, value, width, dst_stride_argb, height); ARGBSetRows_X86(dst, value, width, dst_stride_argb, height);
return 0; return 0;
} }
#endif #endif
SetRows32_C(dst, value, width, dst_stride_argb, height); ARGBSetRows_C(dst, value, width, dst_stride_argb, height);
return 0; return 0;
} }
...@@ -985,7 +985,7 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, ...@@ -985,7 +985,7 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
} }
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2) #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
} }
...@@ -1015,12 +1015,12 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -1015,12 +1015,12 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
} }
void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft, void (*CUMULATIVESUMTOAVERAGEROW)(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C; int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2) #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
CumulativeSumToAverage = CumulativeSumToAverage_SSE2; CUMULATIVESUMTOAVERAGEROW = CumulativeSumToAverageRow_SSE2;
} }
#endif #endif
// Compute enough CumulativeSum for first row to be blurred. After this // Compute enough CumulativeSum for first row to be blurred. After this
...@@ -1065,7 +1065,7 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -1065,7 +1065,7 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
int boxwidth = radius * 4; int boxwidth = radius * 4;
int x; int x;
for (x = 0; x < radius + 1; ++x) { for (x = 0; x < radius + 1; ++x) {
CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row, CUMULATIVESUMTOAVERAGEROW(cumsum_top_row, cumsum_bot_row,
boxwidth, area, &dst_argb[x * 4], 1); boxwidth, area, &dst_argb[x * 4], 1);
area += (bot_y - top_y); area += (bot_y - top_y);
boxwidth += 4; boxwidth += 4;
...@@ -1073,14 +1073,14 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb, ...@@ -1073,14 +1073,14 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
// Middle unclipped. // Middle unclipped.
int n = (width - 1) - radius - x + 1; int n = (width - 1) - radius - x + 1;
CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row, CUMULATIVESUMTOAVERAGEROW(cumsum_top_row, cumsum_bot_row,
boxwidth, area, &dst_argb[x * 4], n); boxwidth, area, &dst_argb[x * 4], n);
// Right clipped. // Right clipped.
for (x += n; x <= width - 1; ++x) { for (x += n; x <= width - 1; ++x) {
area -= (bot_y - top_y); area -= (bot_y - top_y);
boxwidth -= 4; boxwidth -= 4;
CumulativeSumToAverage(cumsum_top_row + (x - radius - 1) * 4, CUMULATIVESUMTOAVERAGEROW(cumsum_top_row + (x - radius - 1) * 4,
cumsum_bot_row + (x - radius - 1) * 4, cumsum_bot_row + (x - radius - 1) * 4,
boxwidth, area, &dst_argb[x * 4], 1); boxwidth, area, &dst_argb[x * 4], 1);
} }
...@@ -1104,7 +1104,7 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb, ...@@ -1104,7 +1104,7 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
} }
void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
int width, uint32 value) = ARGBShadeRow_C; int width, uint32 value) = ARGBShadeRow_C;
#if defined(HAS_ARGBSHADE_SSE2) #if defined(HAS_ARGBSHADEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
......
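
The SetRow/ARGBSetRows renames above separate the byte-fill kernel (SetRow_*, used by SetPlane) from the 32-bit rectangle fill (ARGBSetRows_*, used by ARGBRect). The latter stamps one packed 32-bit ARGB value across width pixels on each of height rows, stepping by the byte stride. A minimal equivalent of ARGBSetRows_C (sketch only, not the shipped code):

#include <cstdint>

// Fill 'width' ARGB pixels on each of 'height' rows with the packed 32-bit
// value v32; dst_stride is in bytes, matching the libyuv signatures above.
static void ARGBSetRowsSimple(uint8_t* dst, uint32_t v32, int width,
                              int dst_stride, int height) {
  for (int y = 0; y < height; ++y) {
    uint32_t* d = reinterpret_cast<uint32_t*>(dst);
    for (int x = 0; x < width; ++x) {
      d[x] = v32;                    // one ARGB pixel per 32-bit store
    }
    dst += dst_stride;               // advance to the next row
  }
}

int main() {
  uint8_t buf[4 * 4 * 2] = {0};      // 4x2 ARGB pixels, stride 16 bytes
  ARGBSetRowsSimple(buf, 0xFF00FF00u, 4, 16, 2);
  return buf[1] == 0xFF ? 0 : 1;     // low bytes of the value land first (little-endian)
}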
...@@ -1057,7 +1057,7 @@ void RotateUV180(const uint8* src, int src_stride, ...@@ -1057,7 +1057,7 @@ void RotateUV180(const uint8* src, int src_stride,
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
MirrorRowUV = MirrorUVRow_SSSE3; MirrorRowUV = MirrorUVRow_SSSE3;
} }
#elif defined(HAS_MirrorUVRow_MIPS_DSPR2) #elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
MirrorRowUV = MirrorUVRow_MIPS_DSPR2; MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
......
...@@ -294,7 +294,7 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, ...@@ -294,7 +294,7 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
#endif #endif
#undef UV422ANY #undef UV422ANY
#define SPLITUVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \ #define SplitUVRowANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
void NAMEANY(const uint8* src_uv, \ void NAMEANY(const uint8* src_uv, \
uint8* dst_u, uint8* dst_v, int width) { \ uint8* dst_u, uint8* dst_v, int width) { \
int n = width & ~MASK; \ int n = width & ~MASK; \
...@@ -305,21 +305,21 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, ...@@ -305,21 +305,21 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
width & MASK); \ width & MASK); \
} }
#ifdef HAS_SPLITUV_SSE2 #ifdef HAS_SPLITUVROW_SSE2
SPLITUVANY(SplitUV_Any_SSE2, SplitUV_Unaligned_SSE2, SplitUV_C, 15) SplitUVRowANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
#endif #endif
#ifdef HAS_SPLITUV_AVX2 #ifdef HAS_SPLITUVROW_AVX2
SPLITUVANY(SplitUV_Any_AVX2, SplitUV_Unaligned_AVX2, SplitUV_C, 31) SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31)
#endif #endif
#ifdef HAS_SPLITUV_NEON #ifdef HAS_SPLITUVROW_NEON
SPLITUVANY(SplitUV_Any_NEON, SplitUV_Unaligned_NEON, SplitUV_C, 15) SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_Unaligned_NEON, SplitUVRow_C, 15)
#endif #endif
#ifdef HAS_SPLITUV_MIPS_DSPR2 #ifdef HAS_SPLITUVROW_MIPS_DSPR2
SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15) SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2, SplitUVRow_C, 15)
#endif #endif
#undef SPLITUVANY #undef SplitUVRowANY
#define MERGEUVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \ #define MergeUVRow_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \
void NAMEANY(const uint8* src_u, const uint8* src_v, \ void NAMEANY(const uint8* src_u, const uint8* src_v, \
uint8* dst_uv, int width) { \ uint8* dst_uv, int width) { \
int n = width & ~MASK; \ int n = width & ~MASK; \
...@@ -330,16 +330,16 @@ SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15) ...@@ -330,16 +330,16 @@ SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15)
width & MASK); \ width & MASK); \
} }
#ifdef HAS_MERGEUV_SSE2 #ifdef HAS_MERGEUVROW_SSE2
MERGEUVANY(MergeUV_Any_SSE2, MergeUV_Unaligned_SSE2, MergeUV_C, 15) MergeUVRow_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
#endif #endif
#ifdef HAS_MERGEUV_AVX2 #ifdef HAS_MERGEUVROW_AVX2
MERGEUVANY(MergeUV_Any_AVX2, MergeUV_Unaligned_AVX2, MergeUV_C, 31) MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31)
#endif #endif
#ifdef HAS_MERGEUV_NEON #ifdef HAS_MERGEUVROW_NEON
MERGEUVANY(MergeUV_Any_NEON, MergeUV_Unaligned_NEON, MergeUV_C, 15) MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_Unaligned_NEON, MergeUVRow_C, 15)
#endif #endif
#undef MERGEUVANY #undef MergeUVRow_ANY
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
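
The SPLITUVANY/MERGEUVANY macros above generate the _Any_<CPU> entry points used by the dispatch code earlier in this commit: the SIMD kernel processes the largest multiple of the vector width and the C kernel finishes the remainder. Because the macro body is partly elided in this hunk, the sketch below reconstructs the pattern generically from the visible 'int n = width & ~MASK;' and 'width & MASK' lines; the wrapper and kernel names here are placeholders, not libyuv symbols.

#include <cassert>
#include <cstdint>

typedef void (*SplitUVRowFn)(const uint8_t* src_uv, uint8_t* dst_u,
                             uint8_t* dst_v, int width);

// Portable tail kernel (same shape as SplitUVRow_C).
static void SplitTail_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                        int width) {
  for (int x = 0; x < width; ++x) {
    dst_u[x] = src_uv[2 * x + 0];
    dst_v[x] = src_uv[2 * x + 1];
  }
}

// Generic "_Any_" wrapper: SIMD on the multiple-of-(mask+1) prefix, C on the
// tail. 'simd' would be e.g. SplitUVRow_Unaligned_SSE2 with mask == 15.
static void SplitUVRow_Any(SplitUVRowFn simd, SplitUVRowFn c,
                           const uint8_t* src_uv, uint8_t* dst_u,
                           uint8_t* dst_v, int width, int mask) {
  int n = width & ~mask;                         // largest SIMD-able prefix
  simd(src_uv, dst_u, dst_v, n);
  c(src_uv + n * 2, dst_u + n, dst_v + n,        // finish the odd remainder
    width & mask);
}

int main() {
  uint8_t uv[2 * 21], u[21], v[21];
  for (int i = 0; i < 42; ++i) uv[i] = static_cast<uint8_t>(i);
  // Use the C kernel for both tiers just to exercise the split arithmetic.
  SplitUVRow_Any(SplitTail_C, SplitTail_C, uv, u, v, 21, 15);
  assert(u[20] == 40 && v[20] == 41);            // tail pixels were covered
  return 0;
}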
...@@ -1187,7 +1187,7 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { ...@@ -1187,7 +1187,7 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
} }
} }
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
for (int x = 0; x < width - 1; x += 2) { for (int x = 0; x < width - 1; x += 2) {
dst_u[x] = src_uv[0]; dst_u[x] = src_uv[0];
dst_u[x + 1] = src_uv[2]; dst_u[x + 1] = src_uv[2];
...@@ -1201,8 +1201,8 @@ void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { ...@@ -1201,8 +1201,8 @@ void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
} }
} }
void MergeUV_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) { int width) {
for (int x = 0; x < width - 1; x += 2) { for (int x = 0; x < width - 1; x += 2) {
dst_uv[0] = src_u[x]; dst_uv[0] = src_u[x];
dst_uv[1] = src_v[x]; dst_uv[1] = src_v[x];
...@@ -1220,7 +1220,7 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) { ...@@ -1220,7 +1220,7 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) {
memcpy(dst, src, count); memcpy(dst, src, count);
} }
void SetRow8_C(uint8* dst, uint32 v8, int count) { void SetRow_C(uint8* dst, uint32 v8, int count) {
#ifdef _MSC_VER #ifdef _MSC_VER
// VC will generate rep stosb. // VC will generate rep stosb.
for (int x = 0; x < count; ++x) { for (int x = 0; x < count; ++x) {
...@@ -1231,7 +1231,7 @@ void SetRow8_C(uint8* dst, uint32 v8, int count) { ...@@ -1231,7 +1231,7 @@ void SetRow8_C(uint8* dst, uint32 v8, int count) {
#endif #endif
} }
void SetRows32_C(uint8* dst, uint32 v32, int width, void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
int dst_stride, int height) { int dst_stride, int height) {
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
uint32* d = reinterpret_cast<uint32*>(dst); uint32* d = reinterpret_cast<uint32*>(dst);
...@@ -1498,8 +1498,8 @@ void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, ...@@ -1498,8 +1498,8 @@ void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
} }
} }
void CumulativeSumToAverage_C(const int32* tl, const int32* bl, void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
int w, int area, uint8* dst, int count) { int w, int area, uint8* dst, int count) {
float ooa = 1.0f / area; float ooa = 1.0f / area;
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
dst[0] = static_cast<uint8>((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); dst[0] = static_cast<uint8>((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
......
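
CumulativeSumToAverageRow_C above converts box sums from a summed-area table into averages with a single multiply by 1/area, using the inclusion-exclusion expression (bl[w] + tl[0] - bl[0] - tl[w]) shown in the hunk, where tl and bl point at the table rows along the top and bottom edges of the blur box. A single-channel version of that arithmetic with a small worked example (the real kernel processes four interleaved ARGB channels at a time):

#include <cassert>
#include <cstdint>

// Average of a w-entry-wide box using one row of the summed-area table above
// the box (tl) and one row at its bottom edge (bl); mirrors the per-channel
// arithmetic in CumulativeSumToAverageRow_C for a single channel.
static uint8_t BoxAverage(const int32_t* tl, const int32_t* bl, int w,
                          int area) {
  float ooa = 1.0f / static_cast<float>(area);   // one reciprocal per box size
  int32_t sum = bl[w] + tl[0] - bl[0] - tl[w];   // inclusion-exclusion on the table
  return static_cast<uint8_t>(static_cast<float>(sum) * ooa);
}

int main() {
  // Cumulative sums for a 2-row x 4-column patch of constant value 10:
  // tl is the table row above the box (all zero, nothing above), bl the row
  // at its bottom edge (each column adds 2 * 10).
  const int32_t tl[5] = {0, 0, 0, 0, 0};
  const int32_t bl[5] = {0, 20, 40, 60, 80};
  assert(BoxAverage(tl, bl, 4, /*area=*/8) == 10);
  return 0;
}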
...@@ -23,9 +23,9 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) { ...@@ -23,9 +23,9 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
} }
#endif #endif
#ifdef HAS_SPLITUV_MIPS_DSPR2 #ifdef HAS_SPLITUVROW_MIPS_DSPR2
void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) { int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
...@@ -90,8 +90,8 @@ void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -90,8 +90,8 @@ void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
); );
} }
void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int width) { uint8* dst_v, int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"
".set noreorder \n" ".set noreorder \n"
...@@ -171,7 +171,7 @@ void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, ...@@ -171,7 +171,7 @@ void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
"t4", "t5", "t6", "t7", "t8", "t9" "t4", "t5", "t6", "t7", "t8", "t9"
); );
} }
#endif // HAS_SPLITUV_MIPS_DSPR2 #endif // HAS_SPLITUVROW_MIPS_DSPR2
#ifdef HAS_MIRRORROW_MIPS_DSPR2 #ifdef HAS_MIRRORROW_MIPS_DSPR2
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
...@@ -225,7 +225,7 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { ...@@ -225,7 +225,7 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_MIRRORROW_MIPS_DSPR2 #endif // HAS_MIRRORROW_MIPS_DSPR2
#ifdef HAS_MirrorUVRow_MIPS_DSPR2 #ifdef HAS_MIRRORUVROW_MIPS_DSPR2
void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) { int width) {
int x = 0; int x = 0;
...@@ -315,9 +315,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -315,9 +315,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
"t5", "t7", "t8", "t9" "t5", "t7", "t8", "t9"
); );
} }
#endif // HAS_MirrorUVRow_MIPS_DSPR2 #endif // HAS_MIRRORUVROW_MIPS_DSPR2
// Convert (4 Y and 2 VU) I422 and arrange RGB values into // Convert (4 Y and 2 VU) I422 and arrange RGB values into
// t5 = | 0 | B0 | 0 | b0 | // t5 = | 0 | B0 | 0 | b0 |
......
...@@ -747,10 +747,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy, ...@@ -747,10 +747,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
} }
#endif // HAS_UYVYTOARGBROW_NEON #endif // HAS_UYVYTOARGBROW_NEON
#ifdef HAS_SPLITUV_NEON #ifdef HAS_SPLITUVROW_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
asm volatile ( asm volatile (
".p2align 2 \n" ".p2align 2 \n"
"1: \n" "1: \n"
...@@ -770,8 +771,8 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { ...@@ -770,8 +771,8 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: Multiple of 16 pixels, pointers unaligned. // Alignment requirement: Multiple of 16 pixels, pointers unaligned.
void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) { int width) {
asm volatile ( asm volatile (
".p2align 2 \n" ".p2align 2 \n"
"1: \n" "1: \n"
...@@ -788,13 +789,13 @@ void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -788,13 +789,13 @@ void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
: "memory", "cc", "q0", "q1" // Clobber List : "memory", "cc", "q0", "q1" // Clobber List
); );
} }
#endif // HAS_SPLITUV_NEON #endif // HAS_SPLITUVROW_NEON
#ifdef HAS_MERGEUV_NEON #ifdef HAS_MERGEUVROW_NEON
// Reads 16 U's and V's and writes out 16 pairs of UV. // Reads 16 U's and V's and writes out 16 pairs of UV.
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels. // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) { int width) {
asm volatile ( asm volatile (
".p2align 2 \n" ".p2align 2 \n"
"1: \n" "1: \n"
...@@ -814,7 +815,7 @@ void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, ...@@ -814,7 +815,7 @@ void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
} }
// Reads 16 U's and V's and writes out 16 pairs of UV. // Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v, void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) { uint8* dst_uv, int width) {
asm volatile ( asm volatile (
".p2align 2 \n" ".p2align 2 \n"
...@@ -833,7 +834,7 @@ void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v, ...@@ -833,7 +834,7 @@ void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
: "memory", "cc", "q0", "q1" // Clobber List : "memory", "cc", "q0", "q1" // Clobber List
); );
} }
#endif // HAS_MERGEUV_NEON #endif // HAS_MERGEUVROW_NEON
#ifdef HAS_COPYROW_NEON #ifdef HAS_COPYROW_NEON
// Copy multiple of 32. vld4.u8 allow unaligned and is fastest on a15. // Copy multiple of 32. vld4.u8 allow unaligned and is fastest on a15.
void CopyRow_NEON(const uint8* src, uint8* dst, int count) { void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
...@@ -855,7 +856,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) { ...@@ -855,7 +856,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
#ifdef HAS_SETROW_NEON #ifdef HAS_SETROW_NEON
// SetRow8 writes 'count' bytes using a 32 bit value repeated. // SetRow8 writes 'count' bytes using a 32 bit value repeated.
void SetRow8_NEON(uint8* dst, uint32 v32, int count) { void SetRow_NEON(uint8* dst, uint32 v32, int count) {
asm volatile ( asm volatile (
"vdup.u32 q0, %2 \n" // duplicate 4 ints "vdup.u32 q0, %2 \n" // duplicate 4 ints
"1: \n" "1: \n"
...@@ -871,10 +872,10 @@ void SetRow8_NEON(uint8* dst, uint32 v32, int count) { ...@@ -871,10 +872,10 @@ void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
// TODO(fbarchard): Make fully assembler // TODO(fbarchard): Make fully assembler
// SetRow32 writes 'count' words using a 32 bit value repeated. // SetRow32 writes 'count' words using a 32 bit value repeated.
void SetRows32_NEON(uint8* dst, uint32 v32, int width, void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
int dst_stride, int height) { int dst_stride, int height) {
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
SetRow8_NEON(dst, v32, width << 2); SetRow_NEON(dst, v32, width << 2);
dst += dst_stride; dst += dst_stride;
} }
} }
......
...@@ -2621,8 +2621,8 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { ...@@ -2621,8 +2621,8 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
} }
#endif // HAS_ARGBMIRRORROW_SSSE3 #endif // HAS_ARGBMIRRORROW_SSSE3
#ifdef HAS_SPLITUV_SSE2 #ifdef HAS_SPLITUVROW_SSE2
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile ( asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
...@@ -2657,8 +2657,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { ...@@ -2657,8 +2657,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
); );
} }
void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix) { int pix) {
asm volatile ( asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n"
...@@ -2692,11 +2692,11 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -2692,11 +2692,11 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
#endif #endif
); );
} }
#endif // HAS_SPLITUV_SSE2 #endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_MERGEUV_SSE2 #ifdef HAS_MERGEUVROW_SSE2
void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) { int width) {
asm volatile ( asm volatile (
"sub %0,%1 \n" "sub %0,%1 \n"
".p2align 4 \n" ".p2align 4 \n"
...@@ -2724,8 +2724,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, ...@@ -2724,8 +2724,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
); );
} }
void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) { uint8* dst_uv, int width) {
asm volatile ( asm volatile (
"sub %0,%1 \n" "sub %0,%1 \n"
".p2align 4 \n" ".p2align 4 \n"
...@@ -2752,7 +2752,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, ...@@ -2752,7 +2752,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
#endif #endif
); );
} }
#endif // HAS_MERGEUV_SSE2 #endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_SSE2
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
...@@ -2795,7 +2795,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) { ...@@ -2795,7 +2795,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
#endif // HAS_COPYROW_X86 #endif // HAS_COPYROW_X86
#ifdef HAS_SETROW_X86 #ifdef HAS_SETROW_X86
void SetRow8_X86(uint8* dst, uint32 v32, int width) { void SetRow_X86(uint8* dst, uint32 v32, int width) {
size_t width_tmp = static_cast<size_t>(width); size_t width_tmp = static_cast<size_t>(width);
asm volatile ( asm volatile (
"shr $0x2,%1 \n" "shr $0x2,%1 \n"
...@@ -2806,7 +2806,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int width) { ...@@ -2806,7 +2806,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int width) {
: "memory", "cc"); : "memory", "cc");
} }
void SetRows32_X86(uint8* dst, uint32 v32, int width, void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
int dst_stride, int height) { int dst_stride, int height) {
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
size_t width_tmp = static_cast<size_t>(width); size_t width_tmp = static_cast<size_t>(width);
...@@ -4002,9 +4002,10 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, ...@@ -4002,9 +4002,10 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
} }
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) { int width, int area, uint8* dst,
int count) {
asm volatile ( asm volatile (
"movd %5,%%xmm4 \n" "movd %5,%%xmm4 \n"
"cvtdq2ps %%xmm4,%%xmm4 \n" "cvtdq2ps %%xmm4,%%xmm4 \n"
...@@ -4089,8 +4090,8 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, ...@@ -4089,8 +4090,8 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
#endif #endif
); );
} }
#endif // HAS_CUMULATIVESUMTOAVERAGE_SSE2 #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#ifdef HAS_ARGBSHADE_SSE2 #ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value. // Shade 4 pixels at a time by specified value.
// Aligned to 16 bytes. // Aligned to 16 bytes.
void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
...@@ -4127,7 +4128,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, ...@@ -4127,7 +4128,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
#endif #endif
); );
} }
#endif // HAS_ARGBSHADE_SSE2 #endif // HAS_ARGBSHADEROW_SSE2
#ifdef HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2
// TODO(fbarchard): Find 64 bit way to avoid masking. // TODO(fbarchard): Find 64 bit way to avoid masking.
......
...@@ -2767,9 +2767,9 @@ __asm { ...@@ -2767,9 +2767,9 @@ __asm {
} }
#endif // HAS_ARGBMIRRORROW_SSSE3 #endif // HAS_ARGBMIRRORROW_SSSE3
#ifdef HAS_SPLITUV_SSE2 #ifdef HAS_SPLITUVROW_SSE2
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
__asm { __asm {
push edi push edi
mov eax, [esp + 4 + 4] // src_uv mov eax, [esp + 4 + 4] // src_uv
...@@ -2805,8 +2805,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { ...@@ -2805,8 +2805,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
} }
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix) { int pix) {
__asm { __asm {
push edi push edi
mov eax, [esp + 4 + 4] // src_uv mov eax, [esp + 4 + 4] // src_uv
...@@ -2840,12 +2840,12 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, ...@@ -2840,12 +2840,12 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
ret ret
} }
} }
#endif // HAS_SPLITUV_SSE2 #endif // HAS_SPLITUVROW_SSE2
#ifdef HAS_MERGEUV_SSE2 #ifdef HAS_MERGEUVROW_SSE2
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width) { int width) {
__asm { __asm {
push edi push edi
mov eax, [esp + 4 + 4] // src_u mov eax, [esp + 4 + 4] // src_u
...@@ -2874,8 +2874,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, ...@@ -2874,8 +2874,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
} }
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width) { uint8* dst_uv, int width) {
__asm { __asm {
push edi push edi
mov eax, [esp + 4 + 4] // src_u mov eax, [esp + 4 + 4] // src_u
...@@ -2902,7 +2902,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, ...@@ -2902,7 +2902,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
ret ret
} }
} }
#endif // HAS_MERGEUV_SSE2 #endif // HAS_MERGEUVROW_SSE2
#ifdef HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_SSE2
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time. // CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
...@@ -2949,7 +2949,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) { ...@@ -2949,7 +2949,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) {
#ifdef HAS_SETROW_X86 #ifdef HAS_SETROW_X86
// SetRow8 writes 'count' bytes using a 32 bit value repeated. // SetRow8 writes 'count' bytes using a 32 bit value repeated.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SetRow8_X86(uint8* dst, uint32 v32, int count) { void SetRow_X86(uint8* dst, uint32 v32, int count) {
__asm { __asm {
mov edx, edi mov edx, edi
mov edi, [esp + 4] // dst mov edi, [esp + 4] // dst
...@@ -2964,7 +2964,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int count) { ...@@ -2964,7 +2964,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int count) {
// SetRow32 writes 'count' words using a 32 bit value repeated. // SetRow32 writes 'count' words using a 32 bit value repeated.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
void SetRows32_X86(uint8* dst, uint32 v32, int width, void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
int dst_stride, int height) { int dst_stride, int height) {
__asm { __asm {
push esi push esi
...@@ -4125,7 +4125,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, ...@@ -4125,7 +4125,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
} }
#endif // HAS_ARGBQUANTIZEROW_SSE2 #endif // HAS_ARGBQUANTIZEROW_SSE2
#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
// Consider float CumulativeSum. // Consider float CumulativeSum.
// Consider calling CumulativeSum one row at time as needed. // Consider calling CumulativeSum one row at time as needed.
// Consider circular CumulativeSum buffer of radius * 2 + 1 height. // Consider circular CumulativeSum buffer of radius * 2 + 1 height.
...@@ -4139,8 +4139,9 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, ...@@ -4139,8 +4139,9 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
// count is number of averaged pixels to produce. // count is number of averaged pixels to produce.
// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte // Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
// aligned. // aligned.
void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
int width, int area, uint8* dst, int count) { int width, int area, uint8* dst,
int count) {
__asm { __asm {
mov eax, topleft // eax topleft mov eax, topleft // eax topleft
mov esi, botleft // esi botleft mov esi, botleft // esi botleft
...@@ -4228,7 +4229,7 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, ...@@ -4228,7 +4229,7 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
l1b: l1b:
} }
} }
#endif // HAS_CUMULATIVESUMTOAVERAGE_SSE2 #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values // Creates a table of cumulative sums where each value is a sum of all values
...@@ -4315,7 +4316,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, ...@@ -4315,7 +4316,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
} }
#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2
#ifdef HAS_ARGBSHADE_SSE2 #ifdef HAS_ARGBSHADEROW_SSE2
// Shade 4 pixels at a time by specified value. // Shade 4 pixels at a time by specified value.
// Aligned to 16 bytes. // Aligned to 16 bytes.
__declspec(naked) __declspec(align(16)) __declspec(naked) __declspec(align(16))
...@@ -4349,7 +4350,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, ...@@ -4349,7 +4350,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
ret ret
} }
} }
#endif // HAS_ARGBSHADE_SSE2 #endif // HAS_ARGBSHADEROW_SSE2
#ifdef HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2
// Copy ARGB pixels from source image with slope to a row of destination. // Copy ARGB pixels from source image with slope to a row of destination.
......
...@@ -65,10 +65,10 @@ YUY2TOYROW YUY2,u,_Unaligned ...@@ -65,10 +65,10 @@ YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a, YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned YUY2TOYROW UYVY,u,_Unaligned
; void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) ; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
%macro SPLITUV 1-2 %macro SplitUVRow 1-2
cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
psrlw m4, m4, 8 psrlw m4, m4, 8
sub dst_vq, dst_uq sub dst_vq, dst_uq
...@@ -95,20 +95,20 @@ cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix ...@@ -95,20 +95,20 @@ cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
%endmacro %endmacro
INIT_MMX MMX INIT_MMX MMX
SPLITUV a, SplitUVRow a,
SPLITUV u,_Unaligned SplitUVRow u,_Unaligned
INIT_XMM SSE2 INIT_XMM SSE2
SPLITUV a, SplitUVRow a,
SPLITUV u,_Unaligned SplitUVRow u,_Unaligned
INIT_YMM AVX2 INIT_YMM AVX2
SPLITUV a, SplitUVRow a,
SPLITUV u,_Unaligned SplitUVRow u,_Unaligned
; void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, ; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width); ; int width);
%macro MergeUV 1-2 %macro MergeUVRow_ 1-2
cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
sub src_vq, src_uq sub src_vq, src_uq
ALIGN 16 ALIGN 16
...@@ -128,12 +128,12 @@ cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix ...@@ -128,12 +128,12 @@ cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix
%endmacro %endmacro
INIT_MMX MMX INIT_MMX MMX
MERGEUV a, MergeUVRow_ a,
MERGEUV u,_Unaligned MergeUVRow_ u,_Unaligned
INIT_XMM SSE2 INIT_XMM SSE2
MERGEUV a, MergeUVRow_ a,
MERGEUV u,_Unaligned MergeUVRow_ u,_Unaligned
INIT_YMM AVX2 INIT_YMM AVX2
MERGEUV a, MergeUVRow_ a,
MERGEUV u,_Unaligned MergeUVRow_ u,_Unaligned