Commit 1cea4235 authored by Frank Barchard, committed by Commit Bot

RAWToJ400 for big endian RGB to grey scale.

On Pixel 3
Was
BM_ConvertToGray/1280/720/3                        2360958 ns      2334984 ns         2999
BM_ConvertToGray/1279/721/3                        2360289 ns      2334329 ns         2994
BM_ConvertGrayTensorflowCoefficients/1280/720/3    2983296 ns      2947113 ns         2259
BM_ConvertGrayTensorflowCoefficients/1279/721/3    2871205 ns      2835359 ns         2170

Now
BM_ConvertToGray/1280/720/3                        2358469 ns      2334068 ns         2997
BM_ConvertToGray/1279/721/3                        2364584 ns      2336892 ns         2995
BM_ConvertGrayTensorflowCoefficients/1280/720/3     281312 ns       278244 ns        25170
BM_ConvertGrayTensorflowCoefficients/1279/721/3     351310 ns       347229 ns        20217

BUG=libyuv:854

Change-Id: If2192affc2d3219e0fb824737d75b9374a25d709
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2003236
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
parent 6e6f81b8
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1742
Version: 1743
License: BSD
License File: LICENSE
......
......@@ -418,6 +418,15 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
int width,
int height);
// RGB big endian (rgb in memory) to J400.
//
// src_raw / src_stride_raw: source RAW image and the byte step between rows.
// dst_yj / dst_stride_yj:   destination grey plane and the byte step between
//                           rows.
// width:  pixels per row; must be greater than 0.
// height: number of rows; a negative value inverts the image vertically,
//         0 is rejected.
//
// Returns 0 on success and non-zero on failure (-1 for a NULL pointer,
// width <= 0 or height == 0).
LIBYUV_API
int RAWToJ400(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
......
......@@ -374,9 +374,11 @@ extern "C" {
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RAWTOYJROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB24TOYJROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
......@@ -1140,7 +1142,9 @@ void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
uint8_t* dst_y,
......@@ -1171,7 +1175,9 @@ void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RAWToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
......@@ -1190,7 +1196,9 @@ void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr,
......
......@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1742
#define LIBYUV_VERSION 1743
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -1598,8 +1598,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
int width,
int height) {
int y;
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
#if (defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI)
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVJRow_C;
......@@ -1625,7 +1626,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
}
// Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON)
#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON;
RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
......@@ -1689,16 +1690,16 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
#endif
{
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
#if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
#endif
for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
#if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
......@@ -1715,8 +1716,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
dst_v += dst_stride_v;
}
if (height & 1) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
#if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width);
#else
......@@ -1725,8 +1726,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI))
#if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
free_aligned_buffer_64(row);
#endif
}
......@@ -1746,8 +1747,9 @@ int RAWToI420(const uint8_t* src_raw,
int width,
int height) {
int y;
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \
defined(HAS_RAWTOYROW_MMI))
#if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
defined(HAS_RAWTOYROW_MSA) || \
defined(HAS_RAWTOYROW_MMI)
void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
uint8_t* dst_v, int width) = RAWToUVRow_C;
void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
......@@ -1772,7 +1774,7 @@ int RAWToI420(const uint8_t* src_raw,
}
// Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYROW_NEON)
#if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RAWToUVRow = RAWToUVRow_Any_NEON;
RAWToYRow = RAWToYRow_Any_NEON;
......@@ -2398,7 +2400,7 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
src_stride_rgb24 = -src_stride_rgb24;
}
// Neon version does direct RGB24 to YUV.
// Neon version does direct RGB24 to YJ.
#if defined(HAS_RGB24TOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
......@@ -2487,6 +2489,124 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
return 0;
}
// Convert RAW (R, G, B byte order in memory) to a J400 grey scale plane.
//
// src_raw / src_stride_raw: source image and the byte step between its rows.
// dst_yj / dst_stride_yj:   destination plane and the byte step between rows.
// width:  pixels per row; must be greater than 0.
// height: number of rows; a negative value inverts the image vertically,
//         0 is rejected.
//
// Returns 0 on success, -1 on invalid arguments, and 1 when the scratch row
// needed by the intermediate-ARGB path could not be allocated.
//
// Two build-time strategies exist:
//  - NEON / MSA / MMI builds convert each RAW row straight to YJ.
//  - Every other build converts the row to ARGB in a scratch buffer and then
//    converts that ARGB row to YJ.
// Each output row depends on exactly one input row, so rows are processed one
// at a time; no row pairing or odd-height tail is needed.
LIBYUV_API
int RAWToJ400(const uint8_t* src_raw,
              int src_stride_raw,
              uint8_t* dst_yj,
              int dst_stride_yj,
              int width,
              int height) {
  int y;
#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
     defined(HAS_RAWTOYJROW_MMI))
  void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_yj, int width) =
      RAWToYJRow_C;
#else
  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
      RAWToARGBRow_C;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
      ARGBToYJRow_C;
#endif
  if (!src_raw || !dst_yj || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last source row and
  // walk backwards.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }

  // Select the fastest row function available. The "Any" variants accept any
  // width; the plain variants are chosen only when width is a multiple of the
  // alignment tested below.
// Neon version does direct RAW to YJ.
#if defined(HAS_RAWTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToYJRow = RAWToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToYJRow = RAWToYJRow_NEON;
    }
  }
#elif defined(HAS_RAWTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToYJRow = RAWToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToYJRow = RAWToYJRow_MSA;
    }
  }
#elif defined(HAS_RAWTOYJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    RAWToYJRow = RAWToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      RAWToYJRow = RAWToYJRow_MMI;
    }
  }
// Other platforms do intermediate conversion from RAW to ARGB.
#else
#if defined(HAS_RAWTOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RAWToARGBRow = RAWToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#endif

#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
     defined(HAS_RAWTOYJROW_MMI))
  // Direct path: one RAW row in, one YJ row out.
  for (y = 0; y < height; ++y) {
    RAWToYJRow(src_raw, dst_yj, width);
    src_raw += src_stride_raw;
    dst_yj += dst_stride_yj;
  }
#else
  {
    // Allocate 1 row of ARGB (4 bytes per pixel, rounded up to 32 bytes).
    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize);
    // NOTE(review): assumes align_buffer_64 leaves `row` NULL when the
    // underlying allocation fails - confirm against the macro definition.
    if (!row) {
      return 1;
    }
    for (y = 0; y < height; ++y) {
      RAWToARGBRow(src_raw, row, width);
      ARGBToYJRow(row, dst_yj, width);
      src_raw += src_stride_raw;
      dst_yj += dst_stride_yj;
    }
    free_aligned_buffer_64(row);
  }
#endif
  return 0;
}
static void SplitPixels(const uint8_t* src_u,
int src_pixel_stride_uv,
uint8_t* dst_u,
......
......@@ -695,6 +695,9 @@ ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7)
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif
// Any-width variant of RGB24ToYJRow_NEON, built only when the NEON row
// function is available.
// NOTE(review): the trailing arguments presumably mean UV shift 0, 3 source
// bytes per pixel, 1 destination byte per pixel and a width mask of 7 (8-pixel
// SIMD step) - confirm against the ANY11 definition.
#ifdef HAS_RGB24TOYJROW_NEON
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif
......@@ -704,6 +707,9 @@ ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7)
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif
// Any-width variant of RAWToYJRow_NEON, built only when the NEON row function
// is available.
// NOTE(review): the trailing arguments presumably mean UV shift 0, 3 source
// bytes per pixel, 1 destination byte per pixel and a width mask of 7 (8-pixel
// SIMD step) - confirm against the ANY11 definition.
#ifdef HAS_RAWTOYJROW_NEON
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif
......
......@@ -565,6 +565,8 @@ static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
// Instantiate the C reference row converters for each packed RGB layout.
// NOTE(review): the numeric arguments presumably are the byte offsets of R, G
// and B within one pixel followed by the bytes per pixel - confirm against the
// MAKEROWYJ definition.
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
// RAW uses the reverse channel offsets of RGB24.
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
......
......@@ -6040,27 +6040,30 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
uint64_t y,u,v;
uint64_t b_vec[2],g_vec[2],r_vec[2];
uint64_t y, u, v;
uint64_t b_vec[2], g_vec[2], r_vec[2];
uint64_t mask = 0xff00ff00ff00ff00ULL;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
__asm__ volatile (
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
"ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
"ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
"or %[ub], %[ub], %[mask] \n\t"//must sign extension
"ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
"ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" // yg
"ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" // bb
"ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" // ub
"or %[ub], %[ub], %[mask] \n\t" // must
// sign
// extension
"ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" // bg
"ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" // ug
"punpcklbh %[ug], %[ug], %[zero] \n\t"
"pshufh %[ug], %[ug], %[zero] \n\t"
"ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
"ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" // vg
"punpcklbh %[vg], %[vg], %[zero] \n\t"
"pshufh %[vg], %[vg], %[five] \n\t"
"ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
"ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
"ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" // br
"ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" // vr
"punpcklbh %[vr], %[vr], %[zero] \n\t"
"pshufh %[vr], %[vr], %[five] \n\t"
"or %[vr], %[vr], %[mask] \n\t"//sign extension
"or %[vr], %[vr], %[mask] \n\t" // sign
// extension
"1: \n\t"
"gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
......@@ -6070,35 +6073,35 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
"gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
"gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
"punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t"//y1
"punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t" // y1
"punpcklbh %[u], %[u], %[zero] \n\t"//u
"punpcklbh %[u], %[u], %[zero] \n\t" // u
"paddsh %[b_vec0], %[y], %[bb] \n\t"
"pmullh %[b_vec1], %[u], %[ub] \n\t"
"psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
"psrah %[b_vec0], %[b_vec0], %[six] \n\t"
"punpcklbh %[v], %[v], %[zero] \n\t"//v
"punpcklbh %[v], %[v], %[zero] \n\t" // v
"paddsh %[g_vec0], %[y], %[bg] \n\t"
"pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
"pmullh %[g_vec1], %[u], %[ug] \n\t" // u*ug
"psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
"pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
"pmullh %[g_vec1], %[v], %[vg] \n\t" // v*vg
"psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
"psrah %[g_vec0], %[g_vec0], %[six] \n\t"
"paddsh %[r_vec0], %[y], %[br] \n\t"
"pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
"pmullh %[r_vec1], %[v], %[vr] \n\t" // v*vr
"psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
"psrah %[r_vec0], %[r_vec0], %[six] \n\t"
"packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
"packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
"packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" // rrrrbbbb
"packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" // ffffgggg
"punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
"punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
"punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
"punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
"punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
"punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" // gbgbgbgb
"punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" // frfrfrfr
"punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb
"punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb
"gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
"gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
"gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
......@@ -6110,23 +6113,17 @@ void I444ToARGBRow_MMI(const uint8_t* src_y,
"daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y),
[u]"=&f"(u), [v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [alpha]"f"(-1),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
[five] "f"(0x55), [mask] "f"(mask)
: "memory");
}
// Also used for 420
......@@ -6136,28 +6133,31 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
uint64_t y,u,v;
uint64_t b_vec[2],g_vec[2],r_vec[2];
uint64_t y, u, v;
uint64_t b_vec[2], g_vec[2], r_vec[2];
uint64_t mask = 0xff00ff00ff00ff00ULL;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"//yg
"ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"//bb
"ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"//ub
"or %[ub], %[ub], %[mask] \n\t"//must sign extension
"ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"//bg
"ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"//ug
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t" // yg
"ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t" // bb
"ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t" // ub
"or %[ub], %[ub], %[mask] \n\t" // must
// sign
// extension
"ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t" // bg
"ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t" // ug
"punpcklbh %[ug], %[ug], %[zero] \n\t"
"pshufh %[ug], %[ug], %[zero] \n\t"
"ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"//vg
"ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t" // vg
"punpcklbh %[vg], %[vg], %[zero] \n\t"
"pshufh %[vg], %[vg], %[five] \n\t"
"ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"//br
"ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"//vr
"ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t" // br
"ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t" // vr
"punpcklbh %[vr], %[vr], %[zero] \n\t"
"pshufh %[vr], %[vr], %[five] \n\t"
"or %[vr], %[vr], %[mask] \n\t"//sign extension
"or %[vr], %[vr], %[mask] \n\t" // sign
// extension
"1: \n\t"
"gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
......@@ -6167,39 +6167,39 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
"gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
"gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
"punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t"//y1
"punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t" // y1
//u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t"//u
// u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t" // u
"punpcklbh %[u], %[u], %[zero] \n\t"
"paddsh %[b_vec0], %[y], %[bb] \n\t"
"pmullh %[b_vec1], %[u], %[ub] \n\t"
"psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
"psrah %[b_vec0], %[b_vec0], %[six] \n\t"
//v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t"//v
// v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t" // v
"punpcklbh %[v], %[v], %[zero] \n\t"
"paddsh %[g_vec0], %[y], %[bg] \n\t"
"pmullh %[g_vec1], %[u], %[ug] \n\t"//u*ug
"pmullh %[g_vec1], %[u], %[ug] \n\t" // u*ug
"psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
"pmullh %[g_vec1], %[v], %[vg] \n\t"//v*vg
"pmullh %[g_vec1], %[v], %[vg] \n\t" // v*vg
"psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
"psrah %[g_vec0], %[g_vec0], %[six] \n\t"
"paddsh %[r_vec0], %[y], %[br] \n\t"
"pmullh %[r_vec1], %[v], %[vr] \n\t"//v*vr
"pmullh %[r_vec1], %[v], %[vr] \n\t" // v*vr
"psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
"psrah %[r_vec0], %[r_vec0], %[six] \n\t"
"packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
"packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"//ffffgggg
"packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" // rrrrbbbb
"packushb %[g_vec0], %[g_vec0], %[alpha] \n\t" // ffffgggg
"punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
"punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"//gbgbgbgb
"punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"//frfrfrfr
"punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
"punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"//frgbfrgb
"punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t" // gbgbgbgb
"punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t" // frfrfrfr
"punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb
"punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t" // frgbfrgb
"gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
"gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
"gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
......@@ -6212,23 +6212,17 @@ void I422ToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y),
[u]"=&f"(u), [v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [alpha]"f"(-1),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
[five] "f"(0x55), [mask] "f"(mask)
: "memory");
}
// 10 bit YUV to ARGB
......@@ -6238,10 +6232,10 @@ void I210ToARGBRow_MMI(const uint16_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
uint64_t y,u,v;
uint64_t b_vec[2],g_vec[2],r_vec[2];
uint64_t y, u, v;
uint64_t b_vec[2], g_vec[2], r_vec[2];
uint64_t mask = 0xff00ff00ff00ff00ULL;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -6316,24 +6310,18 @@ void I210ToARGBRow_MMI(const uint16_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y),
[u]"=&f"(u), [v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [alpha]"f"(-1),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask), [two]"f"(0x02),
[mask1]"f"(0x00ff00ff00ff00ff)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [alpha] "f"(-1), [six] "f"(0x6),
[five] "f"(0x55), [mask] "f"(mask), [two] "f"(0x02),
[mask1] "f"(0x00ff00ff00ff00ff)
: "memory");
}
void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
......@@ -6343,10 +6331,10 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
uint64_t y,u,v,a;
uint64_t b_vec[2],g_vec[2],r_vec[2];
uint64_t y, u, v, a;
uint64_t b_vec[2], g_vec[2], r_vec[2];
uint64_t mask = 0xff00ff00ff00ff00ULL;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -6376,18 +6364,18 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
"gslwlc1 %[a], 0x03(%[a_ptr]) \n\t"
"gslwrc1 %[a], 0x00(%[a_ptr]) \n\t"
"punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t"//y1
"punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t" // y1
//u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t"//u
// u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t" // u
"punpcklbh %[u], %[u], %[zero] \n\t"
"paddsh %[b_vec0], %[y], %[bb] \n\t"
"pmullh %[b_vec1], %[u], %[ub] \n\t"
"psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
"psrah %[b_vec0], %[b_vec0], %[six] \n\t"
//v3|v2|v1|v0 --> v1|v1|v0|v0
// v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t"
"punpcklbh %[v], %[v], %[zero] \n\t"
"paddsh %[g_vec0], %[y], %[bg] \n\t"
......@@ -6402,9 +6390,9 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
"psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
"psrah %[r_vec0], %[r_vec0], %[six] \n\t"
"packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"//rrrrbbbb
"packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t" // rrrrbbbb
"packushb %[g_vec0], %[g_vec0], %[a] \n\t"
"punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"//aaaagggg
"punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t" // aaaagggg
"punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
"punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
"punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
......@@ -6422,23 +6410,17 @@ void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v), [a]"=&f"(a),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[a_ptr]"r"(src_a), [zero]"f"(0x00),
[six]"f"(0x6), [five]"f"(0x55),
[mask]"f"(mask)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [a] "=&f"(a),
[b_vec0] "=&f"(b_vec[0]), [b_vec1] "=&f"(b_vec[1]),
[g_vec0] "=&f"(g_vec[0]), [g_vec1] "=&f"(g_vec[1]),
[r_vec0] "=&f"(r_vec[0]), [r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub),
[ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb),
[bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [a_ptr] "r"(src_a), [zero] "f"(0x00),
[six] "f"(0x6), [five] "f"(0x55), [mask] "f"(mask)
: "memory");
}
void I422ToRGB24Row_MMI(const uint8_t* src_y,
......@@ -6447,10 +6429,10 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
uint8_t* rgb_buf,
const struct YuvConstants* yuvconstants,
int width) {
uint64_t y,u,v;
uint64_t b_vec[2],g_vec[2],r_vec[2];
uint64_t y, u, v;
uint64_t b_vec[2], g_vec[2], r_vec[2];
uint64_t mask = 0xff00ff00ff00ff00ULL;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -6478,18 +6460,18 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
"gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
"gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
"punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t"//y1
"punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t" // y1
//u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t"//u
// u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t" // u
"punpcklbh %[u], %[u], %[zero] \n\t"
"paddsh %[b_vec0], %[y], %[bb] \n\t"
"pmullh %[b_vec1], %[u], %[ub] \n\t"
"psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
"psrah %[b_vec0], %[b_vec0], %[six] \n\t"
//v3|v2|v1|v0 --> v1|v1|v0|v0
// v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t"
"punpcklbh %[v], %[v], %[zero] \n\t"
"paddsh %[g_vec0], %[y], %[bg] \n\t"
......@@ -6528,7 +6510,6 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
"gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t"
"gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"
"daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
"daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
"daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
......@@ -6536,24 +6517,17 @@ void I422ToRGB24Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
[g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
[r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask]"f"(mask),
[lmove1]"f"(0x18), [rmove1]"f"(0x8),
[one]"f"(0x1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec0] "=&f"(b_vec[0]),
[b_vec1] "=&f"(b_vec[1]), [g_vec0] "=&f"(g_vec[0]),
[g_vec1] "=&f"(g_vec[1]), [r_vec0] "=&f"(r_vec[0]),
[r_vec1] "=&f"(r_vec[1]), [ub] "=&f"(ub), [ug] "=&f"(ug),
[vg] "=&f"(vg), [vr] "=&f"(vr), [bb] "=&f"(bb), [bg] "=&f"(bg),
[br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask] "f"(mask), [lmove1] "f"(0x18), [rmove1] "f"(0x8), [one] "f"(0x1)
: "memory");
}
void I422ToARGB4444Row_MMI(const uint8_t* src_y,
......@@ -6564,7 +6538,7 @@ void I422ToARGB4444Row_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -6592,18 +6566,18 @@ void I422ToARGB4444Row_MMI(const uint8_t* src_y,
"gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
"gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
"punpcklbh %[y], %[y], %[y] \n\t"//y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t"//y1
"punpcklbh %[y], %[y], %[y] \n\t" // y*0x0101
"pmulhuh %[y], %[y], %[yg] \n\t" // y1
//u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t"//u
// u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t" // u
"punpcklbh %[u], %[u], %[zero] \n\t"
"paddsh %[b_vec], %[y], %[bb] \n\t"
"pmullh %[temp], %[u], %[ub] \n\t"
"psubsh %[b_vec], %[b_vec], %[temp] \n\t"
"psrah %[b_vec], %[b_vec], %[six] \n\t"
//v3|v2|v1|v0 --> v1|v1|v0|v0
// v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t"
"punpcklbh %[v], %[v], %[zero] \n\t"
"paddsh %[g_vec], %[y], %[bg] \n\t"
......@@ -6651,23 +6625,16 @@ void I422ToARGB4444Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00),
[four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[dst_argb4444] "r"(dst_argb4444), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask] "f"(0xff00ff00ff00ff00), [four] "f"(0x4),
[mask1] "f"(0xf0f0f0f0f0f0f0f0), [alpha] "f"(-1)
: "memory");
}
void I422ToARGB1555Row_MMI(const uint8_t* src_y,
......@@ -6678,7 +6645,7 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -6709,7 +6676,7 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y,
"punpcklbh %[y], %[y], %[y] \n\t"
"pmulhuh %[y], %[y], %[yg] \n\t"
//u3|u2|u1|u0 --> u1|u1|u0|u0
// u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t"
"punpcklbh %[u], %[u], %[zero] \n\t"
"paddsh %[b_vec], %[y], %[bb] \n\t"
......@@ -6717,7 +6684,7 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y,
"psubsh %[b_vec], %[b_vec], %[temp] \n\t"
"psrah %[b_vec], %[b_vec], %[six] \n\t"
//v3|v2|v1|v0 --> v1|v1|v0|v0
// v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t"
"punpcklbh %[v], %[v], %[zero] \n\t"
"paddsh %[g_vec], %[y], %[bg] \n\t"
......@@ -6779,24 +6746,17 @@ void I422ToARGB1555Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[three]"f"(0x3), [mask2]"f"(0x1f0000001f),
[eight]"f"(0x8), [mask3]"f"(0x800000008000),
[lmove5]"f"(0x5)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[dst_argb1555] "r"(dst_argb1555), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3),
[mask2] "f"(0x1f0000001f), [eight] "f"(0x8),
[mask3] "f"(0x800000008000), [lmove5] "f"(0x5)
: "memory");
}
void I422ToRGB565Row_MMI(const uint8_t* src_y,
......@@ -6807,7 +6767,7 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -6838,7 +6798,7 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y,
"punpcklbh %[y], %[y], %[y] \n\t"
"pmulhuh %[y], %[y], %[yg] \n\t"
//u3|u2|u1|u0 --> u1|u1|u0|u0
// u3|u2|u1|u0 --> u1|u1|u0|u0
"punpcklbh %[u], %[u], %[u] \n\t"
"punpcklbh %[u], %[u], %[zero] \n\t"
"paddsh %[b_vec], %[y], %[bb] \n\t"
......@@ -6846,7 +6806,7 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y,
"psubsh %[b_vec], %[b_vec], %[temp] \n\t"
"psrah %[b_vec], %[b_vec], %[six] \n\t"
//v3|v2|v1|v0 --> v1|v1|v0|v0
// v3|v2|v1|v0 --> v1|v1|v0|v0
"punpcklbh %[v], %[v], %[v] \n\t"
"punpcklbh %[v], %[v], %[zero] \n\t"
"paddsh %[g_vec], %[y], %[bg] \n\t"
......@@ -6910,24 +6870,17 @@ void I422ToRGB565Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[three]"f"(0x3), [mask2]"f"(0x1f0000001f),
[eight]"f"(0x8), [seven]"f"(0x7),
[lmove5]"f"(0x5)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[dst_rgb565] "r"(dst_rgb565), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask1] "f"(0xff00ff00ff00ff00), [three] "f"(0x3),
[mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7),
[lmove5] "f"(0x5)
: "memory");
}
void NV12ToARGBRow_MMI(const uint8_t* src_y,
......@@ -6937,7 +6890,7 @@ void NV12ToARGBRow_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7005,23 +6958,15 @@ void NV12ToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1)
: "memory");
}
void NV21ToARGBRow_MMI(const uint8_t* src_y,
......@@ -7031,7 +6976,7 @@ void NV21ToARGBRow_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7099,23 +7044,15 @@ void NV21ToARGBRow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1)
: "memory");
}
void NV12ToRGB24Row_MMI(const uint8_t* src_y,
......@@ -7125,7 +7062,7 @@ void NV12ToRGB24Row_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7204,24 +7141,16 @@ void NV12ToRGB24Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1), [lmove1]"f"(0x18),
[one]"f"(0x1), [rmove1]"f"(0x8)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [lmove1] "f"(0x18),
[one] "f"(0x1), [rmove1] "f"(0x8)
: "memory");
}
void NV21ToRGB24Row_MMI(const uint8_t* src_y,
......@@ -7231,7 +7160,7 @@ void NV21ToRGB24Row_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7310,24 +7239,16 @@ void NV21ToRGB24Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
[rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[lmove1]"f"(0x18), [rmove1]"f"(0x8),
[one]"f"(0x1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [vu_ptr] "r"(src_vu), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [lmove1] "f"(0x18),
[rmove1] "f"(0x8), [one] "f"(0x1)
: "memory");
}
void NV12ToRGB565Row_MMI(const uint8_t* src_y,
......@@ -7337,7 +7258,7 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7398,7 +7319,7 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y,
"psrlw %[temp], %[temp], %[seven] \n\t"
"psrlw %[r_vec], %[mask1], %[eight] \n\t"
"and %[r_vec], %[temp], %[r_vec] \n\t"
"psubb %[y], %[eight], %[three] \n\t"//5
"psubb %[y], %[eight], %[three] \n\t" // 5
"psllw %[r_vec], %[r_vec], %[y] \n\t"
"or %[g_vec], %[g_vec], %[r_vec] \n\t"
"paddb %[r_vec], %[three], %[six] \n\t"
......@@ -7413,7 +7334,7 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y,
"psrlw %[temp], %[temp], %[seven] \n\t"
"psrlw %[r_vec], %[mask1], %[eight] \n\t"
"and %[r_vec], %[temp], %[r_vec] \n\t"
"psubb %[y], %[eight], %[three] \n\t"//5
"psubb %[y], %[eight], %[three] \n\t" // 5
"psllw %[r_vec], %[r_vec], %[y] \n\t"
"or %[b_vec], %[b_vec], %[r_vec] \n\t"
"paddb %[r_vec], %[three], %[six] \n\t"
......@@ -7436,24 +7357,16 @@ void NV12ToRGB565Row_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
[dst_rgb565]"r"(dst_rgb565),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[three]"f"(0x3), [mask2]"f"(0x1f0000001f),
[eight]"f"(0x8), [seven]"f"(0x7)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [uv_ptr] "r"(src_uv), [dst_rgb565] "r"(dst_rgb565),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [three] "f"(0x3),
[mask2] "f"(0x1f0000001f), [eight] "f"(0x8), [seven] "f"(0x7)
: "memory");
}
void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
......@@ -7462,7 +7375,7 @@ void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7530,22 +7443,15 @@ void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [yuy2_ptr]"r"(src_yuy2), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1), [eight]"f"(0x8)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [yuy2_ptr] "r"(src_yuy2), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8)
: "memory");
}
void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
......@@ -7554,7 +7460,7 @@ void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7622,22 +7528,15 @@ void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [uyvy_ptr]"r"(src_uyvy), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[ushu]"f"(0xA0), [vshu]"f"(0xf5),
[alpha]"f"(-1), [eight]"f"(0x8)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [uyvy_ptr] "r"(src_uyvy), [rgbbuf_ptr] "r"(rgb_buf),
[yuvcons_ptr] "r"(yuvconstants), [width] "r"(width), [zero] "f"(0x00),
[five] "f"(0x55), [six] "f"(0x6), [mask1] "f"(0xff00ff00ff00ff00),
[ushu] "f"(0xA0), [vshu] "f"(0xf5), [alpha] "f"(-1), [eight] "f"(0x8)
: "memory");
}
void I422ToRGBARow_MMI(const uint8_t* src_y,
......@@ -7648,7 +7547,7 @@ void I422ToRGBARow_MMI(const uint8_t* src_y,
int width) {
uint64_t y, u, v;
uint64_t b_vec, g_vec, r_vec, temp;
uint64_t ub,ug,vg,vr,bb,bg,br,yg;
uint64_t ub, ug, vg, vr, bb, bg, br, yg;
__asm__ volatile(
"ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
......@@ -7720,26 +7619,19 @@ void I422ToRGBARow_MMI(const uint8_t* src_y,
"daddi %[width], %[width], -0x04 \n\t"
"bnez %[width], 1b \n\t"
: [y]"=&f"(y), [u]"=&f"(u),
[v]"=&f"(v),
[b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
[r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
[ub]"=&f"(ub), [ug]"=&f"(ug),
[vg]"=&f"(vg), [vr]"=&f"(vr),
[bb]"=&f"(bb), [bg]"=&f"(bg),
[br]"=&f"(br), [yg]"=&f"(yg)
: [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
[v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
[yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
[zero]"f"(0x00), [five]"f"(0x55),
[six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
[alpha]"f"(-1)
: "memory"
);
: [y] "=&f"(y), [u] "=&f"(u), [v] "=&f"(v), [b_vec] "=&f"(b_vec),
[g_vec] "=&f"(g_vec), [r_vec] "=&f"(r_vec), [temp] "=&f"(temp),
[ub] "=&f"(ub), [ug] "=&f"(ug), [vg] "=&f"(vg), [vr] "=&f"(vr),
[bb] "=&f"(bb), [bg] "=&f"(bg), [br] "=&f"(br), [yg] "=&f"(yg)
: [y_ptr] "r"(src_y), [u_ptr] "r"(src_u), [v_ptr] "r"(src_v),
[rgbbuf_ptr] "r"(rgb_buf), [yuvcons_ptr] "r"(yuvconstants),
[width] "r"(width), [zero] "f"(0x00), [five] "f"(0x55), [six] "f"(0x6),
[mask1] "f"(0xff00ff00ff00ff00), [alpha] "f"(-1)
: "memory");
}
void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
__asm__ volatile (
__asm__ volatile(
"punpcklwd %[v32], %[v32], %[v32] \n\t"
"1: \n\t"
"gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t"
......@@ -7750,10 +7642,9 @@ void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
"daddi %[width], %[width], -0x04 \n\t"
"daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
"bnez %[width], 1b \n\t"
: [v32]"+&f"(v32)
: [dst_ptr]"r"(dst_argb), [width]"r"(width)
: "memory"
);
: [v32] "+&f"(v32)
: [dst_ptr] "r"(dst_argb), [width] "r"(width)
: "memory");
}
// 10 bit YUV to ARGB
......
......@@ -2065,6 +2065,49 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
}
// Converts one row of packed 3-byte RGB24 pixels to 8-bit luma (ARM32 NEON).
//
// Each output byte is (29 * B + 150 * G + 77 * R + 128) >> 8, saturated to
// 8 bits. The three weights sum to 256 and no offset is added.
//
//   src_rgb24  source row, 3 bytes per pixel (24 bytes read per iteration).
//   dst_yj     destination row, 1 byte per pixel (8 bytes written per
//              iteration).
//   width      pixel count. The loop body runs before the count is tested and
//              consumes 8 pixels at a time, so at least 8 pixels are always
//              processed and the count is effectively rounded up to a
//              multiple of 8.
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d4, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d5, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d6, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24, de-interleaved:
// d0 = 1st byte of each pixel, d1 = 2nd,
// d2 = 3rd; src pointer advances.
"subs %2, %2, #8 \n" // 8 processed per loop; sets the flags
// tested by bgt below.
"vmull.u8 q4, d0, d4 \n" // q4 = B * 29 (widened to 16 bit)
"vmlal.u8 q4, d1, d5 \n" // q4 += G * 150
"vmlal.u8 q4, d2, d6 \n" // q4 += R * 77
"vqrshrn.u16 d0, q4, #8 \n" // round, shift right 8, saturate to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y; dst pointer advances.
"bgt 1b \n" // loop while remaining width > 0
: "+r"(src_rgb24), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q4");
}
// Converts one row of packed 3-byte RAW pixels (R, G, B byte order in memory)
// to 8-bit luma (ARM32 NEON).
//
// Each output byte is (77 * R + 150 * G + 29 * B + 128) >> 8, saturated to
// 8 bits. Compared with the RGB24 variant only the d4/d6 coefficient
// registers are swapped, so the first byte of each pixel gets the R weight.
//
//   src_raw  source row, 3 bytes per pixel (24 bytes read per iteration).
//   dst_yj   destination row, 1 byte per pixel (8 bytes written per
//            iteration).
//   width    pixel count. The loop body runs before the count is tested and
//            consumes 8 pixels at a time, so at least 8 pixels are always
//            processed and the count is effectively rounded up to a multiple
//            of 8.
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d6, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d5, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d4, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW, de-interleaved:
// d0 = R, d1 = G, d2 = B; src advances.
"subs %2, %2, #8 \n" // 8 processed per loop; sets the flags
// tested by bgt below.
"vmull.u8 q4, d0, d4 \n" // q4 = R * 77 (widened to 16 bit)
"vmlal.u8 q4, d1, d5 \n" // q4 += G * 150
"vmlal.u8 q4, d2, d6 \n" // q4 += B * 29
"vqrshrn.u16 d0, q4, #8 \n" // round, shift right 8, saturate to 8 bit Y
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y; dst pointer advances.
"bgt 1b \n" // loop while remaining width > 0
: "+r"(src_raw), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q4");
}
// Bilinear filter 16x2 -> 16x1
void InterpolateRow_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
......
......@@ -2103,6 +2103,48 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}
// Converts one row of packed 3-byte RGB24 pixels to 8-bit luma (ARM64 NEON).
//
// Each output byte is (29 * B + 150 * G + 77 * R + 128) >> 8, saturated to
// 8 bits. The three weights sum to 256 and no offset is added.
//
//   src_rgb24  source row, 3 bytes per pixel (24 bytes read per iteration).
//   dst_yj     destination row, 1 byte per pixel (8 bytes written per
//              iteration).
//   width      pixel count. The loop body runs before the count is tested and
//              consumes 8 pixels at a time, so at least 8 pixels are always
//              processed and the count is effectively rounded up to a
//              multiple of 8.
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
asm volatile(
"movi v4.8b, #29 \n" // B * 0.1140 coefficient
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
"movi v6.8b, #77 \n" // R * 0.2990 coefficient
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels, de-interleaved:
// v0 = B, v1 = G, v2 = R; src += 24.
"subs %w2, %w2, #8 \n" // 8 processed per loop; sets the flags
// tested by b.gt below.
"umull v0.8h, v0.8b, v4.8b \n" // v0 = B * 29 (widened, overwrites B)
"umlal v0.8h, v1.8b, v5.8b \n" // v0 += G * 150
"umlal v0.8h, v2.8b, v6.8b \n" // v0 += R * 77
"uqrshrn v0.8b, v0.8h, #8 \n" // round, shift right 8, saturate to 8 bit Y
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y; dst += 8.
"b.gt 1b \n" // loop while remaining width > 0
: "+r"(src_rgb24), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
}
// Converts one row of packed 3-byte RAW pixels (R, G, B byte order in memory)
// to 8-bit luma (ARM64 NEON).
//
// Each output byte is (77 * R + 150 * G + 29 * B + 128) >> 8, saturated to
// 8 bits. Compared with the RGB24 variant only the v4/v6 coefficient
// registers are swapped, so the first byte of each pixel gets the R weight.
//
//   src_raw  source row, 3 bytes per pixel (24 bytes read per iteration).
//   dst_yj   destination row, 1 byte per pixel (8 bytes written per
//            iteration).
//   width    pixel count. The loop body runs before the count is tested and
//            consumes 8 pixels at a time, so at least 8 pixels are always
//            processed and the count is effectively rounded up to a multiple
//            of 8.
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
asm volatile(
"movi v6.8b, #29 \n" // B * 0.1140 coefficient
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
"movi v4.8b, #77 \n" // R * 0.2990 coefficient
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels, de-interleaved:
// v0 = R, v1 = G, v2 = B; src += 24.
"subs %w2, %w2, #8 \n" // 8 processed per loop; sets the flags
// tested by b.gt below.
"umull v0.8h, v0.8b, v4.8b \n" // v0 = R * 77 (widened, overwrites R)
"umlal v0.8h, v1.8b, v5.8b \n" // v0 += G * 150
"umlal v0.8h, v2.8b, v6.8b \n" // v0 += B * 29
"uqrshrn v0.8b, v0.8h, #8 \n" // round, shift right 8, saturate to 8 bit Y
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y; dst += 8.
"b.gt 1b \n" // loop while remaining width > 0
: "+r"(src_raw), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
}
// Bilinear filter 16x2 -> 16x1
void InterpolateRow_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,
......
......@@ -1245,6 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
TESTATOB(RAW, 3, 3, 1, J400, 1, 1, 1, 0)
#ifdef INTEL_TEST
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment