Commit 1cea4235 authored by Frank Barchard, committed by Commit Bot

RAWToJ400 for big endian RGB to grey scale.

On Pixel 3
Was
BM_ConvertToGray/1280/720/3                        2360958 ns      2334984 ns         2999
BM_ConvertToGray/1279/721/3                        2360289 ns      2334329 ns         2994
BM_ConvertGrayTensorflowCoefficients/1280/720/3    2983296 ns      2947113 ns         2259
BM_ConvertGrayTensorflowCoefficients/1279/721/3    2871205 ns      2835359 ns         2170

Now
BM_ConvertToGray/1280/720/3                        2358469 ns      2334068 ns         2997
BM_ConvertToGray/1279/721/3                        2364584 ns      2336892 ns         2995
BM_ConvertGrayTensorflowCoefficients/1280/720/3     281312 ns       278244 ns        25170
BM_ConvertGrayTensorflowCoefficients/1279/721/3     351310 ns       347229 ns        20217

BUG=libyuv:854

Change-Id: If2192affc2d3219e0fb824737d75b9374a25d709
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2003236
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
parent 6e6f81b8
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 1742 Version: 1743
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -418,6 +418,15 @@ int RGB24ToJ400(const uint8_t* src_rgb24, ...@@ -418,6 +418,15 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
int width, int width,
int height); int height);
// RGB big endian (rgb in memory) to J400.
// Converts a 3 byte per pixel RAW image to a single grey scale plane.
//   src_raw / src_stride_raw: source image and its row stride in bytes.
//   dst_yj / dst_stride_yj: destination plane and its row stride in bytes.
//   width, height: image size in pixels. A negative height inverts the
//     image vertically (the source is read from its last row upwards).
// Returns 0 on success, or -1 if src_raw or dst_yj is NULL, width <= 0 or
// height == 0.
LIBYUV_API
int RAWToJ400(const uint8_t* src_raw,
int src_stride_raw,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
#ifdef HAVE_JPEG #ifdef HAVE_JPEG
// src_width/height provided by capture. // src_width/height provided by capture.
// dst_width/height for clipping determine final size. // dst_width/height for clipping determine final size.
......
...@@ -374,9 +374,11 @@ extern "C" { ...@@ -374,9 +374,11 @@ extern "C" {
#define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON #define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON #define HAS_RAWTOYROW_NEON
#define HAS_RAWTOYJROW_NEON
#define HAS_RGB24TOARGBROW_NEON #define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON #define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON #define HAS_RGB24TOYROW_NEON
#define HAS_RGB24TOYJROW_NEON
#define HAS_RGB565TOARGBROW_NEON #define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON #define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON #define HAS_RGB565TOYROW_NEON
...@@ -1140,7 +1142,9 @@ void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); ...@@ -1140,7 +1142,9 @@ void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);
void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
uint8_t* dst_y, uint8_t* dst_y,
...@@ -1171,7 +1175,9 @@ void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width); ...@@ -1171,7 +1175,9 @@ void BGRAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width); void ABGRToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width); void RGBAToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width); void RGB24ToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGB24ToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width); void RAWToYRow_C(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RAWToYJRow_C(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
...@@ -1190,7 +1196,9 @@ void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); ...@@ -1190,7 +1196,9 @@ void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB24ToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr, void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_ptr, uint8_t* dst_ptr,
......
...@@ -11,6 +11,6 @@ ...@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1742 #define LIBYUV_VERSION 1743
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -1598,8 +1598,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ...@@ -1598,8 +1598,9 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
int width, int width,
int height) { int height) {
int y; int y;
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ #if (defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MMI)) defined(HAS_RGB24TOYJROW_MSA) || \
defined(HAS_RGB24TOYJROW_MMI)
void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVJRow_C; RGB24ToUVJRow_C;
...@@ -1625,7 +1626,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ...@@ -1625,7 +1626,7 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
} }
// Neon version does direct RGB24 to YUV. // Neon version does direct RGB24 to YUV.
#if defined(HAS_RGB24TOYJROW_NEON) #if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON;
RGB24ToYJRow = RGB24ToYJRow_Any_NEON; RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
...@@ -1689,16 +1690,16 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ...@@ -1689,16 +1690,16 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
#endif #endif
{ {
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ #if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MMI)) defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
// Allocate 2 rows of ARGB. // Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31; const int kRowSize = (width * 4 + 31) & ~31;
align_buffer_64(row, kRowSize * 2); align_buffer_64(row, kRowSize * 2);
#endif #endif
for (y = 0; y < height - 1; y += 2) { for (y = 0; y < height - 1; y += 2) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ #if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MMI)) defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width); RGB24ToYJRow(src_rgb24, dst_y, width);
RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width);
...@@ -1715,8 +1716,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ...@@ -1715,8 +1716,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
#if (defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ #if ((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MMI)) defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width);
RGB24ToYJRow(src_rgb24, dst_y, width); RGB24ToYJRow(src_rgb24, dst_y, width);
#else #else
...@@ -1725,8 +1726,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24, ...@@ -1725,8 +1726,8 @@ int RGB24ToJ420(const uint8_t* src_rgb24,
ARGBToYJRow(row, dst_y, width); ARGBToYJRow(row, dst_y, width);
#endif #endif
} }
#if !(defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_MSA) || \ #if !((defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON)) || \
defined(HAS_RGB24TOYJROW_MMI)) defined(HAS_RGB24TOYJROW_MSA) || defined(HAS_RGB24TOYJROW_MMI))
free_aligned_buffer_64(row); free_aligned_buffer_64(row);
#endif #endif
} }
...@@ -1746,8 +1747,9 @@ int RAWToI420(const uint8_t* src_raw, ...@@ -1746,8 +1747,9 @@ int RAWToI420(const uint8_t* src_raw,
int width, int width,
int height) { int height) {
int y; int y;
#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA) || \ #if (defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)) || \
defined(HAS_RAWTOYROW_MMI)) defined(HAS_RAWTOYROW_MSA) || \
defined(HAS_RAWTOYROW_MMI)
void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u,
uint8_t* dst_v, int width) = RAWToUVRow_C; uint8_t* dst_v, int width) = RAWToUVRow_C;
void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
...@@ -1772,7 +1774,7 @@ int RAWToI420(const uint8_t* src_raw, ...@@ -1772,7 +1774,7 @@ int RAWToI420(const uint8_t* src_raw,
} }
// Neon version does direct RAW to YUV. // Neon version does direct RAW to YUV.
#if defined(HAS_RAWTOYROW_NEON) #if defined(HAS_RAWTOYROW_NEON) && defined(HAS_RAWTOUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
RAWToUVRow = RAWToUVRow_Any_NEON; RAWToUVRow = RAWToUVRow_Any_NEON;
RAWToYRow = RAWToYRow_Any_NEON; RAWToYRow = RAWToYRow_Any_NEON;
...@@ -2398,7 +2400,7 @@ int RGB24ToJ400(const uint8_t* src_rgb24, ...@@ -2398,7 +2400,7 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
src_stride_rgb24 = -src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24;
} }
// Neon version does direct RGB24 to YUV. // Neon version does direct RGB24 to YJ.
#if defined(HAS_RGB24TOYJROW_NEON) #if defined(HAS_RGB24TOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) { if (TestCpuFlag(kCpuHasNEON)) {
RGB24ToYJRow = RGB24ToYJRow_Any_NEON; RGB24ToYJRow = RGB24ToYJRow_Any_NEON;
...@@ -2487,6 +2489,124 @@ int RGB24ToJ400(const uint8_t* src_rgb24, ...@@ -2487,6 +2489,124 @@ int RGB24ToJ400(const uint8_t* src_rgb24,
return 0; return 0;
} }
// Convert RAW to J400.
//
// RAW is 3 bytes per pixel with R, G, B order in memory; J400 is a single
// grey scale plane with 1 byte per pixel.
//
//   src_raw / src_stride_raw: source image and its row stride in bytes.
//   dst_yj / dst_stride_yj: destination plane and its row stride in bytes.
//   width, height: image size in pixels. A negative height inverts the
//     image vertically.
//
// Returns 0 on success, or -1 if a pointer is NULL, width <= 0 or
// height == 0.
//
// Every output row depends on exactly one input row (there is no chroma
// plane to subsample), so rows are converted one at a time and the fallback
// path needs only a single ARGB scratch row.
LIBYUV_API
int RAWToJ400(const uint8_t* src_raw,
              int src_stride_raw,
              uint8_t* dst_yj,
              int dst_stride_yj,
              int width,
              int height) {
  int y;
#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
     defined(HAS_RAWTOYJROW_MMI))
  // Direct path: a single row function converts RAW straight to YJ.
  void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_yj, int width) =
      RAWToYJRow_C;
#else
  // Two step path: RAW -> ARGB scratch row -> YJ.
  void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
      RAWToARGBRow_C;
  void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
      ARGBToYJRow_C;
#endif
  if (!src_raw || !dst_yj || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }

  // Pick the row functions. The exact width variants are only used when
  // width is a multiple of their step; otherwise the _Any_ wrappers are used.
// Neon version does direct RAW to YJ.
#if defined(HAS_RAWTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToYJRow = RAWToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToYJRow = RAWToYJRow_NEON;
    }
  }
#elif defined(HAS_RAWTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToYJRow = RAWToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToYJRow = RAWToYJRow_MSA;
    }
  }
#elif defined(HAS_RAWTOYJROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    RAWToYJRow = RAWToYJRow_Any_MMI;
    if (IS_ALIGNED(width, 8)) {
      RAWToYJRow = RAWToYJRow_MMI;
    }
  }
// Other platforms do intermediate conversion from RAW to ARGB.
#else
#if defined(HAS_RAWTOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RAWToARGBRow = RAWToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#endif

#if (defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) || \
     defined(HAS_RAWTOYJROW_MMI))
  for (y = 0; y < height; ++y) {
    RAWToYJRow(src_raw, dst_yj, width);
    src_raw += src_stride_raw;
    dst_yj += dst_stride_yj;
  }
#else
  {
    // Allocate 1 row of ARGB, rounded up to a multiple of 32 bytes.
    const int kRowSize = (width * 4 + 31) & ~31;
    align_buffer_64(row, kRowSize);
    for (y = 0; y < height; ++y) {
      RAWToARGBRow(src_raw, row, width);
      ARGBToYJRow(row, dst_yj, width);
      src_raw += src_stride_raw;
      dst_yj += dst_stride_yj;
    }
    free_aligned_buffer_64(row);
  }
#endif
  return 0;
}
static void SplitPixels(const uint8_t* src_u, static void SplitPixels(const uint8_t* src_u,
int src_pixel_stride_uv, int src_pixel_stride_uv,
uint8_t* dst_u, uint8_t* dst_u,
......
...@@ -695,6 +695,9 @@ ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7) ...@@ -695,6 +695,9 @@ ANY11(RGBAToYRow_Any_MMI, RGBAToYRow_MMI, 0, 4, 1, 7)
#ifdef HAS_RGB24TOYROW_NEON #ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7)
#endif #endif
#ifdef HAS_RGB24TOYJROW_NEON
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RGB24TOYROW_MSA #ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif #endif
...@@ -704,6 +707,9 @@ ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7) ...@@ -704,6 +707,9 @@ ANY11(RGB24ToYRow_Any_MMI, RGB24ToYRow_MMI, 0, 3, 1, 7)
#ifdef HAS_RAWTOYROW_NEON #ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7)
#endif #endif
#ifdef HAS_RAWTOYJROW_NEON
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 7)
#endif
#ifdef HAS_RAWTOYROW_MSA #ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif #endif
......
...@@ -565,6 +565,8 @@ static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { ...@@ -565,6 +565,8 @@ static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
MAKEROWYJ(ARGB, 2, 1, 0, 4) MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4) MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ #undef MAKEROWYJ
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
......
This diff is collapsed.
...@@ -2065,6 +2065,49 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { ...@@ -2065,6 +2065,49 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8");
} }
// Convert a row of RGB24 (3 bytes per pixel) to YJ grey scale using 32 bit
// ARM NEON.
// Per pixel: Y = (29 * B + 150 * G + 77 * R) >> 8, with rounding and
// saturation applied by the narrowing shift.
// Each loop iteration consumes 24 source bytes and writes 8 Y bytes, and the
// loop repeats while the remaining count is > 0. A width that is not a
// multiple of 8 therefore converts a whole extra group of 8 pixels; the
// caller visible in this change only selects this function directly when
// width is a multiple of 8.
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d4, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d5, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d6, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24, de-interleaved: d0 = B, d1 = G, d2 = R.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q4, d0, d4 \n" // B
"vmlal.u8 q4, d1, d5 \n" // G
"vmlal.u8 q4, d2, d6 \n" // R
"vqrshrn.u16 d0, q4, #8 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q4");
}
// Convert a row of RAW (3 bytes per pixel, R first in memory) to YJ grey
// scale using 32 bit ARM NEON.
// Same arithmetic as the RGB24 version, but the R and B coefficients are
// swapped between d4 and d6 to match the reversed channel order:
// Y = (77 * R + 150 * G + 29 * B) >> 8, with rounding and saturation applied
// by the narrowing shift.
// Each loop iteration consumes 24 source bytes and writes 8 Y bytes, and the
// loop repeats while the remaining count is > 0. A width that is not a
// multiple of 8 therefore converts a whole extra group of 8 pixels; the
// caller visible in this change only selects this function directly when
// width is a multiple of 8.
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
asm volatile(
"vmov.u8 d6, #29 \n" // B * 0.1140 coefficient
"vmov.u8 d5, #150 \n" // G * 0.5870 coefficient
"vmov.u8 d4, #77 \n" // R * 0.2990 coefficient
"1: \n"
"vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW, de-interleaved: d0 = R, d1 = G, d2 = B.
"subs %2, %2, #8 \n" // 8 processed per loop.
"vmull.u8 q4, d0, d4 \n" // R (d4 holds the R coefficient)
"vmlal.u8 q4, d1, d5 \n" // G
"vmlal.u8 q4, d2, d6 \n" // B (d6 holds the B coefficient)
"vqrshrn.u16 d0, q4, #8 \n" // 16 bit to 8 bit Y (rounding, saturating)
"vst1.8 {d0}, [%1]! \n" // store 8 pixels Y.
"bgt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "q4");
}
// Bilinear filter 16x2 -> 16x1 // Bilinear filter 16x2 -> 16x1
void InterpolateRow_NEON(uint8_t* dst_ptr, void InterpolateRow_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr, const uint8_t* src_ptr,
......
...@@ -2103,6 +2103,48 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { ...@@ -2103,6 +2103,48 @@ void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) {
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
} }
// Convert a row of RGB24 (3 bytes per pixel) to YJ grey scale using AArch64
// NEON.
// Per pixel: Y = (29 * B + 150 * G + 77 * R) >> 8, with rounding and
// saturation applied by the narrowing shift.
// Each loop iteration consumes 24 source bytes and writes 8 Y bytes, and the
// loop repeats while the remaining count is > 0. A width that is not a
// multiple of 8 therefore converts a whole extra group of 8 pixels; the
// caller visible in this change only selects this function directly when
// width is a multiple of 8.
void RGB24ToYJRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
asm volatile(
"movi v4.8b, #29 \n" // B * 0.1140 coefficient
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
"movi v6.8b, #77 \n" // R * 0.2990 coefficient
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels, de-interleaved: v0 = B, v1 = G, v2 = R.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // B (v0 is reused as the 16 bit accumulator)
"umlal v0.8h, v1.8b, v5.8b \n" // G
"umlal v0.8h, v2.8b, v6.8b \n" // R
"uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y (rounding, saturating)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"b.gt 1b \n"
: "+r"(src_rgb24), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
}
// Convert a row of RAW (3 bytes per pixel, R first in memory) to YJ grey
// scale using AArch64 NEON.
// Same arithmetic as the RGB24 version, but the R and B coefficients are
// swapped between v4 and v6 to match the reversed channel order:
// Y = (77 * R + 150 * G + 29 * B) >> 8, with rounding and saturation applied
// by the narrowing shift.
// Each loop iteration consumes 24 source bytes and writes 8 Y bytes, and the
// loop repeats while the remaining count is > 0. A width that is not a
// multiple of 8 therefore converts a whole extra group of 8 pixels; the
// caller visible in this change only selects this function directly when
// width is a multiple of 8.
void RAWToYJRow_NEON(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
asm volatile(
"movi v6.8b, #29 \n" // B * 0.1140 coefficient
"movi v5.8b, #150 \n" // G * 0.5870 coefficient
"movi v4.8b, #77 \n" // R * 0.2990 coefficient
"1: \n"
"ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels, de-interleaved: v0 = R, v1 = G, v2 = B.
"subs %w2, %w2, #8 \n" // 8 processed per loop.
"umull v0.8h, v0.8b, v4.8b \n" // R (v4 holds the R coefficient; v0 is reused as accumulator)
"umlal v0.8h, v1.8b, v5.8b \n" // G
"umlal v0.8h, v2.8b, v6.8b \n" // B (v6 holds the B coefficient)
"uqrshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit Y (rounding, saturating)
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y.
"b.gt 1b \n"
: "+r"(src_raw), // %0
"+r"(dst_yj), // %1
"+r"(width) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
}
// Bilinear filter 16x2 -> 16x1 // Bilinear filter 16x2 -> 16x1
void InterpolateRow_NEON(uint8_t* dst_ptr, void InterpolateRow_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr, const uint8_t* src_ptr,
......
...@@ -1245,6 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0) ...@@ -1245,6 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0) TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0) TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0) TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
TESTATOB(RAW, 3, 3, 1, J400, 1, 1, 1, 0)
#ifdef INTEL_TEST #ifdef INTEL_TEST
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0) TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment