Commit 2d11d43a authored by fbarchard@google.com

shuffle functions so convert.h is all formats to I420 and convert_from.h is from…

shuffle functions so convert.h is all formats to I420 and convert_from.h is from I420 to all formats
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/395006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@174 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 032b5f99
Name: libyuv Name: libyuv
URL: http://code.google.com/p/libyuv/ URL: http://code.google.com/p/libyuv/
Version: 173 Version: 174
License: BSD License: BSD
License File: LICENSE License File: LICENSE
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare.h" #include "libyuv/compare.h"
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include "libyuv/convert_from.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h" #include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define INCLUDE_LIBYUV_CONVERT_H_ #define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h" #include "libyuv/rotate.h"
#ifdef __cplusplus #ifdef __cplusplus
...@@ -19,116 +20,141 @@ namespace libyuv { ...@@ -19,116 +20,141 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// RGB24 is also known as 24BG and BGR3 // Copy I420 to I420.
int I420ToRGB24(const uint8* src_y, int src_stride_y, int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
int width, int height); uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RAW is also known as RGB3 // Convert I444 to I420.
int I420ToRAW(const uint8* src_y, int src_stride_y, int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
int width, int height); uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToARGB4444(const uint8* src_y, int src_stride_y, // Convert I400 (grey) to I420.
const uint8* src_u, int src_stride_u, int I400ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
int width, int height); uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToRGB565(const uint8* src_y, int src_stride_y, // Convert NV12 to I420. Also used for NV21.
const uint8* src_u, int src_stride_u, int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_uv, int src_stride_uv,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
int width, int height); uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToARGB1555(const uint8* src_y, int src_stride_y, // Convert M420 to I420.
const uint8* src_u, int src_stride_u, int M420ToI420(const uint8* src_m420, int src_stride_m420,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
int width, int height); uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToYUY2(const uint8* src_y, int src_stride_y, // Convert Q420 to I420.
const uint8* src_u, int src_stride_u, int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
int I422ToYUY2(const uint8* src_y, int src_stride_y, // Convert YUY2 to I420.
const uint8* src_u, int src_stride_u, int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
int I420ToUYVY(const uint8* src_y, int src_stride_y, // Convert UYVY to I420.
const uint8* src_u, int src_stride_u, int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
int I422ToUYVY(const uint8* src_y, int src_stride_y, // Convert V210 to I420.
const uint8* src_u, int src_stride_u, int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
int I420ToV210(const uint8* src_y, int src_stride_y, // ARGB little endian (bgra in memory) to I420
const uint8* src_u, int src_stride_u, int ARGBToI420(const uint8* src_frame, int src_stride_frame,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// BGRA little endian (argb in memory) to I420
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// ABGR little endian (rgba in memory) to I420
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB little endian (bgr in memory) to I420
int RGB24ToI420(const uint8* src_frame, int src_stride_frame, int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB big endian (rgb in memory) to I420
int RAWToI420(const uint8* src_frame, int src_stride_frame, int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB16 (RGBP fourcc) little endian to I420
int RGB565ToI420(const uint8* src_frame, int src_stride_frame, int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB15 (RGBO fourcc) little endian to I420
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// RGB12 (R444 fourcc) little endian to I420
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
int ABGRToI420(const uint8* src_frame, int src_stride_frame, // Note Bayer formats (BGGR) To I420 are in format_conversion.h
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert camera sample to I420 with cropping, rotation and vertical flip. // Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG. // "src_size" is needed to parse MJPG.
...@@ -162,16 +188,6 @@ int ConvertToI420(const uint8* src_frame, size_t src_size, ...@@ -162,16 +188,6 @@ int ConvertToI420(const uint8* src_frame, size_t src_size,
RotationMode rotation, RotationMode rotation,
uint32 format); uint32 format);
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// See Also convert.h for conversions from formats to I420
// I420Copy, declared in convert.h, serves as the I420 to I420 conversion.
// Convert I420 to I422.
// Takes three source planes (Y, U, V) and writes three destination planes,
// each given as a pointer plus a row stride. width and height are in pixels.
// NOTE(review): return convention presumably matches I420Copy (0 on success,
// -1 on invalid arguments) - confirm against the implementation.
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to I444.
// Takes three source planes (Y, U, V) and writes three destination planes,
// each given as a pointer plus a row stride. width and height are in pixels.
int I420ToI444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
// Only a Y plane pointer and stride are passed in, so the chroma of the
// source format is never referenced through this interface.
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToNV12
// TODO(fbarchard): I420ToM420
// TODO(fbarchard): I420ToQ420
// Convert I420 to YUY2.
// Reads three source planes and writes one destination buffer (dst_frame)
// with a single row stride.
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to UYVY.
// Reads three source planes and writes one destination buffer (dst_frame)
// with a single row stride.
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to V210.
// Reads three source planes and writes one destination buffer (dst_frame)
// with a single row stride.
// NOTE(review): the V210 row helpers process pixels in groups of 6 and may
// over-convert the remainder - confirm the dst_frame sizing this requires.
int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to ARGB.
// convert.h describes ARGB as little endian (bgra in memory).
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to BGRA.
// convert.h describes BGRA as little endian (argb in memory).
// The destination parameters keep the dst_argb naming; they refer to the
// BGRA output buffer here.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ABGR.
// convert.h describes ABGR as little endian (rgba in memory).
// The destination parameters keep the dst_argb naming; they refer to the
// ABGR output buffer here.
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to RGB24.
// RGB24 is also known as 24BG and BGR3 (bgr in memory per convert.h).
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to RAW.
// RAW is also known as RGB3 (rgb in memory per convert.h).
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to RGB565.
// convert.h refers to this format as RGB16 (RGBP fourcc), little endian.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to ARGB1555.
// convert.h refers to this format as RGB15 (RGBO fourcc), little endian.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to ARGB4444.
// convert.h refers to this format as RGB12 (R444 fourcc), little endian.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Note: conversions involving Bayer formats (BGGR) are in format_conversion.h
// Convert I420 to specified format.
// "y", "u" and "v" are the source I420 planes, each with its own row stride.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
// "format" selects the destination format.
// NOTE(review): "format" is presumably a FOURCC code from video_common.h -
// confirm against the implementation.
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
...@@ -18,53 +18,17 @@ namespace libyuv { ...@@ -18,53 +18,17 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// Copy I420 to I420. void SetPlane(uint8* dst_y, int dst_stride_y,
int I420Copy(const uint8* src_y, int src_stride_y, int width, int height,
const uint8* src_u, int src_stride_u, uint32 value);
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// I420 mirror
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I444 to I420. // Copy a plane of data (I420 to I400)
int I444ToI420(const uint8* src_y, int src_stride_y, void CopyPlane(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// Convert I420 to I444. // I420 mirror
int I420ToI444(const uint8* src_y, int src_stride_y, int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
...@@ -72,13 +36,6 @@ int I420ToI444(const uint8* src_y, int src_stride_y, ...@@ -72,13 +36,6 @@ int I420ToI444(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height); int width, int height);
// Convert I400 (grey) to I420.
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV12 to ARGB. Also used for NV21. // Convert NV12 to ARGB. Also used for NV21.
int NV12ToARGB(const uint8* src_y, int src_stride_y, int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv, const uint8* src_uv, int src_stride_uv,
...@@ -91,76 +48,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y, ...@@ -91,76 +48,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_frame, int dst_stride_frame,
int width, int height); int width, int height);
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert NV12 to I420. Also used for NV21.
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert Q420 to I420.
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert V210 to I420.
int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to ARGB.
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to BGRA.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ABGR.
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB. // Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y, int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
...@@ -242,9 +129,16 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb, ...@@ -242,9 +129,16 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb, uint8* dst_argb, int dst_stride_argb,
int width, int height); int width, int height);
// Copy a plane of data int I422ToYUY2(const uint8* src_y, int src_stride_y,
void CopyPlane(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u,
uint8* dst_y, int dst_stride_y, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height); int width, int height);
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_ #ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 173 #define LIBYUV_VERSION 174
#endif // INCLUDE_LIBYUV_VERSION_H_ #endif // INCLUDE_LIBYUV_VERSION_H_
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
# includes # includes
'include/libyuv/basic_types.h', 'include/libyuv/basic_types.h',
'include/libyuv/convert.h', 'include/libyuv/convert.h',
'include/libyuv/convert_from.h',
'include/libyuv/scale.h', 'include/libyuv/scale.h',
'include/libyuv/planar_functions.h', 'include/libyuv/planar_functions.h',
'include/libyuv/video_common.h', 'include/libyuv/video_common.h',
...@@ -37,7 +38,7 @@ ...@@ -37,7 +38,7 @@
# sources # sources
'source/compare.cc', 'source/compare.cc',
'source/convert.cc', 'source/convert.cc',
'source/convertfrom.cc', 'source/convert_from.cc',
'source/cpu_id.cc', 'source/cpu_id.cc',
'source/format_conversion.cc', 'source/format_conversion.cc',
'source/planar_functions.cc', 'source/planar_functions.cc',
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include <string.h> // For memcpy()
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h" #include "libyuv/format_conversion.h"
...@@ -23,477 +25,519 @@ namespace libyuv { ...@@ -23,477 +25,519 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// YUY2 - Macro-pixel = 2 image pixels // Copy I420 with optional flipping
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
// UYVY - Macro-pixel = 2 image pixels const uint8* src_v, int src_stride_v,
// U0Y0V0Y1 uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) uint8* dst_v, int dst_stride_v,
#define HAS_I42XTOYUY2ROW_SSE2 int width, int height) {
__declspec(naked) if (!src_y || !src_u || !src_v ||
static void I42xToYUY2Row_SSE2(const uint8* src_y, !dst_y || !dst_u || !dst_v ||
const uint8* src_u, width <= 0 || height == 0) {
const uint8* src_v, return -1;
uint8* dst_frame, int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
sub edx, esi
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2 // YUYV
punpckhbw xmm1, xmm2
movdqa [edi], xmm0
movdqa [edi + 16], xmm1
lea edi, [edi + 32]
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
} }
// Negative height means invert the image.
if (height < 0) {
height = -height;
int halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0;
} }
#define HAS_I42XTOUYVYROW_SSE2 #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
__declspec(naked) __declspec(naked)
static void I42xToUYVYRow_SSE2(const uint8* src_y, static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
const uint8* src_u, uint8* dst_uv, int pix) {
const uint8* src_v,
uint8* dst_frame, int width) {
__asm { __asm {
push esi
push edi push edi
mov eax, [esp + 8 + 4] // src_y mov eax, [esp + 4 + 4] // src_uv
mov esi, [esp + 8 + 8] // src_u mov edx, [esp + 4 + 8] // src_uv_stride
mov edx, [esp + 8 + 12] // src_v mov edi, [esp + 4 + 12] // dst_v
mov edi, [esp + 8 + 16] // dst_frame mov ecx, [esp + 4 + 16] // pix
mov ecx, [esp + 8 + 20] // width sub edi, eax
sub edx, esi
convertloop: convertloop:
movq xmm2, qword ptr [esi] // U movdqa xmm0, [eax]
movq xmm3, qword ptr [esi + edx] // V pavgb xmm0, [eax + edx]
lea esi, [esi + 8] movdqa [eax + edi], xmm0
punpcklbw xmm2, xmm3 // UV lea eax, [eax + 16]
movdqa xmm0, [eax] // Y
movdqa xmm1, xmm2
lea eax, [eax + 16]
punpcklbw xmm1, xmm0 // UYVY
punpckhbw xmm2, xmm0
movdqa [edi], xmm1
movdqa [edi + 16], xmm2
lea edi, [edi + 32]
sub ecx, 16 sub ecx, 16
ja convertloop ja convertloop
pop edi pop edi
pop esi
ret ret
} }
} }
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
asm volatile (
"sub %1,%2 \n"
"1: \n"
"movq (%1),%%xmm2 \n"
"movq (%1,%2,1),%%xmm3 \n"
"lea 0x8(%1),%1 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"movdqa (%0),%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#define HAS_I42XTOUYVYROW_SSE2 #elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
static void I42xToUYVYRow_SSE2(const uint8* src_y, #define HAS_HALFROW_SSE2
const uint8* src_u, static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
const uint8* src_v, uint8* dst_uv, int pix) {
uint8* dst_frame, int width) {
asm volatile ( asm volatile (
"sub %1,%2 \n" "sub %0,%1 \n"
"1: \n" "1: \n"
"movq (%1),%%xmm2 \n" "movdqa (%0),%%xmm0 \n"
"movq (%1,%2,1),%%xmm3 \n" "pavgb (%0,%3),%%xmm0 \n"
"lea 0x8(%1),%1 \n" "movdqa %%xmm0,(%0,%1) \n"
"punpcklbw %%xmm3,%%xmm2 \n" "lea 0x10(%0),%0 \n"
"movdqa (%0),%%xmm0 \n" "sub $0x10,%2 \n"
"movdqa %%xmm2,%%xmm1 \n" "ja 1b \n"
"lea 0x10(%0),%0 \n" : "+r"(src_uv), // %0
"punpcklbw %%xmm0,%%xmm1 \n" "+r"(dst_uv), // %1
"punpckhbw %%xmm0,%%xmm2 \n" "+r"(pix) // %2
"movdqa %%xmm1,(%3) \n" : "r"(static_cast<intptr_t>(src_uv_stride)) // %3
"movdqa %%xmm2,0x10(%3) \n" : "memory", "cc"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__) #if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3" , "xmm0"
#endif #endif
); );
} }
#endif #endif
void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v, static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
uint8* dst_frame, int width) { uint8* dst_uv, int pix) {
for (int x = 0; x < width - 1; x += 2) { for (int x = 0; x < pix; ++x) {
dst_frame[0] = src_y[0]; dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
dst_frame[1] = src_u[0];
dst_frame[2] = src_y[1];
dst_frame[3] = src_v[0];
dst_frame += 4;
src_y += 2;
src_u += 1;
src_v += 1;
}
if (width & 1) {
dst_frame[0] = src_y[0];
dst_frame[1] = src_u[0];
dst_frame[2] = src_y[0]; // duplicate last y
dst_frame[3] = src_v[0];
}
}
void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
uint8* dst_frame, int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_frame[0] = src_u[0];
dst_frame[1] = src_y[0];
dst_frame[2] = src_v[0];
dst_frame[3] = src_y[1];
dst_frame += 4;
src_y += 2;
src_u += 1;
src_v += 1;
}
if (width & 1) {
dst_frame[0] = src_u[0];
dst_frame[1] = src_y[0];
dst_frame[2] = src_v[0];
dst_frame[3] = src_y[0]; // duplicate last y
}
}
// gcc provided macros
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif
#ifdef LIBYUV_LITTLE_ENDIAN
#define READWORD(p) (*((uint32*) (p)))
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
#else
uint32 READWORD(const uint8* p) {
return (uint32) p[0] |
((uint32) (p[1]) << 8) |
((uint32) (p[2]) << 16) |
((uint32) (p[3]) << 24);
}
void WRITEWORD(uint8* p, uint32 v) {
p[0] = (uint8)(v & 255);
p[1] = (uint8)((v >> 8) & 255);
p[2] = (uint8)((v >> 16) & 255);
p[3] = (uint8)((v >> 24) & 255);
}
#endif
// Must be multiple of 6 pixels. Will over convert to handle remainder.
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
for (int x = 0; x < width; x += 6) {
uint32 w = READWORD(src_v210 + 0);
dst_uyvy[0] = (w >> 2) & 0xff;
dst_uyvy[1] = (w >> 12) & 0xff;
dst_uyvy[2] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 4);
dst_uyvy[3] = (w >> 2) & 0xff;
dst_uyvy[4] = (w >> 12) & 0xff;
dst_uyvy[5] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 8);
dst_uyvy[6] = (w >> 2) & 0xff;
dst_uyvy[7] = (w >> 12) & 0xff;
dst_uyvy[8] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 12);
dst_uyvy[9] = (w >> 2) & 0xff;
dst_uyvy[10] = (w >> 12) & 0xff;
dst_uyvy[11] = (w >> 22) & 0xff;
src_v210 += 16;
dst_uyvy += 12;
}
}
#define EIGHTTOTEN(x) (x << 2 | x >> 6)
void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) {
for (int x = 0; x < width; x += 6) {
WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) |
(EIGHTTOTEN(src_uyvy[1]) << 10) |
(EIGHTTOTEN(src_uyvy[2]) << 20));
WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) |
(EIGHTTOTEN(src_uyvy[4]) << 10) |
(EIGHTTOTEN(src_uyvy[5]) << 20));
WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) |
(EIGHTTOTEN(src_uyvy[7]) << 10) |
(EIGHTTOTEN(src_uyvy[8]) << 20));
WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) |
(EIGHTTOTEN(src_uyvy[10]) << 10) |
(EIGHTTOTEN(src_uyvy[11]) << 20));
src_uyvy += 12;
dst_v210 += 16;
} }
} }
int I422ToYUY2(const uint8* src_y, int src_stride_y, int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame; src_y = src_y + (height - 1) * src_stride_y;
dst_stride_frame = -dst_stride_frame; src_u = src_u + (height - 1) * src_stride_u;
} src_v = src_v + (height - 1) * src_stride_v;
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, src_stride_y = -src_stride_y;
const uint8* src_v, uint8* dst_frame, int width); src_stride_u = -src_stride_u;
I42xToYUY2Row = I42xToYUY2Row_C; src_stride_v = -src_stride_v;
#if defined(HAS_I42XTOYUY2ROW_SSE2) }
int halfwidth = (width + 1) >> 1;
void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix);
#if defined(HAS_HALFROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
I42xToYUY2Row = I42xToYUY2Row_SSE2; IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
} IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
HalfRow = HalfRow_SSE2;
} else
#endif #endif
{
HalfRow = HalfRow_C;
}
for (int y = 0; y < height; ++y) { // Copy Y plane
I42xToYUY2Row(src_y, src_u, src_y, dst_frame, width); if (dst_y) {
src_y += src_stride_y; CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
src_u += src_stride_u; }
src_v += src_stride_v;
dst_frame += dst_stride_frame; // SubSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
HalfRow(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane.
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_v, src_stride_v, dst_v, halfwidth);
src_v += src_stride_v * 2;
dst_v += dst_stride_v;
}
if (height & 1) {
HalfRow(src_v, 0, dst_v, halfwidth);
} }
return 0; return 0;
} }
int I420ToYUY2(const uint8* src_y, int src_stride_y, // Blends 32x2 pixels to 16x1
// source in scale.cc
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SCALEROWDOWN2_NEON
void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
uint8* dst, int dst_width);
#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(YUV_DISABLE_ASM)
void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
#endif
void ScaleRowDown2Int_C(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame; src_y = src_y + (height - 1) * src_stride_y;
dst_stride_frame = -dst_stride_frame; src_u = src_u + (height - 1) * src_stride_u;
} src_v = src_v + (height - 1) * src_stride_v;
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u, src_stride_y = -src_stride_y;
const uint8* src_v, uint8* dst_frame, int width); src_stride_u = -src_stride_u;
I42xToYUY2Row = I42xToYUY2Row_C; src_stride_v = -src_stride_v;
#if defined(HAS_I42XTOYUY2ROW_SSE2) }
int halfwidth = (width + 1) >> 1;
void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Int_NEON;
} else
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
I42xToYUY2Row = I42xToYUY2Row_SSE2; IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
} IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
ScaleRowDown2 = ScaleRowDown2Int_SSE2;
#endif #endif
{
ScaleRowDown2 = ScaleRowDown2Int_C;
}
for (int y = 0; y < height - 1; y += 2) { // Copy Y plane
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width); if (dst_y) {
I42xToYUY2Row(src_y + src_stride_y, src_u, src_v, CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
dst_frame + dst_stride_frame, width); }
src_y += src_stride_y * 2;
src_u += src_stride_u; // SubSample U plane.
src_v += src_stride_v; int y;
dst_frame += dst_stride_frame * 2; for (y = 0; y < height - 1; y += 2) {
ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
ScaleRowDown2(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane.
for (y = 0; y < height - 1; y += 2) {
ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth);
src_v += src_stride_v * 2;
dst_v += dst_stride_v;
} }
if (height & 1) { if (height & 1) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width); ScaleRowDown2(src_v, 0, dst_v, halfwidth);
} }
return 0; return 0;
} }
int I422ToUYVY(const uint8* src_y, int src_stride_y, // I400 is greyscale typically used in MJPG
const uint8* src_u, int src_stride_u, int I400ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y,
uint8* dst_frame, int dst_stride_frame, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame; src_y = src_y + (height - 1) * src_stride_y;
dst_stride_frame = -dst_stride_frame; src_stride_y = -src_stride_y;
} }
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, int halfwidth = (width + 1) >> 1;
const uint8* src_v, uint8* dst_frame, int width); int halfheight = (height + 1) >> 1;
I42xToUYVYRow = I42xToUYVYRow_C; CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
#if defined(HAS_I42XTOUYVYROW_SSE2) SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
if (TestCpuFlag(kCpuHasSSE2) && SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height; ++y) {
I42xToUYVYRow(src_y, src_u, src_y, dst_frame, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame;
}
return 0; return 0;
} }
int I420ToUYVY(const uint8* src_y, int src_stride_y, static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
const uint8* src_u, int src_stride_u, uint8* dst, int dst_stride_frame,
const uint8* src_v, int src_stride_v, int width, int height) {
uint8* dst_frame, int dst_stride_frame, // Copy plane
int width, int height) { for (int y = 0; y < height; y += 2) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { memcpy(dst, src, width);
return -1; src += src_stride_0;
dst += dst_stride_frame;
memcpy(dst, src, width);
src += src_stride_1;
dst += dst_stride_frame;
} }
}
// Support converting from FOURCC_M420
// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
// easy conversion to I420.
// M420 format description:
// M420 is row biplanar 420: 2 rows of Y and 1 row of VU.
// Chroma is half width / half height. (420)
// src_stride_m420 is row planar. Normally this will be the width in pixels.
// The UV plane is half width, but 2 values, so src_stride_m420 applies to
// this as well as the two Y planes.
static int X420ToI420(const uint8* src_y,
int src_stride_y0, int src_stride_y1,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame; int halfheight = (height + 1) >> 1;
dst_stride_frame = -dst_stride_frame; dst_y = dst_y + (height - 1) * dst_stride_y;
} dst_u = dst_u + (halfheight - 1) * dst_stride_u;
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, dst_v = dst_v + (halfheight - 1) * dst_stride_v;
const uint8* src_v, uint8* dst_frame, int width); dst_stride_y = -dst_stride_y;
I42xToUYVYRow = I42xToUYVYRow_C; dst_stride_u = -dst_stride_u;
#if defined(HAS_I42XTOUYVYROW_SSE2) dst_stride_v = -dst_stride_v;
}
int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUV_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_NEON;
} else
#elif defined(HAS_SPLITUV_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) { IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
I42xToUYVYRow = I42xToUYVYRow_SSE2; IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
} SplitUV = SplitUV_SSE2;
} else
#endif #endif
{
SplitUV = SplitUV_C;
}
for (int y = 0; y < height - 1; y += 2) { if (dst_y) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width); CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, width, height);
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
} }
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width); int halfheight = (height + 1) >> 1;
for (int y = 0; y < halfheight; ++y) {
// Copy a row of UV.
SplitUV(src_uv, dst_u, dst_v, halfwidth);
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_uv += src_stride_uv;
} }
return 0; return 0;
} }
int I420ToV210(const uint8* src_y, int src_stride_y, // Convert NV12 to I420.
const uint8* src_u, int src_stride_u, int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_v, int src_stride_v, const uint8* src_uv, int src_stride_uv,
uint8* dst_frame, int dst_stride_frame, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required return X420ToI420(src_y, src_stride_y, src_stride_y,
src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) { src_uv, src_stride_uv,
return -1; dst_y, dst_stride_y,
dst_u, dst_stride_u,
dst_v, dst_stride_v,
width, height);
}
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
dst_y, dst_stride_y,
dst_u, dst_stride_u,
dst_v, dst_stride_v,
width, height);
}
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// Split one row of packed YUY2 into separate Y, U and V rows (MSVC x86).
// Each loop iteration consumes 32 source bytes and produces 16 Y bytes,
// 8 U bytes and 8 V bytes, then subtracts 16 from pix.
// The source loads and the Y store use movdqa, so src_yuy2 and dst_y must
// be 16 byte aligned; the U and V stores use 8 byte movq.
// The loop body runs before pix is tested, so at least one iteration is
// always performed.
__declspec(naked)
static void SplitYUY2_SSE2(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
// Naked function: esi/edi are saved by hand, hence the +8 on the
// stack offsets of the arguments below.
push esi
push edi
mov eax, [esp + 8 + 4] // src_yuy2
mov edx, [esp + 8 + 8] // dst_y
mov esi, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
convertloop:
// Load 32 bytes of YUY2 and advance the source pointer.
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm2, xmm5 // even bytes are Y
pand xmm3, xmm5
packuswb xmm2, xmm3
// Store 16 Y bytes.
movdqa [edx], xmm2
lea edx, [edx + 16]
psrlw xmm0, 8 // YUYV -> UVUV
psrlw xmm1, 8
packuswb xmm0, xmm1
movdqa xmm1, xmm0
pand xmm0, xmm5 // U
packuswb xmm0, xmm0
// Store 8 U bytes.
movq qword ptr [esi], xmm0
lea esi, [esi + 8]
psrlw xmm1, 8 // V
packuswb xmm1, xmm1
// Store 8 V bytes.
movq qword ptr [edi], xmm1
lea edi, [edi + 8]
// 16 pixels done; loop while the remaining count is above zero.
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// Split one row of packed YUY2 into separate Y, U and V rows (GCC x86).
// Each loop iteration consumes 32 source bytes and produces 16 Y bytes,
// 8 U bytes and 8 V bytes, then subtracts 16 from pix.
// The source loads and the Y store use movdqa, so src_yuy2 and dst_y must
// be 16 byte aligned; the U and V stores use 8 byte movq.
// The loop body runs before pix is tested, so at least one iteration is
// always performed.
static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
// xmm5 = 0x00ff in every 16 bit lane (mask selecting the even bytes).
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"1: \n"
// Load 32 bytes of YUY2 and advance the source pointer.
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
// Even bytes are Y: mask, pack and store 16 of them.
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm5,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqa %%xmm2,(%1) \n"
"lea 0x10(%1),%1 \n"
// Odd bytes are interleaved U and V: shift them down and pack.
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
// Store 8 U bytes.
"pand %%xmm5,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0,(%2) \n"
"lea 0x8(%2),%2 \n"
// Store 8 V bytes.
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm1,(%3) \n"
"lea 0x8(%3),%3 \n"
// 16 pixels done; loop while the remaining count is above zero.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(pix) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
static void SplitYUY2_C(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
// Copy a row of YUY2.
for (int x = 0; x < pix; x += 2) {
dst_y[0] = src_yuy2[0];
dst_y[1] = src_yuy2[2];
dst_u[0] = src_yuy2[1];
dst_v[0] = src_yuy2[3];
src_yuy2 += 4;
dst_y += 2;
dst_u += 1;
dst_v += 1;
} }
}
// Convert Q420 to I420.
// Format is rows of YY/YUYV
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame; int halfheight = (height + 1) >> 1;
dst_stride_frame = -dst_stride_frame; dst_y = dst_y + (height - 1) * dst_stride_y;
} dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
SIMD_ALIGNED(uint8 row[kMaxStride]); dst_stride_y = -dst_stride_y;
void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix); dst_stride_u = -dst_stride_u;
UYVYToV210Row = UYVYToV210Row_C; dst_stride_v = -dst_stride_v;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u, void (*SplitYUY2)(const uint8* src_yuy2,
const uint8* src_v, uint8* dst_frame, int width); uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix);
I42xToUYVYRow = I42xToUYVYRow_C; #if defined(HAS_SPLITYUY2_SSE2)
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) { IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
I42xToUYVYRow = I42xToUYVYRow_SSE2; IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
} SplitYUY2 = SplitYUY2_SSE2;
} else
#endif #endif
{
for (int y = 0; y < height - 1; y += 2) { SplitYUY2 = SplitYUY2_C;
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
} }
if (height & 1) { for (int y = 0; y < height; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, row, width); memcpy(dst_y, src_y, width);
UYVYToV210Row(row, dst_frame, width); dst_y += dst_stride_y;
src_y += src_stride_y;
// Copy a row of YUY2.
SplitYUY2(src_yuy2, dst_y, dst_u, dst_v, width);
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_yuy2 += src_stride_yuy2;
} }
return 0; return 0;
} }
...@@ -647,6 +691,56 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, ...@@ -647,6 +691,56 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
return 0; return 0;
} }
// gcc provided macros
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif
// READWORD(p): fetch the 32 bit word stored least significant byte first
// at byte pointer p.
#ifdef LIBYUV_LITTLE_ENDIAN
// Little endian hosts load the word directly through a uint32 pointer.
#define READWORD(p) (*((uint32*) (p)))
#else
// Other hosts assemble the word one byte at a time, starting from the most
// significant byte (p[3]) and shifting it up as lower bytes are merged in.
static inline uint32 READWORD(const uint8* p) {
  uint32 word = 0;
  for (int i = 3; i >= 0; --i) {
    word = (word << 8) | (uint32) p[i];
  }
  return word;
}
#endif
// Must be multiple of 6 pixels. Will over convert to handle remainder.
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
static void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
for (int x = 0; x < width; x += 6) {
uint32 w = READWORD(src_v210 + 0);
dst_uyvy[0] = (w >> 2) & 0xff;
dst_uyvy[1] = (w >> 12) & 0xff;
dst_uyvy[2] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 4);
dst_uyvy[3] = (w >> 2) & 0xff;
dst_uyvy[4] = (w >> 12) & 0xff;
dst_uyvy[5] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 8);
dst_uyvy[6] = (w >> 2) & 0xff;
dst_uyvy[7] = (w >> 12) & 0xff;
dst_uyvy[8] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 12);
dst_uyvy[9] = (w >> 2) & 0xff;
dst_uyvy[10] = (w >> 12) & 0xff;
dst_uyvy[11] = (w >> 22) & 0xff;
src_v210 += 16;
dst_uyvy += 12;
}
}
// Convert V210 to I420. // Convert V210 to I420.
// V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels. // V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels.
// With is multiple of 48. // With is multiple of 48.
......
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_from.h"
#include <string.h> // For memcpy()
#include "libyuv/basic_types.h"
#include "libyuv/convert.h" // For I420Copy
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/video_common.h"
#include "row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I420 to I422.
// The Y plane is copied as is (skipped when dst_y is NULL). Each source
// chroma row is written to two consecutive destination rows; with an odd
// height the final chroma row is written once.
// A negative height flips the image vertically. Always returns 0.
int I420ToI422(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Flip by starting at the last destination row and walking upwards.
  if (height < 0) {
    height = -height;
    dst_y += (height - 1) * dst_stride_y;
    dst_u += (height - 1) * dst_stride_u;
    dst_v += (height - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Luma is identical in both formats.
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  const int chroma_width = (width + 1) >> 1;
  const bool odd_height = (height & 1) != 0;

  // U plane: duplicate every source row.
  int rows_left = height;
  while (rows_left > 1) {
    memcpy(dst_u, src_u, chroma_width);
    memcpy(dst_u + dst_stride_u, src_u, chroma_width);
    src_u += src_stride_u;
    dst_u += dst_stride_u * 2;
    rows_left -= 2;
  }
  if (odd_height) {
    memcpy(dst_u, src_u, chroma_width);
  }

  // V plane: duplicate every source row.
  rows_left = height;
  while (rows_left > 1) {
    memcpy(dst_v, src_v, chroma_width);
    memcpy(dst_v + dst_stride_v, src_v, chroma_width);
    src_v += src_stride_v;
    dst_v += dst_stride_v * 2;
    rows_left -= 2;
  }
  if (odd_height) {
    memcpy(dst_v, src_v, chroma_width);
  }
  return 0;
}
// use Bilinear for upsampling chroma
void ScalePlaneBilinear(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr);
// Convert I420 to I444.
// The Y plane is copied as is (skipped when dst_y is NULL). The half size
// U and V planes are scaled to full width and height with
// ScalePlaneBilinear.
// A negative height flips the image vertically. Always returns 0.
int I420ToI444(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Flip by starting at the last destination row and walking upwards.
  if (height < 0) {
    height = -height;
    const int last_row = height - 1;
    dst_y += last_row * dst_stride_y;
    dst_u += last_row * dst_stride_u;
    dst_v += last_row * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Luma is identical in both formats.
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // Source chroma dimensions, rounded up for odd sizes.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  // Upsample U, then V.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_u, dst_stride_u, src_u, dst_u);
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_v, dst_stride_v, src_v, dst_v);
  return 0;
}
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y;
}
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
}
// YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
// Pack one row of planar Y, U and V into YUY2 (MSVC x86).
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes, writes
// 32 bytes to dst_frame and subtracts 16 from width.
// The Y load and both stores use movdqa, so src_y and dst_frame must be
// 16 byte aligned; U and V are read with 8 byte movq loads.
// The loop body runs before width is tested, so at least one iteration is
// always performed.
__declspec(naked)
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
__asm {
// Naked function: esi/edi are saved by hand, hence the +8 on the
// stack offsets of the arguments below.
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
// edx becomes the distance from U to V, so V is addressed as [esi + edx]
// and only esi needs advancing.
sub edx, esi
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2 // YUYV
punpckhbw xmm1, xmm2
movdqa [edi], xmm0
movdqa [edi + 16], xmm1
lea edi, [edi + 32]
// 16 pixels done; loop while the remaining count is above zero.
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#define HAS_I42XTOUYVYROW_SSE2
// Pack one row of planar Y, U and V into UYVY (MSVC x86).
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes, writes
// 32 bytes to dst_frame and subtracts 16 from width.
// The Y load and both stores use movdqa, so src_y and dst_frame must be
// 16 byte aligned; U and V are read with 8 byte movq loads.
// The loop body runs before width is tested, so at least one iteration is
// always performed.
__declspec(naked)
static void I42xToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
__asm {
// Naked function: esi/edi are saved by hand, hence the +8 on the
// stack offsets of the arguments below.
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
// edx becomes the distance from U to V, so V is addressed as [esi + edx]
// and only esi needs advancing.
sub edx, esi
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
movdqa xmm1, xmm2
lea eax, [eax + 16]
punpcklbw xmm1, xmm0 // UYVY
punpckhbw xmm2, xmm0
movdqa [edi], xmm1
movdqa [edi + 16], xmm2
lea edi, [edi + 32]
// 16 pixels done; loop while the remaining count is above zero.
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
// Pack one row of planar Y, U and V into YUY2 (GCC x86).
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes, writes
// 32 bytes to dst_frame and subtracts 16 from width.
// The Y load and both stores use movdqa, so src_y and dst_frame must be
// 16 byte aligned; U and V are read with 8 byte movq loads.
// The loop body runs before width is tested, so at least one iteration is
// always performed.
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
asm volatile (
// %2 becomes the distance from U to V, so V is addressed as (%1,%2,1)
// and only %1 needs advancing.
"sub %1,%2 \n"
"1: \n"
// Load 8 U and 8 V bytes and interleave them into UV pairs.
"movq (%1),%%xmm2 \n"
"movq (%1,%2,1),%%xmm3 \n"
"lea 0x8(%1),%1 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
// Load 16 Y bytes and interleave them with the UV pairs: Y first.
"movdqa (%0),%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
// Store 32 packed bytes.
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
// 16 pixels done; loop while the remaining count is above zero.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#define HAS_I42XTOUYVYROW_SSE2
// Pack one row of planar Y, U and V into UYVY (GCC x86).
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes, writes
// 32 bytes to dst_frame and subtracts 16 from width.
// The Y load and both stores use movdqa, so src_y and dst_frame must be
// 16 byte aligned; U and V are read with 8 byte movq loads.
// The loop body runs before width is tested, so at least one iteration is
// always performed.
static void I42xToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
asm volatile (
// %2 becomes the distance from U to V, so V is addressed as (%1,%2,1)
// and only %1 needs advancing.
"sub %1,%2 \n"
"1: \n"
// Load 8 U and 8 V bytes and interleave them into UV pairs.
"movq (%1),%%xmm2 \n"
"movq (%1,%2,1),%%xmm3 \n"
"lea 0x8(%1),%1 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
// Load 16 Y bytes and interleave them with the UV pairs: chroma first.
"movdqa (%0),%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"lea 0x10(%0),%0 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
// Store 32 packed bytes.
"movdqa %%xmm1,(%3) \n"
"movdqa %%xmm2,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
// 16 pixels done; loop while the remaining count is above zero.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#endif
// Pack one row of planar Y, U and V into YUY2 (byte order Y0 U Y1 V).
// Every pair of Y samples shares one U and one V sample. For an odd width
// the last Y sample is written twice to complete the final 4 byte group.
static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_frame, int width) {
  int remaining = width;
  while (remaining > 1) {
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_u++;
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_v++;
    remaining -= 2;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[0];  // duplicate last y
    dst_frame[3] = src_v[0];
  }
}
// Pack one row of planar Y, U and V into UYVY (byte order U Y0 V Y1).
// Every pair of Y samples shares one U and one V sample. For an odd width
// the last Y sample is written twice to complete the final 4 byte group.
static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_frame, int width) {
  int remaining = width;
  while (remaining > 1) {
    *dst_frame++ = *src_u++;
    *dst_frame++ = *src_y++;
    *dst_frame++ = *src_v++;
    *dst_frame++ = *src_y++;
    remaining -= 2;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];  // duplicate last y
  }
}
// gcc provided macros
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif
// WRITEWORD(p, v): store the 32 bit value v at byte pointer p, least
// significant byte first.
#ifdef LIBYUV_LITTLE_ENDIAN
// Little endian hosts store the word directly through a uint32 pointer.
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
#else
// Other hosts emit the word one byte at a time, lowest byte first.
static inline void WRITEWORD(uint8* p, uint32 v) {
  for (int i = 0; i < 4; ++i) {
    p[i] = (uint8)((v >> (8 * i)) & 255);
  }
}
#endif
#define EIGHTTOTEN(x) (x << 2 | x >> 6)
static void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) {
for (int x = 0; x < width; x += 6) {
WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) |
(EIGHTTOTEN(src_uyvy[1]) << 10) |
(EIGHTTOTEN(src_uyvy[2]) << 20));
WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) |
(EIGHTTOTEN(src_uyvy[4]) << 10) |
(EIGHTTOTEN(src_uyvy[5]) << 20));
WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) |
(EIGHTTOTEN(src_uyvy[7]) << 10) |
(EIGHTTOTEN(src_uyvy[8]) << 20));
WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) |
(EIGHTTOTEN(src_uyvy[10]) << 10) |
(EIGHTTOTEN(src_uyvy[11]) << 20));
src_uyvy += 12;
dst_v210 += 16;
}
}
// TODO(fbarchard): Deprecate, move or expand 422 support?
// Convert I422 to YUY2.
// Every source row has its own U and V row, so all four pointers advance
// once per output row.
// A negative height flips the image vertically.
// Returns 0 on success, -1 if any plane pointer is NULL.
int I422ToYUY2(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }
  void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
  I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
  // The SSE2 row uses aligned loads of Y and aligned stores to dst_frame.
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToYUY2Row = I42xToYUY2Row_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    // Pass the V plane as the third argument; the row function must not be
    // handed src_y here or the V samples are replaced by luma.
    I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_frame += dst_stride_frame;
  }
  return 0;
}
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2;
}
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I42xToYUY2Row(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
}
if (height & 1) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
}
return 0;
}
// TODO(fbarchard): Deprecate, move or expand 422 support?
// Convert I422 to UYVY.
// Every source row has its own U and V row, so all four pointers advance
// once per output row.
// A negative height flips the image vertically.
// Returns 0 on success, -1 if any plane pointer is NULL.
int I422ToUYVY(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }
  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
  I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
  // The SSE2 row uses aligned loads of Y and aligned stores to dst_frame.
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToUYVYRow = I42xToUYVYRow_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    // Pass the V plane as the third argument; the row function must not be
    // handed src_y here or the V samples are replaced by luma.
    I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_frame += dst_stride_frame;
  }
  return 0;
}
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
}
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
}
return 0;
}
int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height) {
if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required
src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix);
UYVYToV210Row = UYVYToV210Row_C;
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
}
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
}
return 0;
}
// Convert I420 to ARGB.
// Returns 0 on success, or -1 if any plane pointer is NULL (matching the
// validation done by I420ToUYVY / I420ToV210).
// A negative height writes the destination bottom-up (vertical flip).
int I420ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_argb == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Start from the portable row converter and upgrade when a SIMD variant is
  // compiled in and supported by the CPU.
  void (*I420ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The "Any" variant is selected when width is not a multiple of 16.
    I420ToARGBRow = I420ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I420ToARGBRow = I420ToARGBRow_NEON;
    }
  }
#elif defined(HAS_I420TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // The direct SSSE3 variant is selected only for width % 8 == 0 and a
    // 16-byte aligned destination; otherwise the "Any" variant is used.
    I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      I420ToARGBRow = I420ToARGBRow_SSSE3;
    }
  }
#endif
  for (int y = 0; y < height; ++y) {
    I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    // Chroma is vertically subsampled: advance U/V after every odd row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to BGRA.
// Returns 0 on success, or -1 if any plane pointer is NULL (matching the
// validation done by I420ToUYVY / I420ToV210).
// A negative height writes the destination bottom-up (vertical flip).
int I420ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_bgra == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }
  // Start from the portable row converter and upgrade when a SIMD variant is
  // compiled in and supported by the CPU.
  void (*I420ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The "Any" variant is selected when width is not a multiple of 16.
    I420ToBGRARow = I420ToBGRARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I420ToBGRARow = I420ToBGRARow_NEON;
    }
  }
#elif defined(HAS_I420TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // The direct SSSE3 variant is selected only for width % 8 == 0 and a
    // 16-byte aligned destination; otherwise the "Any" variant is used.
    I420ToBGRARow = I420ToBGRARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
      I420ToBGRARow = I420ToBGRARow_SSSE3;
    }
  }
#endif
  for (int y = 0; y < height; ++y) {
    I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    // Chroma is vertically subsampled: advance U/V after every odd row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to ABGR.
// Returns 0 on success, or -1 if any plane pointer is NULL (matching the
// validation done by I420ToUYVY / I420ToV210).
// A negative height writes the destination bottom-up (vertical flip).
int I420ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_abgr == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
  // Start from the portable row converter and upgrade when a SIMD variant is
  // compiled in and supported by the CPU.
  void (*I420ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The "Any" variant is selected when width is not a multiple of 16.
    I420ToABGRRow = I420ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I420ToABGRRow = I420ToABGRRow_NEON;
    }
  }
#elif defined(HAS_I420TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // The direct SSSE3 variant is selected only for width % 8 == 0 and a
    // 16-byte aligned destination; otherwise the "Any" variant is used.
    I420ToABGRRow = I420ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
      I420ToABGRRow = I420ToABGRRow_SSSE3;
    }
  }
#endif
  for (int y = 0; y < height; ++y) {
    I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    // Chroma is vertically subsampled: advance U/V after every odd row.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to RGB24.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
} else
#endif
{
ARGBToRGB24Row = ARGBToRGB24Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB24Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RAW.
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
} else
#endif
{
ARGBToRAWRow = ARGBToRAWRow_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRAWRow(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RGB565.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb, int dst_stride_rgb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
dst_stride_rgb = -dst_stride_rgb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
} else
#endif
{
ARGBToRGB565Row = ARGBToRGB565Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB565Row(row, dst_rgb, width);
dst_rgb += dst_stride_rgb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB1555.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
} else
#endif
{
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB1555Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB4444.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
} else
#endif
{
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB4444Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to specified format.
//   y/u/v, *_stride    source I420 planes and their strides.
//   dst_sample         destination buffer; for triplanar formats the three
//                      planes are laid out back to back inside it.
//   dst_sample_stride  destination stride in bytes; 0 selects the tightly
//                      packed default for the format (single-plane formats
//                      only; triplanar outputs always use packed strides).
//   width, height      image size; a negative height is forwarded to the
//                      per-format converter, which inverts the image.
//   format             FOURCC of the destination format.
// Returns -1 for a NULL pointer or an unsupported FOURCC; otherwise the
// result of the per-format converter (so converter failures are reported
// instead of being masked as success).
int ConvertFromI420(const uint8* y, int y_stride,
                    const uint8* u, int u_stride,
                    const uint8* v, int v_stride,
                    uint8* dst_sample, int dst_sample_stride,
                    int width, int height,
                    uint32 format) {
  if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) {
    return -1;
  }
  // Plane offsets inside dst_sample depend on the image size only, so they
  // must be computed from |height|; the signed height would place the U/V
  // planes before the start of the buffer.
  const int abs_height = (height < 0) ? -height : height;
  int r = 0;
  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      r = I420ToYUY2(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 2,
                     width, height);
      break;
    case FOURCC_UYVY:
      r = I420ToUYVY(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 2,
                     width, height);
      break;
    case FOURCC_V210:
      // Default V210 stride: 128 bytes per group of 48 pixels, rounded up.
      r = I420ToV210(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride :
                         (width + 47) / 48 * 128,
                     width, height);
      break;
    case FOURCC_RGBP:
      r = I420ToRGB565(y, y_stride,
                       u, u_stride,
                       v, v_stride,
                       dst_sample,
                       dst_sample_stride ? dst_sample_stride : width * 2,
                       width, height);
      break;
    case FOURCC_RGBO:
      r = I420ToARGB1555(y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         dst_sample,
                         dst_sample_stride ? dst_sample_stride : width * 2,
                         width, height);
      break;
    case FOURCC_R444:
      r = I420ToARGB4444(y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         dst_sample,
                         dst_sample_stride ? dst_sample_stride : width * 2,
                         width, height);
      break;
    case FOURCC_24BG:
      r = I420ToRGB24(y, y_stride,
                      u, u_stride,
                      v, v_stride,
                      dst_sample,
                      dst_sample_stride ? dst_sample_stride : width * 3,
                      width, height);
      break;
    case FOURCC_RAW:
      r = I420ToRAW(y, y_stride,
                    u, u_stride,
                    v, v_stride,
                    dst_sample,
                    dst_sample_stride ? dst_sample_stride : width * 3,
                    width, height);
      break;
    case FOURCC_ARGB:
      r = I420ToARGB(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4,
                     width, height);
      break;
    case FOURCC_BGRA:
      r = I420ToBGRA(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4,
                     width, height);
      break;
    case FOURCC_ABGR:
      r = I420ToABGR(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4,
                     width, height);
      break;
    case FOURCC_BGGR:
      r = I420ToBayerBGGR(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_GBRG:
      r = I420ToBayerGBRG(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_GRBG:
      r = I420ToBayerGRBG(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_RGGB:
      r = I420ToBayerRGGB(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_I400:
      r = I400Copy(y, y_stride,
                   dst_sample,
                   dst_sample_stride ? dst_sample_stride : width,
                   width, height);
      break;
    // Triplanar formats
    // TODO(fbarchard): halfstride instead of halfwidth
    case FOURCC_I420:
    case FOURCC_YV12: {
      int halfwidth = (width + 1) / 2;
      int halfheight = (abs_height + 1) / 2;
      uint8* dst_u;
      uint8* dst_v;
      // I420 stores U before V; YV12 stores V before U.
      if (format == FOURCC_I420) {
        dst_u = dst_sample + width * abs_height;
        dst_v = dst_u + halfwidth * halfheight;
      } else {
        dst_v = dst_sample + width * abs_height;
        dst_u = dst_v + halfwidth * halfheight;
      }
      r = I420Copy(y, y_stride,
                   u, u_stride,
                   v, v_stride,
                   dst_sample, width,
                   dst_u, halfwidth,
                   dst_v, halfwidth,
                   width, height);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      int halfwidth = (width + 1) / 2;
      uint8* dst_u;
      uint8* dst_v;
      // I422 stores U before V; YV16 stores V before U.
      if (format == FOURCC_I422) {
        dst_u = dst_sample + width * abs_height;
        dst_v = dst_u + halfwidth * abs_height;
      } else {
        dst_v = dst_sample + width * abs_height;
        dst_u = dst_v + halfwidth * abs_height;
      }
      r = I420ToI422(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample, width,
                     dst_u, halfwidth,
                     dst_v, halfwidth,
                     width, height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      uint8* dst_u;
      uint8* dst_v;
      // I444 stores U before V; YV24 stores V before U.
      if (format == FOURCC_I444) {
        dst_u = dst_sample + width * abs_height;
        dst_v = dst_u + width * abs_height;
      } else {
        dst_v = dst_sample + width * abs_height;
        dst_u = dst_v + width * abs_height;
      }
      r = I420ToI444(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample, width,
                     dst_u, width,
                     dst_v, width,
                     width, height);
      break;
    }
    // Formats not supported - MJPG, biplanar, some rgb formats.
    default:
      return -1;  // unknown fourcc - return failure code.
  }
  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert.h"
#include "libyuv/basic_types.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I420 to specified format.
//   y/u/v, *_stride    source I420 planes and their strides.
//   dst_sample         destination buffer; for triplanar formats the three
//                      planes are laid out back to back inside it.
//   dst_sample_stride  destination stride in bytes; 0 selects the tightly
//                      packed default for the format (single-plane formats
//                      only; triplanar outputs always use packed strides).
//   width, height      image size; a negative height is forwarded to the
//                      per-format converter, which inverts the image.
//   format             FOURCC of the destination format.
// Returns -1 for a NULL pointer or an unsupported FOURCC; otherwise the
// result of the per-format converter (so converter failures are reported
// instead of being masked as success).
int ConvertFromI420(const uint8* y, int y_stride,
                    const uint8* u, int u_stride,
                    const uint8* v, int v_stride,
                    uint8* dst_sample, int dst_sample_stride,
                    int width, int height,
                    uint32 format) {
  if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) {
    return -1;
  }
  // Plane offsets inside dst_sample depend on the image size only, so they
  // must be computed from |height|; the signed height would place the U/V
  // planes before the start of the buffer.
  const int abs_height = (height < 0) ? -height : height;
  int r = 0;
  switch (format) {
    // Single plane formats
    case FOURCC_YUY2:
      r = I420ToYUY2(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 2,
                     width, height);
      break;
    case FOURCC_UYVY:
      r = I420ToUYVY(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 2,
                     width, height);
      break;
    case FOURCC_V210:
      // Default V210 stride: 128 bytes per group of 48 pixels, rounded up.
      r = I420ToV210(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride :
                         (width + 47) / 48 * 128,
                     width, height);
      break;
    case FOURCC_RGBP:
      r = I420ToRGB565(y, y_stride,
                       u, u_stride,
                       v, v_stride,
                       dst_sample,
                       dst_sample_stride ? dst_sample_stride : width * 2,
                       width, height);
      break;
    case FOURCC_RGBO:
      r = I420ToARGB1555(y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         dst_sample,
                         dst_sample_stride ? dst_sample_stride : width * 2,
                         width, height);
      break;
    case FOURCC_R444:
      r = I420ToARGB4444(y, y_stride,
                         u, u_stride,
                         v, v_stride,
                         dst_sample,
                         dst_sample_stride ? dst_sample_stride : width * 2,
                         width, height);
      break;
    case FOURCC_24BG:
      r = I420ToRGB24(y, y_stride,
                      u, u_stride,
                      v, v_stride,
                      dst_sample,
                      dst_sample_stride ? dst_sample_stride : width * 3,
                      width, height);
      break;
    case FOURCC_RAW:
      r = I420ToRAW(y, y_stride,
                    u, u_stride,
                    v, v_stride,
                    dst_sample,
                    dst_sample_stride ? dst_sample_stride : width * 3,
                    width, height);
      break;
    case FOURCC_ARGB:
      r = I420ToARGB(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4,
                     width, height);
      break;
    case FOURCC_BGRA:
      r = I420ToBGRA(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4,
                     width, height);
      break;
    case FOURCC_ABGR:
      r = I420ToABGR(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample,
                     dst_sample_stride ? dst_sample_stride : width * 4,
                     width, height);
      break;
    case FOURCC_BGGR:
      r = I420ToBayerBGGR(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_GBRG:
      r = I420ToBayerGBRG(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_GRBG:
      r = I420ToBayerGRBG(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_RGGB:
      r = I420ToBayerRGGB(y, y_stride,
                          u, u_stride,
                          v, v_stride,
                          dst_sample,
                          dst_sample_stride ? dst_sample_stride : width,
                          width, height);
      break;
    case FOURCC_I400:
      r = I400Copy(y, y_stride,
                   dst_sample,
                   dst_sample_stride ? dst_sample_stride : width,
                   width, height);
      break;
    // Triplanar formats
    // TODO(fbarchard): halfstride instead of halfwidth
    case FOURCC_I420:
    case FOURCC_YV12: {
      int halfwidth = (width + 1) / 2;
      int halfheight = (abs_height + 1) / 2;
      uint8* dst_u;
      uint8* dst_v;
      // I420 stores U before V; YV12 stores V before U.
      if (format == FOURCC_I420) {
        dst_u = dst_sample + width * abs_height;
        dst_v = dst_u + halfwidth * halfheight;
      } else {
        dst_v = dst_sample + width * abs_height;
        dst_u = dst_v + halfwidth * halfheight;
      }
      r = I420Copy(y, y_stride,
                   u, u_stride,
                   v, v_stride,
                   dst_sample, width,
                   dst_u, halfwidth,
                   dst_v, halfwidth,
                   width, height);
      break;
    }
    case FOURCC_I422:
    case FOURCC_YV16: {
      int halfwidth = (width + 1) / 2;
      uint8* dst_u;
      uint8* dst_v;
      // I422 stores U before V; YV16 stores V before U.
      if (format == FOURCC_I422) {
        dst_u = dst_sample + width * abs_height;
        dst_v = dst_u + halfwidth * abs_height;
      } else {
        dst_v = dst_sample + width * abs_height;
        dst_u = dst_v + halfwidth * abs_height;
      }
      r = I420ToI422(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample, width,
                     dst_u, halfwidth,
                     dst_v, halfwidth,
                     width, height);
      break;
    }
    case FOURCC_I444:
    case FOURCC_YV24: {
      uint8* dst_u;
      uint8* dst_v;
      // I444 stores U before V; YV24 stores V before U.
      if (format == FOURCC_I444) {
        dst_u = dst_sample + width * abs_height;
        dst_v = dst_u + width * abs_height;
      } else {
        dst_v = dst_sample + width * abs_height;
        dst_u = dst_v + width * abs_height;
      }
      r = I420ToI444(y, y_stride,
                     u, u_stride,
                     v, v_stride,
                     dst_sample, width,
                     dst_u, width,
                     dst_v, width,
                     width, height);
      break;
    }
    // Formats not supported - MJPG, biplanar, some rgb formats.
    default:
      return -1;  // unknown fourcc - return failure code.
  }
  return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
...@@ -20,116 +20,6 @@ namespace libyuv { ...@@ -20,116 +20,6 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// SIMD implementations of SplitUV: de-interleave a row of packed UV byte
// pairs into separate U and V rows. Exactly one variant is compiled,
// depending on the target: ARM NEON, MSVC x86 (SSE2) or GCC-style x86 (SSE2).
// Every variant consumes 16 UV pairs per loop iteration and keeps looping
// while the remaining count is still above zero after the subtraction, so
// callers are expected to pass pix as a multiple of 16.
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITUV_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
static void SplitUV_NEON(const uint8* src_uv,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"1: \n"
"vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
"vst1.u8 {q0}, [%1]! \n" // store U
"vst1.u8 {q1}, [%2]! \n" // Store V
"bhi 1b \n"
: "+r"(src_uv),
"+r"(dst_u),
"+r"(dst_v),
"+r"(pix) // Output registers
: // Input registers
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#elif defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITUV_SSE2
// MSVC inline-assembly variant. The function is naked, so arguments are read
// straight from the stack; the extra "+ 4" in each offset skips the saved edi.
__declspec(naked)
static void SplitUV_SSE2(const uint8* src_uv,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
// edi becomes (dst_v - dst_u) so [edx + edi] addresses dst_v while only
// edx needs to be advanced inside the loop.
sub edi, edx
convertloop:
// movdqa is the aligned 16-byte move: src_uv, dst_u and dst_v must all be
// 16-byte aligned.
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm0, xmm5 // even bytes
pand xmm1, xmm5
packuswb xmm0, xmm1
psrlw xmm2, 8 // odd bytes
psrlw xmm3, 8
packuswb xmm2, xmm3
movdqa [edx], xmm0
movdqa [edx + edi], xmm2
lea edx, [edx + 16]
sub ecx, 16
ja convertloop
pop edi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITUV_SSE2
// GCC-style inline-assembly variant; same instruction sequence as the MSVC
// version above: %0 = src_uv, %1 = dst_u, %2 = dst_v (turned into the offset
// dst_v - dst_u by the initial "sub"), %3 = pix.
static void SplitUV_SSE2(const uint8* src_uv,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all ones ...
"psrlw $0x8,%%xmm5 \n" // ... shifted to the 0x00ff00ff mask
"sub %1,%2 \n" // %2 = dst_v - dst_u
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"pand %%xmm5,%%xmm0 \n" // keep even bytes
"pand %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"psrlw $0x8,%%xmm2 \n" // move odd bytes down
"psrlw $0x8,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqa %%xmm0,(%1) \n" // store to dst_u
"movdqa %%xmm2,(%1,%2) \n" // store to dst_v
"lea 0x10(%1),%1 \n"
"sub $0x10,%3 \n"
"ja 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
: "memory", "cc"
// The xmm registers are listed as clobbers only when __SSE2__ is defined.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
// Portable SplitUV: de-interleaves pix packed UV byte pairs from src_uv,
// writing the first byte of each pair to dst_u and the second to dst_v.
static void SplitUV_C(const uint8* src_uv,
                      uint8* dst_u, uint8* dst_v, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[2 * i];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
// CopyRows copys 'count' bytes using a 16 byte load/store, 64 bytes at time // CopyRows copys 'count' bytes using a 16 byte load/store, 64 bytes at time
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM) #if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_COPYROW_SSE2 #define HAS_COPYROW_SSE2
...@@ -191,1171 +81,144 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { ...@@ -191,1171 +81,144 @@ void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
#endif #endif
); );
} }
#define HAS_COPYROW_X86 #define HAS_COPYROW_X86
void CopyRow_X86(const uint8* src, uint8* dst, int width) { void CopyRow_X86(const uint8* src, uint8* dst, int width) {
size_t width_tmp = static_cast<size_t>(width); size_t width_tmp = static_cast<size_t>(width);
asm volatile ( asm volatile (
"shr $0x2,%2 \n" "shr $0x2,%2 \n"
"rep movsl \n" "rep movsl \n"
: "+S"(src), // %0 : "+S"(src), // %0
"+D"(dst), // %1 "+D"(dst), // %1
"+c"(width_tmp) // %2 "+c"(width_tmp) // %2
: :
: "memory", "cc" : "memory", "cc"
); );
} }
#endif
// Portable row copy: copies count bytes from src to dst using memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t num_bytes = static_cast<size_t>(count);
  memcpy(dst, src, num_bytes);
}
// Copy a plane of data
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
void (*CopyRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2;
} else
#endif
#if defined(HAS_COPYROW_X86)
if (IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
CopyRow = CopyRow_X86;
} else
#endif
{
CopyRow = CopyRow_C;
}
// Copy plane
for (int y = 0; y < height; ++y) {
CopyRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
// Copy I420 with optional flipping.
// Returns -1 if any plane pointer is NULL, width <= 0 or height == 0;
// otherwise copies the three planes and returns 0. A negative height reads
// the source bottom-up, producing a vertically flipped copy.
int I420Copy(const uint8* src_y, int src_stride_y,
             const uint8* src_u, int src_stride_u,
             const uint8* src_v, int src_stride_v,
             uint8* dst_y, int dst_stride_y,
             uint8* dst_u, int dst_stride_u,
             uint8* dst_v, int dst_stride_v,
             int width, int height) {
  const bool missing_plane = !src_y || !src_u || !src_v ||
                             !dst_y || !dst_u || !dst_v;
  if (missing_plane || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    // Flip: point every source plane at its last row and walk backwards.
    height = -height;
    const int last_chroma_row = ((height + 1) >> 1) - 1;
    src_y += (height - 1) * src_stride_y;
    src_u += last_chroma_row * src_stride_u;
    src_v += last_chroma_row * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  // Chroma planes are half size in both dimensions, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;
  // dst_y was validated as non-NULL above, so the Y copy always runs.
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u,
            chroma_width, chroma_height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v,
            chroma_width, chroma_height);
  return 0;
}
// Mirror a plane of data
void MirrorPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
void (*MirrorRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_NEON;
} else
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else
#endif
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSE2;
} else
#endif
{
MirrorRow = MirrorRow_C;
}
// Mirror plane
for (int y = 0; y < height; ++y) {
MirrorRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
// Mirror I420 with optional flipping.
// Returns -1 if any plane pointer is NULL, width <= 0 or height == 0;
// otherwise mirrors the three planes with MirrorPlane and returns 0.
// A negative height reads the source bottom-up, which also flips the result
// vertically.
int I420Mirror(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  const bool missing_plane = !src_y || !src_u || !src_v ||
                             !dst_y || !dst_u || !dst_v;
  if (missing_plane || width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    // Flip: point every source plane at its last row and walk backwards.
    height = -height;
    const int last_chroma_row = ((height + 1) >> 1) - 1;
    src_y += (height - 1) * src_stride_y;
    src_u += last_chroma_row * src_stride_u;
    src_v += last_chroma_row * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  // Chroma planes are half size in both dimensions, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;
  // dst_y was validated as non-NULL above, so the Y plane is always mirrored.
  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u,
              chroma_width, chroma_height);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v,
              chroma_width, chroma_height);
  return 0;
}
// Copy ARGB with optional flipping.
// Returns -1 if either pointer is NULL, width <= 0 or height == 0; otherwise
// copies the image as a single plane of width * 4 bytes per row and
// returns 0. A negative height reads the source bottom-up (vertical flip).
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int width, int height) {
  if (!src_argb || !dst_argb) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    // Flip: start from the last source row and walk backwards.
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  const int row_bytes = width * 4;
  CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
            row_bytes, height);
  return 0;
}
// SSE2 implementations of HalfRow: writes to dst_uv the rounded byte-wise
// average (pavgb) of the row at src_uv and the row src_uv_stride bytes after
// it. 16 bytes are processed per iteration, and the loop continues while the
// remaining count is still above zero after subtracting 16, so callers are
// expected to pass pix as a multiple of 16. movdqa is the aligned move, so
// the addresses it touches must be 16-byte aligned.
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
// MSVC variant. Naked function: arguments are read from the stack, with the
// extra "+ 4" in each offset skipping the saved edi.
__declspec(naked)
static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // src_uv_stride
mov edi, [esp + 4 + 12] // dst_uv
mov ecx, [esp + 4 + 16] // pix
// edi becomes (dst_uv - src_uv) so [eax + edi] addresses the destination
// while only eax is advanced.
sub edi, eax
convertloop:
movdqa xmm0, [eax]
pavgb xmm0, [eax + edx]
movdqa [eax + edi], xmm0
lea eax, [eax + 16]
sub ecx, 16
ja convertloop
pop edi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
// GCC-style variant of the same loop: %0 = src_uv, %1 = dst_uv (turned into
// the offset dst_uv - src_uv by the initial "sub"), %2 = pix,
// %3 = src_uv_stride widened to pointer size.
static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
asm volatile (
"sub %0,%1 \n" // %1 = dst_uv - src_uv
"1: \n"
"movdqa (%0),%%xmm0 \n"
"pavgb (%0,%3),%%xmm0 \n" // average with the row one stride away
"movdqa %%xmm0,(%0,%1) \n" // store to dst_uv
"lea 0x10(%0),%0 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_uv), // %1
"+r"(pix) // %2
: "r"(static_cast<intptr_t>(src_uv_stride)) // %3
: "memory", "cc"
// The xmm register is listed as a clobber only when __SSE2__ is defined.
#if defined(__SSE2__)
, "xmm0"
#endif
);
}
#endif
// Portable HalfRow: for each of the pix bytes, stores the rounded-up average
// of the byte in the row at src_uv and the byte src_uv_stride bytes after it.
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
               uint8* dst_uv, int pix) {
  int x = 0;
  while (x < pix) {
    const int top = src_uv[x];
    const int bottom = src_uv[src_uv_stride + x];
    dst_uv[x] = (top + bottom + 1) >> 1;
    ++x;
  }
}
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
int halfwidth = (width + 1) >> 1;
void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix);
#if defined(HAS_HALFROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
HalfRow = HalfRow_SSE2;
} else
#endif
{
HalfRow = HalfRow_C;
}
// Copy Y plane
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
// SubSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
HalfRow(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane.
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_v, src_stride_v, dst_v, halfwidth);
src_v += src_stride_v * 2;
dst_v += dst_stride_v;
}
if (height & 1) {
HalfRow(src_v, 0, dst_v, halfwidth);
}
return 0;
}
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (height - 1) * dst_stride_u;
dst_v = dst_v + (height - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
// Copy Y plane
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
int halfwidth = (width + 1) >> 1;
// UpSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
memcpy(dst_u, src_u, halfwidth);
memcpy(dst_u + dst_stride_u, src_u, halfwidth);
src_u += src_stride_u;
dst_u += dst_stride_u * 2;
}
if (height & 1) {
memcpy(dst_u, src_u, halfwidth);
}
// UpSample V plane.
for (y = 0; y < height - 1; y += 2) {
memcpy(dst_v, src_v, halfwidth);
memcpy(dst_v + dst_stride_v, src_v, halfwidth);
src_v += src_stride_v;
dst_v += dst_stride_v * 2;
}
if (height & 1) {
memcpy(dst_v, src_v, halfwidth);
}
return 0;
}
// Row functions that reduce two source rows to one row of half the width.
// Blends 32x2 pixels to 16x1
// source in scale.cc
// They are re-declared here so the chroma subsampler in this file can call
// them. Only one platform-specific variant is declared per build; the C
// version is always declared.
// NOTE(review): HAS_SCALEROWDOWN2_NEON is defined below, but no matching
// HAS_SCALEROWDOWN2_SSE2 define is visible for the x86 branch - confirm
// whether the SSE2 path is meant to be enabled.
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SCALEROWDOWN2_NEON
void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
uint8* dst, int dst_width);
#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(YUV_DISABLE_ASM)
void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
#endif
// Portable fallback.
void ScaleRowDown2Int_C(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
// Convert I444 (full-resolution chroma) to I420 (half width, half height).
// Luma is copied unchanged; each chroma plane is reduced two rows at a time
// with a ScaleRowDown2 row function.
// A negative height reads the source bottom-up, flipping the image.
// Passing a NULL dst_y skips the luma copy. Always returns 0.
int I444ToI420(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  int halfwidth = (width + 1) >> 1;
  void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
                        uint8* dst_ptr, int dst_width);
  // Pick the fastest row function the CPU and buffer alignment allow.
  // Every optional branch must end in "} else" so the chain always falls
  // through to the C version without overwriting an earlier selection.
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) &&
      IS_ALIGNED(halfwidth, 16)) {
    ScaleRowDown2 = ScaleRowDown2Int_NEON;
  } else
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(halfwidth, 16) &&
      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
    ScaleRowDown2 = ScaleRowDown2Int_SSE2;
  } else  // Was missing: the block was left unclosed before #endif.
#endif
  {
    ScaleRowDown2 = ScaleRowDown2Int_C;
  }
  // Copy Y plane
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // SubSample U plane.
  int y;
  for (y = 0; y < height - 1; y += 2) {
    ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth);
    src_u += src_stride_u * 2;
    dst_u += dst_stride_u;
  }
  // Odd height: a stride of 0 pairs the last row with itself.
  if (height & 1) {
    ScaleRowDown2(src_u, 0, dst_u, halfwidth);
  }
  // SubSample V plane.
  for (y = 0; y < height - 1; y += 2) {
    ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth);
    src_v += src_stride_v * 2;
    dst_v += dst_stride_v;
  }
  if (height & 1) {
    ScaleRowDown2(src_v, 0, dst_v, halfwidth);
  }
  return 0;
}
// use Bilinear for upsampling chroma
// Forward declaration only; the definition is not in this section.
// Parameter order is source size, destination size, the two strides, then
// the source and destination pointers.
void ScalePlaneBilinear(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr);
// Convert I420 (half width, half height chroma) to I444 (full chroma).
// Luma is copied as-is; each chroma plane is scaled to the full frame size
// with ScalePlaneBilinear.
// A negative height writes the destination bottom-up, flipping the image.
// Passing a NULL dst_y skips the luma copy. Always returns 0.
int I420ToI444(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Flip by starting at the last destination row and stepping backwards.
  if (height < 0) {
    height = -height;
    dst_y += (height - 1) * dst_stride_y;
    dst_u += (height - 1) * dst_stride_u;
    dst_v += (height - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Source chroma dimensions, rounded up for odd frame sizes.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // U plane up to full resolution.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_u, dst_stride_u, src_u, dst_u);
  // V plane up to full resolution.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_v, dst_stride_v, src_v, dst_v);
  return 0;
}
// Copy a plane whose source rows come in pairs with two different pitches.
// After an even row the source advances by src_stride_0, and after an odd
// row by src_stride_1. The destination always advances by dst_stride_frame.
// Exactly `height` rows of `width` bytes are copied. For an odd height the
// final pair is cut short after its first row, so nothing is read or
// written past the last row.
static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
                       uint8* dst, int dst_stride_frame,
                       int width, int height) {
  for (int y = 0; y < height; y += 2) {
    // First row of the pair.
    memcpy(dst, src, width);
    src += src_stride_0;
    dst += dst_stride_frame;
    // Odd height: the final pair has no second row.
    if (y + 1 >= height) {
      break;
    }
    // Second row of the pair.
    memcpy(dst, src, width);
    src += src_stride_1;
    dst += dst_stride_frame;
  }
}
// Shared converter for biplanar 4:2:0 sources (used for FOURCC_M420 and
// NV12) into I420.
// M420 is useful for bandwidth constrained transports like USB 1.0 and 2.0
// and converts easily to I420.
// M420 format description:
//   M420 is row biplanar 420: 2 rows of Y and 1 row of VU.
//   Chroma is half width / half height. (420)
//   src_stride_m420 is row planar. Normally this will be the width in
//   pixels. The UV plane is half width, but 2 values, so src_stride_m420
//   applies to this as well as the two Y planes.
// The luma source uses two pitches: src_stride_y0 is applied after even
// rows and src_stride_y1 after odd rows. A negative height flips the
// destination vertically. A NULL dst_y skips the luma copy.
// Always returns 0.
static int X420ToI420(const uint8* src_y,
                      int src_stride_y0, int src_stride_y1,
                      const uint8* src_uv, int src_stride_uv,
                      uint8* dst_y, int dst_stride_y,
                      uint8* dst_u, int dst_stride_u,
                      uint8* dst_v, int dst_stride_v,
                      int width, int height) {
  const bool flipped = height < 0;
  if (flipped) {
    height = -height;
  }
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;
  if (flipped) {
    // Start at the last destination rows and walk upwards.
    dst_y += (height - 1) * dst_stride_y;
    dst_u += (chroma_height - 1) * dst_stride_u;
    dst_v += (chroma_height - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Default to the portable de-interleaver; upgrade when the CPU and the
  // buffer alignment permit.
  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
      SplitUV_C;
#if defined(HAS_SPLITUV_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(chroma_width, 16)) {
    SplitUV = SplitUV_NEON;
  }
#elif defined(HAS_SPLITUV_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(chroma_width, 16) &&
      IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
    SplitUV = SplitUV_SSE2;
  }
#endif

  if (dst_y) {
    CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
               width, height);
  }

  // De-interleave one chroma row per pass.
  for (int row = 0; row < chroma_height; ++row) {
    SplitUV(src_uv, dst_u, dst_v, chroma_width);
    src_uv += src_stride_uv;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
dst_y, dst_stride_y,
dst_u, dst_stride_u,
dst_v, dst_stride_v,
width, height);
}
// Convert NV12 to I420.
// The luma plane has one uniform pitch, so the same stride is passed for
// both the even-row and odd-row steps of the shared X420 converter.
// The interleaved chroma plane is split into separate U and V planes there.
int NV12ToI420(const uint8* src_y, int src_stride_y,
               const uint8* src_uv, int src_stride_uv,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  const int luma_pitch = src_stride_y;
  return X420ToI420(src_y, luma_pitch, luma_pitch,
                    src_uv, src_stride_uv,
                    dst_y, dst_stride_y,
                    dst_u, dst_stride_u,
                    dst_v, dst_stride_v,
                    width, height);
}
// SplitYUY2_SSE2: split one packed row (Y0 U Y1 V byte order) into separate
// Y, U and V rows. Each loop pass consumes 32 source bytes and stores
// 16 Y bytes, 8 U bytes and 8 V bytes. The counter drops by 16 per pass and
// the loop repeats while it stays above zero.
// Source loads and Y stores use movdqa (aligned 16-byte access); U and V
// are stored 8 bytes at a time with movq.
// Two builds of the same routine follow: MSVC inline asm for 32-bit x86,
// then GCC-style inline asm for x86 / x64.
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
__declspec(naked)
static void SplitYUY2_SSE2(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
// Naked function: arguments are read straight off the stack. The extra 8
// in each offset accounts for the two registers pushed here.
push esi
push edi
mov eax, [esp + 8 + 4] // src_yuy2
mov edx, [esp + 8 + 8] // dst_y
mov esi, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
convertloop:
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
// Work on copies for Y so xmm0/xmm1 keep the original bytes for chroma.
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm2, xmm5 // even bytes are Y
pand xmm3, xmm5
packuswb xmm2, xmm3
movdqa [edx], xmm2
lea edx, [edx + 16]
psrlw xmm0, 8 // YUYV -> UVUV
psrlw xmm1, 8
packuswb xmm0, xmm1
movdqa xmm1, xmm0
pand xmm0, xmm5 // U
packuswb xmm0, xmm0
movq qword ptr [esi], xmm0
lea esi, [esi + 8]
psrlw xmm1, 8 // V
packuswb xmm1, xmm1
movq qword ptr [edi], xmm1
lea edi, [edi + 8]
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// Same instruction sequence as the MSVC build above, written as GCC
// extended asm. All five operands are read-write because the pointers and
// the counter are advanced inside the loop.
static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
// xmm5 = 0x00ff in every 16-bit lane (selects the low byte of each pair).
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
// Y: mask the even bytes, pack to 16 bytes, store.
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm5,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqa %%xmm2,(%1) \n"
"lea 0x10(%1),%1 \n"
// Chroma: shift the odd bytes down, giving alternating U,V bytes.
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
// U: low byte of each U,V pair.
"pand %%xmm5,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0,(%2) \n"
"lea 0x8(%2),%2 \n"
// V: high byte of each U,V pair.
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm1,(%3) \n"
"lea 0x8(%3),%3 \n"
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(pix) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
// Portable version: split one packed row (Y0 U Y1 V byte order) into
// separate Y, U and V rows.
//   src_yuy2: packed source row, 4 bytes per pair of pixels.
//   dst_y:    receives `pix` luma bytes.
//   dst_u/v:  each receive (pix + 1) / 2 chroma bytes.
//   pix:      number of pixels in the row.
// For an odd `pix` the final pixel writes its single luma byte and its
// chroma pair, but not a second luma byte, so dst_y is never written past
// `pix` bytes.
static void SplitYUY2_C(const uint8* src_yuy2,
                        uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
  int x = 0;
  // Whole pixel pairs.
  for (; x + 1 < pix; x += 2) {
    dst_y[0] = src_yuy2[0];
    dst_y[1] = src_yuy2[2];
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
    src_yuy2 += 4;
    dst_y += 2;
    dst_u += 1;
    dst_v += 1;
  }
  // Trailing odd pixel: one luma sample plus its chroma.
  if (x < pix) {
    dst_y[0] = src_yuy2[0];
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
  }
}
// Convert Q420 to I420.
// Format is rows of YY/YUYV: a luma-only row from src_y alternates with a
// packed YUYV row from src_yuy2, which supplies the next luma row and the
// chroma row for the pair.
// A negative height flips the destination vertically. Always returns 0.
// NOTE(review): rows are consumed two at a time, so an odd height still
// writes a second luma row on the last pass - confirm callers pass even
// heights or size their buffers accordingly.
int Q420ToI420(const uint8* src_y, int src_stride_y,
               const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Flip by starting at the last destination rows and stepping backwards.
  if (height < 0) {
    height = -height;
    const int chroma_rows = (height + 1) >> 1;
    dst_y += (height - 1) * dst_stride_y;
    dst_u += (chroma_rows - 1) * dst_stride_u;
    dst_v += (chroma_rows - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Default to the portable splitter; use SSE2 only when the width and all
  // pointers/strides meet its alignment needs.
  void (*SplitYUY2)(const uint8* src_yuy2,
                    uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) =
      SplitYUY2_C;
#if defined(HAS_SPLITYUY2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
      IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
      IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
    SplitYUY2 = SplitYUY2_SSE2;
  }
#endif

  int row = 0;
  while (row < height) {
    // Even row: plain luma.
    memcpy(dst_y, src_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
    // Odd row: packed YUYV, split into luma plus one chroma row.
    SplitYUY2(src_yuy2, dst_y, dst_u, dst_v, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    row += 2;
  }
  return 0;
}
// Convert I420 to ARGB.
// Rows are converted one at a time; the chroma pointers advance after every
// second luma row. A negative height flips the destination vertically.
// Always returns 0.
int I420ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  // Flip by starting at the last destination row and stepping backwards.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  // Default to the portable row converter; upgrade by CPU capability. The
  // "Any" variants are used when width/alignment rule out the strict ones.
  void (*I420ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I420ToARGBRow = IS_ALIGNED(width, 16) ? I420ToARGBRow_NEON
                                          : I420ToARGBRow_Any_NEON;
  }
#elif defined(HAS_I420TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    const bool strict_ok = IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16);
    I420ToARGBRow = strict_ok ? I420ToARGBRow_SSSE3
                              : I420ToARGBRow_Any_SSSE3;
  }
#endif

  for (int row = 0; row < height; ++row) {
    I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
    // Two luma rows share one chroma row.
    if ((row & 1) != 0) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to BGRA.
// Rows are converted one at a time; the chroma pointers advance after every
// second luma row. A negative height flips the destination vertically.
// Always returns 0.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  // Flip by starting at the last destination row and stepping backwards.
  if (height < 0) {
    height = -height;
    dst_bgra += (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }

  // Default to the portable row converter; upgrade by CPU capability. The
  // "Any" variants are used when width/alignment rule out the strict ones.
  void (*I420ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I420ToBGRARow = IS_ALIGNED(width, 16) ? I420ToBGRARow_NEON
                                          : I420ToBGRARow_Any_NEON;
  }
#elif defined(HAS_I420TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    const bool strict_ok = IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16);
    I420ToBGRARow = strict_ok ? I420ToBGRARow_SSSE3
                              : I420ToBGRARow_Any_SSSE3;
  }
#endif

  for (int row = 0; row < height; ++row) {
    I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    src_y += src_stride_y;
    dst_bgra += dst_stride_bgra;
    // Two luma rows share one chroma row.
    if ((row & 1) != 0) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to ABGR.
// Rows are converted one at a time; the chroma pointers advance after every
// second luma row. A negative height flips the destination vertically.
// Always returns 0.
int I420ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  // Flip by starting at the last destination row and stepping backwards.
  if (height < 0) {
    height = -height;
    dst_abgr += (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }

  // Default to the portable row converter; upgrade by CPU capability. The
  // "Any" variants are used when width/alignment rule out the strict ones.
  void (*I420ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I420ToABGRRow = IS_ALIGNED(width, 16) ? I420ToABGRRow_NEON
                                          : I420ToABGRRow_Any_NEON;
  }
#elif defined(HAS_I420TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    const bool strict_ok = IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16);
    I420ToABGRRow = strict_ok ? I420ToABGRRow_SSSE3
                              : I420ToABGRRow_Any_SSSE3;
  }
#endif

  for (int row = 0; row < height; ++row) {
    I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    src_y += src_stride_y;
    dst_abgr += dst_stride_abgr;
    // Two luma rows share one chroma row.
    if ((row & 1) != 0) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to RGB24.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
} else
#endif #endif
{
ARGBToRGB24Row = ARGBToRGB24Row_C;
}
for (int y = 0; y < height; ++y) { void CopyRow_C(const uint8* src, uint8* dst, int count) {
I420ToARGBRow(src_y, src_u, src_v, row, width); memcpy(dst, src, count);
ARGBToRGB24Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
} }
// Convert I420 to RAW. // Copy a plane of data
int I420ToRAW(const uint8* src_y, int src_stride_y, void CopyPlane(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, uint8* dst_y, int dst_stride_y,
const uint8* src_v, int src_stride_v, int width, int height) {
uint8* dst_argb, int dst_stride_argb, void (*CopyRow)(const uint8* src, uint8* dst, int width);
int width, int height) { #if defined(HAS_COPYROW_SSE2)
// Negative height means invert the image. if (TestCpuFlag(kCpuHasSSE2) &&
if (height < 0) { IS_ALIGNED(width, 32) &&
height = -height; IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
dst_argb = dst_argb + (height - 1) * dst_stride_argb; IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
dst_stride_argb = -dst_stride_argb; CopyRow = CopyRow_SSE2;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else } else
#endif #endif
{ #if defined(HAS_COPYROW_X86)
I420ToARGBRow = I420ToARGBRow_C; if (IS_ALIGNED(width, 4) &&
} IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
SIMD_ALIGNED(uint8 row[kMaxStride]); CopyRow = CopyRow_X86;
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
} else } else
#endif #endif
{ {
ARGBToRAWRow = ARGBToRAWRow_C; CopyRow = CopyRow_C;
} }
// Copy plane
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); CopyRow(src_y, dst_y, width);
ARGBToRAWRow(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { dst_y += dst_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
} }
return 0;
} }
// Convert I420 to RGB565. // Mirror a plane of data
int I420ToRGB565(const uint8* src_y, int src_stride_y, void MirrorPlane(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, uint8* dst_y, int dst_stride_y,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb, int dst_stride_rgb,
int width, int height) { int width, int height) {
// Negative height means invert the image. void (*MirrorRow)(const uint8* src, uint8* dst, int width);
if (height < 0) { #if defined(HAS_MIRRORROW_NEON)
height = -height; if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; MirrorRow = MirrorRow_NEON;
dst_stride_rgb = -dst_stride_rgb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else } else
#elif defined(HAS_I420TOARGBROW_SSSE3) #endif
if (TestCpuFlag(kCpuHasSSSE3)) { #if defined(HAS_MIRRORROW_SSSE3)
I420ToARGBRow = I420ToARGBRow_SSSE3; if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else } else
#endif #endif
{ #if defined(HAS_MIRRORROW_SSE2)
I420ToARGBRow = I420ToARGBRow_C; if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
} MirrorRow = MirrorRow_SSE2;
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
} else } else
#endif #endif
{ {
ARGBToRGB565Row = ARGBToRGB565Row_C; MirrorRow = MirrorRow_C;
} }
// Mirror plane
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width); MirrorRow(src_y, dst_y, width);
ARGBToRGB565Row(row, dst_rgb, width);
dst_rgb += dst_stride_rgb;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { dst_y += dst_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
} }
return 0;
} }
// Convert I420 to ARGB1555. // Mirror I420 with optional flipping
int I420ToARGB1555(const uint8* src_y, int src_stride_y, int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u, const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v, const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb, uint8* dst_y, int dst_stride_y,
int width, int height) { uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_y || !src_u || !src_v ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb; int halfheight = (height + 1) >> 1;
dst_stride_argb = -dst_stride_argb; src_y = src_y + (height - 1) * src_stride_y;
} src_u = src_u + (halfheight - 1) * src_stride_u;
void (*I420ToARGBRow)(const uint8* y_buf, src_v = src_v + (halfheight - 1) * src_stride_v;
const uint8* u_buf, src_stride_y = -src_stride_y;
const uint8* v_buf, src_stride_u = -src_stride_u;
uint8* rgb_buf, src_stride_v = -src_stride_v;
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
} else
#endif
{
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
} }
for (int y = 0; y < height; ++y) { int halfwidth = (width + 1) >> 1;
I420ToARGBRow(src_y, src_u, src_v, row, width); int halfheight = (height + 1) >> 1;
ARGBToARGB1555Row(row, dst_argb, width); if (dst_y) {
dst_argb += dst_stride_argb; MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
} }
MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0; return 0;
} }
// Convert I420 to ARGB4444. // Copy ARGB with optional flipping
int I420ToARGB4444(const uint8* src_y, int src_stride_y, int ARGBCopy(const uint8* src_argb, int src_stride_argb,
const uint8* src_u, int src_stride_u, uint8* dst_argb, int dst_stride_argb,
const uint8* src_v, int src_stride_v, int width, int height) {
uint8* dst_argb, int dst_stride_argb, if (!src_argb ||
int width, int height) { !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
height = -height; height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
dst_stride_argb = -dst_stride_argb; src_stride_argb = -src_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
} else
#endif
{
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
} }
for (int y = 0; y < height; ++y) { CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
I420ToARGBRow(src_y, src_u, src_v, row, width); width * 4, height);
ARGBToARGB4444Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0; return 0;
} }
...@@ -1977,9 +840,9 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width, ...@@ -1977,9 +840,9 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width,
} }
#endif #endif
static void SetPlane(uint8* dst_y, int dst_stride_y, void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height, int width, int height,
uint32 value) { uint32 value) {
void (*SetRow)(uint8* dst, uint32 value, int pix); void (*SetRow)(uint8* dst, uint32 value, int pix);
#if defined(HAS_SETROW_NEON) #if defined(HAS_SETROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && if (TestCpuFlag(kCpuHasNEON) &&
...@@ -2068,40 +931,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb, ...@@ -2068,40 +931,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
return 0; return 0;
} }
// I400 is greyscale typically used in MJPG
// Copies the luma plane and fills both half-size chroma planes with 128.
// A negative height reads the source bottom-up, flipping the image.
// Always returns 0.
int I400ToI420(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Flip by starting at the last source row and stepping backwards.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);

  // Chroma dimensions, rounded up for odd frame sizes.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;
  SetPlane(dst_u, dst_stride_u, chroma_width, chroma_height, 128);
  SetPlane(dst_v, dst_stride_v, chroma_width, chroma_height, 128);
  return 0;
}
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y;
}
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
return 0;
}
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "libyuv/rotate.h" #include "libyuv/rotate.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "rotate_priv.h" #include "rotate_priv.h"
#include "row.h" #include "row.h"
......
...@@ -46,6 +46,7 @@ extern "C" { ...@@ -46,6 +46,7 @@ extern "C" {
#define HAS_I444TOARGBROW_SSSE3 #define HAS_I444TOARGBROW_SSSE3
#define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROW_SSE2 #define HAS_MIRRORROW_SSE2
#define HAS_SPLITUV_SSE2
#define HAS_YUY2TOYROW_SSE2 #define HAS_YUY2TOYROW_SSE2
#define HAS_UYVYTOYROW_SSE2 #define HAS_UYVYTOYROW_SSE2
#define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOUVROW_SSE2
...@@ -67,6 +68,7 @@ extern "C" { ...@@ -67,6 +68,7 @@ extern "C" {
// The following are available on Neon platforms // The following are available on Neon platforms
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM) #if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_MIRRORROW_NEON #define HAS_MIRRORROW_NEON
#define HAS_SPLITUV_NEON
#define HAS_I420TOARGBROW_NEON #define HAS_I420TOARGBROW_NEON
#define HAS_I420TOBGRAROW_NEON #define HAS_I420TOBGRAROW_NEON
#define HAS_I420TOABGRROW_NEON #define HAS_I420TOABGRROW_NEON
...@@ -125,6 +127,10 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); ...@@ -125,6 +127,10 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width);
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
......
...@@ -350,9 +350,7 @@ void I444ToARGBRow_C(const uint8* y_buf, ...@@ -350,9 +350,7 @@ void I444ToARGBRow_C(const uint8* y_buf,
} }
} }
void YToARGBRow_C(const uint8* y_buf, void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) {
uint8* rgb_buf,
int width) {
for (int x = 0; x < width; ++x) { for (int x = 0; x < width; ++x) {
YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0); YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
y_buf += 1; y_buf += 1;
...@@ -368,6 +366,17 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) { ...@@ -368,6 +366,17 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) {
} }
} }
// Portable de-interleave of one row of byte pairs: the first byte of each
// pair goes to dst_u and the second to dst_v. `pix` is the number of pairs.
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[2 * i];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
// Filter 2 rows of YUY2 UV's (422) into U and V (420) // Filter 2 rows of YUY2 UV's (422) into U and V (420)
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) { uint8* dst_u, uint8* dst_v, int pix) {
......
...@@ -15,6 +15,9 @@ namespace libyuv { ...@@ -15,6 +15,9 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// This module is for GCC Neon
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define YUVTORGB \ #define YUVTORGB \
"vld1.u8 {d0}, [%0]! \n" \ "vld1.u8 {d0}, [%0]! \n" \
"vld1.u32 {d2[0]}, [%1]! \n" \ "vld1.u32 {d2[0]}, [%1]! \n" \
...@@ -160,6 +163,29 @@ YUVTORGB ...@@ -160,6 +163,29 @@ YUVTORGB
} }
#endif #endif
#if defined(HAS_SPLITUV_NEON)
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
// Each pass consumes 32 source bytes and stores 16 bytes to each output.
// The counter drops by 16 per pass and the loop repeats while it stays
// above zero. All four operands are read-write because the pointers and
// the counter are advanced inside the loop.
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"1: \n"
"vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
"vst1.u8 {q0}, [%1]! \n" // store U
"vst1.u8 {q1}, [%2]! \n" // Store V
"bhi 1b \n"
: "+r"(src_uv),
"+r"(dst_u),
"+r"(dst_v),
"+r"(pix) // Output registers
: // Input registers
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#endif
#endif // __ARM_NEON__
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -17,6 +17,9 @@ namespace libyuv { ...@@ -17,6 +17,9 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// This module is for GCC x86 and x64
#if (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#ifdef __APPLE__ #ifdef __APPLE__
#define CONST #define CONST
#else #else
...@@ -816,7 +819,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -816,7 +819,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
"lea -0x10(%0),%0 \n" "lea -0x10(%0),%0 \n"
"1: \n" "1: \n"
"movdqu (%0,%2),%%xmm0 \n" "movdqu (%0,%2),%%xmm0 \n"
"movdqu %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm1 \n"
"psllw $0x8,%%xmm0 \n" "psllw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n" "psrlw $0x8,%%xmm1 \n"
"por %%xmm1,%%xmm0 \n" "por %%xmm1,%%xmm0 \n"
...@@ -839,6 +842,43 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { ...@@ -839,6 +842,43 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
} }
#endif #endif
#ifdef HAS_SPLITUV_SSE2
// De-interleaves a row of packed UV bytes with SSE2 (GCC inline assembly).
// Each loop iteration reads 32 bytes (16 UV pairs) from src_uv, writes the 16
// even bytes to dst_u and the 16 odd bytes to dst_v.
// Every load and store uses movdqa, so src_uv, dst_u and dst_v must all be
// 16-byte aligned. pix is reduced by 16 per iteration and is only tested after
// the first group has been stored, so one group is always processed.
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n" // xmm5 = all bits set
"psrlw $0x8,%%xmm5 \n" // xmm5 = 0x00ff in every 16-bit lane (low-byte mask)
"sub %1,%2 \n" // %2 = dst_v - dst_u, so (%1,%2) addresses dst_v
"1: \n"
"movdqa (%0),%%xmm0 \n" // load UV pairs 0..7
"movdqa 0x10(%0),%%xmm1 \n" // load UV pairs 8..15
"lea 0x20(%0),%0 \n" // src_uv += 32 (lea leaves flags untouched)
"movdqa %%xmm0,%%xmm2 \n" // keep copies for the odd-byte extraction
"movdqa %%xmm1,%%xmm3 \n"
"pand %%xmm5,%%xmm0 \n" // keep even bytes (U) as 16-bit values
"pand %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n" // pack to 16 U bytes
"psrlw $0x8,%%xmm2 \n" // move odd bytes (V) into the low byte of each lane
"psrlw $0x8,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n" // pack to 16 V bytes
"movdqa %%xmm0,(%1) \n" // store U at dst_u
"movdqa %%xmm2,(%1,%2) \n" // store V at dst_u + (dst_v - dst_u)
"lea 0x10(%1),%1 \n" // dst_u += 16; the dst_v address follows via %2
"sub $0x10,%3 \n" // pix -= 16
"ja 1b \n" // loop while pix was above 16 (unsigned compare)
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
: "memory", "cc"
// The xmm registers are declared as clobbers only when the compiler itself
// is targeting SSE2.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
#ifdef HAS_YUY2TOYROW_SSE2 #ifdef HAS_YUY2TOYROW_SSE2
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile ( asm volatile (
...@@ -1099,9 +1139,10 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, ...@@ -1099,9 +1139,10 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
#endif #endif
); );
} }
#endif // HAS_YUY2TOYROW_SSE2 #endif // HAS_YUY2TOYROW_SSE2
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
...@@ -15,6 +15,9 @@ namespace libyuv { ...@@ -15,6 +15,9 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
// This module is for Visual C x86
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
// Constant multiplication table for converting ARGB to I400. // Constant multiplication table for converting ARGB to I400.
...@@ -1503,7 +1506,7 @@ __asm { ...@@ -1503,7 +1506,7 @@ __asm {
#ifdef HAS_MIRRORROW_SSE2 #ifdef HAS_MIRRORROW_SSE2
// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3 // SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
// version can not. // version can not.
__declspec(naked) __declspec(naked)
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
...@@ -1514,7 +1517,7 @@ __asm { ...@@ -1514,7 +1517,7 @@ __asm {
lea eax, [eax - 16] lea eax, [eax - 16]
convertloop: convertloop:
movdqu xmm0, [eax + ecx] movdqu xmm0, [eax + ecx]
movdqu xmm1, xmm0 // swap bytes movdqa xmm1, xmm0 // swap bytes
psllw xmm0, 8 psllw xmm0, 8
psrlw xmm1, 8 psrlw xmm1, 8
por xmm0, xmm1 por xmm0, xmm1
...@@ -1530,6 +1533,42 @@ __asm { ...@@ -1530,6 +1533,42 @@ __asm {
} }
#endif #endif
#ifdef HAS_SPLITUV_SSE2
// De-interleaves a row of packed UV bytes with SSE2 (Visual C x86 inline
// assembly). Each loop iteration reads 32 bytes (16 UV pairs) from src_uv and
// writes the 16 even bytes to dst_u and the 16 odd bytes to dst_v.
// Every load and store uses movdqa, so src_uv, dst_u and dst_v must all be
// 16-byte aligned. pix is reduced by 16 per iteration and is only tested after
// the first group has been stored, so one group is always processed.
// The function is naked: the compiler emits no prologue or epilogue, so the
// arguments are fetched from the stack by hand and the code returns with an
// explicit ret.
__declspec(naked)
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push edi // edi is used below; saved here and restored before ret
// The extra "+ 4" in each offset skips the edi value just pushed.
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
convertloop:
movdqa xmm0, [eax] // load UV pairs 0..7
movdqa xmm1, [eax + 16] // load UV pairs 8..15
lea eax, [eax + 32] // src_uv += 32
movdqa xmm2, xmm0 // keep copies for the odd-byte extraction
movdqa xmm3, xmm1
pand xmm0, xmm5 // even bytes
pand xmm1, xmm5
packuswb xmm0, xmm1 // pack to 16 U bytes
psrlw xmm2, 8 // odd bytes
psrlw xmm3, 8
packuswb xmm2, xmm3 // pack to 16 V bytes
movdqa [edx], xmm0 // store U at dst_u
movdqa [edx + edi], xmm2 // store V at dst_u + (dst_v - dst_u)
lea edx, [edx + 16] // dst_u += 16; the dst_v address follows via edi
sub ecx, 16 // pix -= 16
ja convertloop // loop while pix was above 16 (unsigned compare)
pop edi
ret
}
}
#endif
#ifdef HAS_YUY2TOYROW_SSE2 #ifdef HAS_YUY2TOYROW_SSE2
__declspec(naked) __declspec(naked)
void YUY2ToYRow_SSE2(const uint8* src_yuy2, void YUY2ToYRow_SSE2(const uint8* src_yuy2,
...@@ -1800,6 +1839,8 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, ...@@ -1800,6 +1839,8 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
} }
#endif // HAS_YUY2TOYROW_SSE2 #endif // HAS_YUY2TOYROW_SSE2
#endif // _M_IX86
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
} // namespace libyuv } // namespace libyuv
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment