Commit 2d11d43a authored by fbarchard@google.com's avatar fbarchard@google.com

shuffle functions so convert.h is all formats to I420 and convert_from.h is from…

shuffle functions so convert.h is all formats to I420 and convert_from.h is from I420 to all formats
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/395006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@174 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 032b5f99
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 173
Version: 174
License: BSD
License File: LICENSE
......
......@@ -15,6 +15,7 @@
#include "libyuv/basic_types.h"
#include "libyuv/compare.h"
#include "libyuv/convert.h"
#include "libyuv/convert_from.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
......
......@@ -12,6 +12,7 @@
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
......@@ -19,117 +20,142 @@ namespace libyuv {
extern "C" {
#endif
// RGB24 is also known as 24BG and BGR3
int I420ToRGB24(const uint8* src_y, int src_stride_y,
// Copy I420 to I420.
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RAW is also known as RGB3
int I420ToRAW(const uint8* src_y, int src_stride_y,
// Convert I422 to I420.
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
// Convert I444 to I420.
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert I400 (grey) to I420.
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert NV12 to I420. Also used for NV21.
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert Q420 to I420.
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert YUY2 to I420.
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert UYVY to I420.
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert V210 to I420.
int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
// ARGB little endian (bgra in memory) to I420
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int RAWToI420(const uint8* src_frame, int src_stride_frame,
// BGRA little endian (argb in memory) to I420
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
// ABGR little endian (rgba in memory) to I420
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
// RGB little endian (bgr in memory) to I420
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
// RGB big endian (rgb in memory) to I420
int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
// RGB16 (RGBP fourcc) little endian to I420
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
// RGB15 (RGBO fourcc) little endian to I420
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
// RGB12 (R444 fourcc) little endian to I420
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
......@@ -162,16 +188,6 @@ int ConvertToI420(const uint8* src_frame, size_t src_size,
RotationMode rotation,
uint32 format);
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// See Also convert.h for conversions from formats to I420
// For I420 to I420, use I420Copy, which is declared in convert.h.
// Convert I420 to I422.
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to I444.
int I420ToI444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToNV12
// TODO(fbarchard): I420ToM420
// TODO(fbarchard): I420ToQ420
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Convert I420 to ARGB.
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to BGRA.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ABGR.
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_
......@@ -18,13 +18,13 @@ namespace libyuv {
extern "C" {
#endif
// Copy I420 to I420.
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value);
// Copy a plane of data (I420 to I400)
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// I420 mirror
......@@ -36,49 +36,6 @@ int I420Mirror(const uint8* src_y, int src_stride_y,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to I422.
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I444 to I420.
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to I444.
int I420ToI444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I400 (grey) to I420.
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV12 to ARGB. Also used for NV21.
int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
......@@ -91,76 +48,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert NV12 to I420. Also used for NV21.
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert Q420 to I420.
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert V210 to I420.
int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I420 to ARGB.
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to BGRA.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ABGR.
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB.
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
......@@ -242,9 +129,16 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Copy a plane of data
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int I422ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
#ifdef __cplusplus
......
......@@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 173
#define LIBYUV_VERSION 174
#endif // INCLUDE_LIBYUV_VERSION_H_
......@@ -25,6 +25,7 @@
# includes
'include/libyuv/basic_types.h',
'include/libyuv/convert.h',
'include/libyuv/convert_from.h',
'include/libyuv/scale.h',
'include/libyuv/planar_functions.h',
'include/libyuv/video_common.h',
......@@ -37,7 +38,7 @@
# sources
'source/compare.cc',
'source/convert.cc',
'source/convertfrom.cc',
'source/convert_from.cc',
'source/cpu_id.cc',
'source/format_conversion.cc',
'source/planar_functions.cc',
......
......@@ -10,6 +10,8 @@
#include "libyuv/convert.h"
#include <string.h> // For memcpy()
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
......@@ -23,477 +25,519 @@ namespace libyuv {
extern "C" {
#endif
// YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
__declspec(naked)
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
sub edx, esi
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2 // YUYV
punpckhbw xmm1, xmm2
movdqa [edi], xmm0
movdqa [edi + 16], xmm1
lea edi, [edi + 32]
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
// Copies an I420 image plane by plane.
// A negative height requests a vertical flip: the source planes are then read
// from their last row upwards while the destination is written top down.
// Returns 0 on success, or -1 when any plane pointer is null, width is not
// positive, or height is zero.
int I420Copy(const uint8* src_y, int src_stride_y,
             const uint8* src_u, int src_stride_u,
             const uint8* src_v, int src_stride_v,
             uint8* dst_y, int dst_stride_y,
             uint8* dst_u, int dst_stride_u,
             uint8* dst_v, int dst_stride_v,
             int width, int height) {
  const int have_src = src_y && src_u && src_v;
  const int have_dst = dst_y && dst_u && dst_v;
  if (!have_src || !have_dst || width <= 0 || height == 0) {
    return -1;
  }

  const int flip = height < 0;
  const int rows = flip ? -height : height;
  // Chroma planes are subsampled 2x2; odd dimensions round up.
  const int chroma_cols = (width + 1) >> 1;
  const int chroma_rows = (rows + 1) >> 1;

  if (flip) {
    // Start at the last source row of each plane and walk backwards.
    src_y += (rows - 1) * src_stride_y;
    src_u += (chroma_rows - 1) * src_stride_u;
    src_v += (chroma_rows - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // dst_y is known to be non-null here because of the argument check above.
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, rows);
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, chroma_cols, chroma_rows);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, chroma_cols, chroma_rows);
  return 0;
}
#define HAS_I42XTOUYVYROW_SSE2
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
__declspec(naked)
static void I42xToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
sub edx, esi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // src_uv_stride
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
sub edi, eax
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
movdqa xmm1, xmm2
movdqa xmm0, [eax]
pavgb xmm0, [eax + edx]
movdqa [eax + edi], xmm0
lea eax, [eax + 16]
punpcklbw xmm1, xmm0 // UYVY
punpckhbw xmm2, xmm0
movdqa [edi], xmm1
movdqa [edi + 16], xmm2
lea edi, [edi + 32]
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
// Packs one row of planar Y, U and V samples into YUY2 (Y0 U0 Y1 V0 ...)
// using SSE2 inline assembly.
// Each loop iteration reads 16 bytes of Y and 8 bytes each of U and V, writes
// 32 bytes to dst_frame, and subtracts 16 from width; the loop repeats while
// the remaining width is above zero.
// The Y loads and dst_frame stores use movdqa.
// NOTE(review): the dispatch code visible in this file selects this routine
// only when width is a multiple of 16 and src_y / dst_frame (and their
// strides) are 16-byte aligned - confirm every caller keeps that invariant.
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
asm volatile (
"sub %1,%2 \n"  // src_v now holds the distance from src_u to src_v
"1: \n"
"movq (%1),%%xmm2 \n"  // 8 bytes of U
"movq (%1,%2,1),%%xmm3 \n"  // 8 bytes of V, addressed as src_u + distance
"lea 0x8(%1),%1 \n"  // advance src_u by 8
"punpcklbw %%xmm3,%%xmm2 \n"  // interleave U and V bytes: U0 V0 U1 V1 ...
"movdqa (%0),%%xmm0 \n"  // 16 bytes of Y
"lea 0x10(%0),%0 \n"  // advance src_y by 16
"movdqa %%xmm0,%%xmm1 \n"  // keep a copy of Y for the high half
"punpcklbw %%xmm2,%%xmm0 \n"  // low 8 Y interleaved with UV -> Y U Y V
"punpckhbw %%xmm2,%%xmm1 \n"  // high 8 Y interleaved with UV
"movdqa %%xmm0,(%3) \n"  // store first 16 output bytes
"movdqa %%xmm1,0x10(%3) \n"  // store second 16 output bytes
"lea 0x20(%3),%3 \n"  // advance dst_frame by 32
"sub $0x10,%4 \n"  // 16 pixels done
"ja 1b \n"  // loop while pixels remain
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#define HAS_I42XTOUYVYROW_SSE2
static void I42xToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
asm volatile (
"sub %1,%2 \n"
"1: \n"
"movq (%1),%%xmm2 \n"
"movq (%1,%2,1),%%xmm3 \n"
"lea 0x8(%1),%1 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
"sub %0,%1 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"pavgb (%0,%3),%%xmm0 \n"
"movdqa %%xmm0,(%0,%1) \n"
"lea 0x10(%0),%0 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,(%3) \n"
"movdqa %%xmm2,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
"sub $0x10,%4 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "+r"(src_uv), // %0
"+r"(dst_uv), // %1
"+r"(pix) // %2
: "r"(static_cast<intptr_t>(src_uv_stride)) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#endif
void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
uint8* dst_frame, int width) {
for (int x = 0; x < width - 1; x += 2) {
dst_frame[0] = src_y[0];
dst_frame[1] = src_u[0];
dst_frame[2] = src_y[1];
dst_frame[3] = src_v[0];
dst_frame += 4;
src_y += 2;
src_u += 1;
src_v += 1;
}
if (width & 1) {
dst_frame[0] = src_y[0];
dst_frame[1] = src_u[0];
dst_frame[2] = src_y[0]; // duplicate last y
dst_frame[3] = src_v[0];
}
}
// Portable row packer: interleaves planar Y, U and V into UYVY order
// (U0 Y0 V0 Y1). Two pixels share one U and one V sample. When width is odd
// the final macro-pixel repeats the last Y sample.
void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u, const uint8* src_v,
                     uint8* dst_frame, int width) {
  int x = 0;
  while (x < width - 1) {
    *dst_frame++ = *src_u++;
    *dst_frame++ = src_y[0];
    *dst_frame++ = *src_v++;
    *dst_frame++ = src_y[1];
    src_y += 2;
    x += 2;
  }
  if (width & 1) {
    // Trailing single pixel: emit a full macro-pixel with Y duplicated.
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];
  }
}
// gcc provided macros
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
, "xmm0"
#endif
#ifdef LIBYUV_LITTLE_ENDIAN
#define READWORD(p) (*((uint32*) (p)))
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
#else
uint32 READWORD(const uint8* p) {
return (uint32) p[0] |
((uint32) (p[1]) << 8) |
((uint32) (p[2]) << 16) |
((uint32) (p[3]) << 24);
}
void WRITEWORD(uint8* p, uint32 v) {
p[0] = (uint8)(v & 255);
p[1] = (uint8)((v >> 8) & 255);
p[2] = (uint8)((v >> 16) & 255);
p[3] = (uint8)((v >> 24) & 255);
);
}
#endif
// Must be multiple of 6 pixels. Will over convert to handle remainder.
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
for (int x = 0; x < width; x += 6) {
uint32 w = READWORD(src_v210 + 0);
dst_uyvy[0] = (w >> 2) & 0xff;
dst_uyvy[1] = (w >> 12) & 0xff;
dst_uyvy[2] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 4);
dst_uyvy[3] = (w >> 2) & 0xff;
dst_uyvy[4] = (w >> 12) & 0xff;
dst_uyvy[5] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 8);
dst_uyvy[6] = (w >> 2) & 0xff;
dst_uyvy[7] = (w >> 12) & 0xff;
dst_uyvy[8] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 12);
dst_uyvy[9] = (w >> 2) & 0xff;
dst_uyvy[10] = (w >> 12) & 0xff;
dst_uyvy[11] = (w >> 22) & 0xff;
src_v210 += 16;
dst_uyvy += 12;
// Portable fallback that halves a plane vertically: each output byte is the
// rounded average of a source byte and the byte src_uv_stride bytes after it.
// Passing a stride of 0 averages a row with itself, i.e. copies it.
static void HalfRow_C(const uint8* src_uv, int src_uv_stride,
                      uint8* dst_uv, int pix) {
  for (int remaining = pix; remaining > 0; --remaining) {
    const int upper = src_uv[0];
    const int lower = src_uv[src_uv_stride];
    *dst_uv++ = (upper + lower + 1) >> 1;  // +1 rounds to nearest
    ++src_uv;
  }
}
// Expands an 8-bit sample to 10 bits: shifts it left by two and copies its
// top two bits into the two low bits, so 0x00 -> 0x000 and 0xff -> 0x3ff.
// The argument and each sub-expression are parenthesized so the macro expands
// correctly for compound arguments such as EIGHTTOTEN(a | b).
// The argument is evaluated twice; do not pass expressions with side effects.
#define EIGHTTOTEN(x) (((x) << 2) | ((x) >> 6))
void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) {
for (int x = 0; x < width; x += 6) {
WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) |
(EIGHTTOTEN(src_uyvy[1]) << 10) |
(EIGHTTOTEN(src_uyvy[2]) << 20));
WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) |
(EIGHTTOTEN(src_uyvy[4]) << 10) |
(EIGHTTOTEN(src_uyvy[5]) << 20));
WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) |
(EIGHTTOTEN(src_uyvy[7]) << 10) |
(EIGHTTOTEN(src_uyvy[8]) << 20));
WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) |
(EIGHTTOTEN(src_uyvy[10]) << 10) |
(EIGHTTOTEN(src_uyvy[11]) << 20));
src_uyvy += 12;
dst_v210 += 16;
}
}
int I422ToYUY2(const uint8* src_y, int src_stride_y,
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
int halfwidth = (width + 1) >> 1;
void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix);
#if defined(HAS_HALFROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2;
}
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
HalfRow = HalfRow_SSE2;
} else
#endif
{
HalfRow = HalfRow_C;
}
for (int y = 0; y < height; ++y) {
I42xToYUY2Row(src_y, src_u, src_y, dst_frame, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame;
// Copy Y plane
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
// SubSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
HalfRow(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane.
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_v, src_stride_v, dst_v, halfwidth);
src_v += src_stride_v * 2;
dst_v += dst_stride_v;
}
if (height & 1) {
HalfRow(src_v, 0, dst_v, halfwidth);
}
return 0;
}
int I420ToYUY2(const uint8* src_y, int src_stride_y,
// Blends 32x2 pixels to 16x1
// source in scale.cc
// Forward declarations of the row scalers used below: one platform-specific
// variant chosen by the preprocessor, plus the portable C version, which is
// declared unconditionally.
// NOTE(review): only the NEON branch defines a HAS_SCALEROWDOWN2_* macro.
// Nothing visible here defines HAS_SCALEROWDOWN2_SSE2, so code guarded by that
// macro is compiled out unless it is defined elsewhere - confirm intent.
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SCALEROWDOWN2_NEON
void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
uint8* dst, int dst_width);
#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(YUV_DISABLE_ASM)
void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
#endif
void ScaleRowDown2Int_C(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
int halfwidth = (width + 1) >> 1;
void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) &&
IS_ALIGNED(halfwidth, 16)) {
ScaleRowDown2 = ScaleRowDown2Int_NEON;
} else
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToYUY2Row = I42xToYUY2Row_SSE2;
}
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
ScaleRowDown2 = ScaleRowDown2Int_SSE2;
#endif
{
ScaleRowDown2 = ScaleRowDown2Int_C;
}
for (int y = 0; y < height - 1; y += 2) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
I42xToYUY2Row(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
// Copy Y plane
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
// SubSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
ScaleRowDown2(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane.
for (y = 0; y < height - 1; y += 2) {
ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth);
src_v += src_stride_v * 2;
dst_v += dst_stride_v;
}
if (height & 1) {
I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
ScaleRowDown2(src_v, 0, dst_v, halfwidth);
}
return 0;
}
int I422ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// I400 is greyscale typically used in MJPG
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
}
#endif
for (int y = 0; y < height; ++y) {
I42xToUYVYRow(src_y, src_u, src_y, dst_frame, width);
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame;
}
src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y;
}
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
return 0;
}
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
uint8* dst, int dst_stride_frame,
int width, int height) {
if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
// Copy plane
for (int y = 0; y < height; y += 2) {
memcpy(dst, src, width);
src += src_stride_0;
dst += dst_stride_frame;
memcpy(dst, src, width);
src += src_stride_1;
dst += dst_stride_frame;
}
}
// Support converting from FOURCC_M420
// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
// easy conversion to I420.
// M420 format description:
// M420 is row biplanar 420: 2 rows of Y and 1 row of VU.
// Chroma is half width / half height. (420)
// src_stride_m420 is row planar. Normally this will be the width in pixels.
// The UV plane is half width, but 2 values, so src_stride_m420 applies to
// this as well as the two Y planes.
static int X420ToI420(const uint8* src_y,
int src_stride_y0, int src_stride_y1,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
int halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
int halfwidth = (width + 1) >> 1;
void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUV_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) {
SplitUV = SplitUV_NEON;
} else
#elif defined(HAS_SPLITUV_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
}
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUV = SplitUV_SSE2;
} else
#endif
{
SplitUV = SplitUV_C;
}
for (int y = 0; y < height - 1; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v,
dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
if (dst_y) {
CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
width, height);
}
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
int halfheight = (height + 1) >> 1;
for (int y = 0; y < halfheight; ++y) {
// Copy a row of UV.
SplitUV(src_uv, dst_u, dst_v, halfwidth);
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_uv += src_stride_uv;
}
return 0;
}
int I420ToV210(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
// Convert NV12 to I420.
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (width * 16 / 6 > kMaxStride || // row buffer of V210 is required
src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
return -1;
return X420ToI420(src_y, src_stride_y, src_stride_y,
src_uv, src_stride_uv,
dst_y, dst_stride_y,
dst_u, dst_stride_u,
dst_v, dst_stride_v,
width, height);
}
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
dst_y, dst_stride_y,
dst_u, dst_stride_u,
dst_v, dst_stride_v,
width, height);
}
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// SSE2 (Visual C inline asm) row function: splits packed YUY2 into separate
// Y, U and V rows.
// Each loop iteration reads 32 bytes of YUY2 and writes 16 bytes of Y and
// 8 bytes each of U and V, then subtracts 16 from pix.
// src_yuy2 and dst_y are accessed with movdqa, so both must be 16-byte
// aligned; dst_u and dst_v are written with movq.
// The function is naked: arguments are read straight off the stack, and the
// "+ 8" in each offset skips the two registers pushed on entry.
__declspec(naked)
static void SplitYUY2_SSE2(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_yuy2
mov edx, [esp + 8 + 8] // dst_y
mov esi, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
convertloop:
// Load 32 bytes of YUY2 and advance the source pointer.
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
// Work on copies so xmm0/xmm1 still hold the raw data for the chroma pass.
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm2, xmm5 // even bytes are Y
pand xmm3, xmm5
packuswb xmm2, xmm3
movdqa [edx], xmm2
lea edx, [edx + 16]
psrlw xmm0, 8 // YUYV -> UVUV
psrlw xmm1, 8
packuswb xmm0, xmm1
movdqa xmm1, xmm0
pand xmm0, xmm5 // U
packuswb xmm0, xmm0
movq qword ptr [esi], xmm0
lea esi, [esi + 8]
psrlw xmm1, 8 // V
packuswb xmm1, xmm1
movq qword ptr [edi], xmm1
lea edi, [edi + 8]
// Loop while the remaining count is still above zero after subtracting 16.
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// SSE2 (gcc inline asm) row function: splits packed YUY2 into separate
// Y, U and V rows.
// Each loop iteration reads 32 bytes of YUY2 and writes 16 bytes of Y and
// 8 bytes each of U and V, then subtracts 16 from pix.
// src_yuy2 and dst_y are accessed with movdqa, so both must be 16-byte
// aligned; dst_u and dst_v are written with movq.
static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
// Build the 0x00ff00ff mask in xmm5.
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"1: \n"
// Load 32 bytes of YUY2 and advance the source pointer.
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
// Even bytes are Y: mask copies, pack to 16 bytes and store.
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm5,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqa %%xmm2,(%1) \n"
"lea 0x10(%1),%1 \n"
// Odd bytes are chroma: shift down and pack to get UVUV...
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
// U: even bytes of the UV stream.
"pand %%xmm5,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0,(%2) \n"
"lea 0x8(%2),%2 \n"
// V: odd bytes of the UV stream.
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm1,(%3) \n"
"lea 0x8(%3),%3 \n"
// Loop while the remaining count is still above zero after subtracting 16.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(pix) // %4
:
: "memory", "cc"
// The xmm clobbers are only listed when the compiler has SSE2 enabled.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
// Portable C row function: splits packed YUY2 (Y0 U Y1 V) into separate
// Y, U and V rows. Works in units of two pixels, so an odd pix still emits a
// full pair (two Y bytes, one U byte, one V byte) for the final pixel.
static void SplitYUY2_C(const uint8* src_yuy2,
                        uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
  for (int remaining = pix; remaining > 0; remaining -= 2) {
    *dst_y++ = src_yuy2[0];
    *dst_y++ = src_yuy2[2];
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}
// Convert Q420 to I420.
// Format is rows of YY/YUYV
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_frame = dst_frame + (height - 1) * dst_stride_frame;
dst_stride_frame = -dst_stride_frame;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix);
UYVYToV210Row = UYVYToV210Row_C;
void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* dst_frame, int width);
I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
int halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
void (*SplitYUY2)(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITYUY2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
I42xToUYVYRow = I42xToUYVYRow_SSE2;
}
IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
SplitYUY2 = SplitYUY2_SSE2;
} else
#endif
for (int y = 0; y < height - 1; y += 2) {
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
I42xToUYVYRow(src_y + src_stride_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame + dst_stride_frame, width);
src_y += src_stride_y * 2;
src_u += src_stride_u;
src_v += src_stride_v;
dst_frame += dst_stride_frame * 2;
{
SplitYUY2 = SplitYUY2_C;
}
if (height & 1) {
I42xToUYVYRow(src_y, src_u, src_v, row, width);
UYVYToV210Row(row, dst_frame, width);
for (int y = 0; y < height; y += 2) {
memcpy(dst_y, src_y, width);
dst_y += dst_stride_y;
src_y += src_stride_y;
// Copy a row of YUY2.
SplitYUY2(src_yuy2, dst_y, dst_u, dst_v, width);
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_yuy2 += src_stride_yuy2;
}
return 0;
}
......@@ -647,6 +691,56 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
return 0;
}
// Detect little-endian targets, first through the gcc provided macros.
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif

// READWORD(p) yields the 32-bit value stored at p with the least significant
// byte first.
#ifdef LIBYUV_LITTLE_ENDIAN
// Known little-endian target: load the word directly.
#define READWORD(p) (*((uint32*) (p)))
#else
// Otherwise assemble the word one byte at a time, most significant byte
// (p[3]) first.
static inline uint32 READWORD(const uint8* p) {
  uint32 word = (uint32) p[3];
  word = (word << 8) | (uint32) p[2];
  word = (word << 8) | (uint32) p[1];
  word = (word << 8) | (uint32) p[0];
  return word;
}
#endif
// Must be multiple of 6 pixels. Will over convert to handle remainder.
// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210
static void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) {
for (int x = 0; x < width; x += 6) {
uint32 w = READWORD(src_v210 + 0);
dst_uyvy[0] = (w >> 2) & 0xff;
dst_uyvy[1] = (w >> 12) & 0xff;
dst_uyvy[2] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 4);
dst_uyvy[3] = (w >> 2) & 0xff;
dst_uyvy[4] = (w >> 12) & 0xff;
dst_uyvy[5] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 8);
dst_uyvy[6] = (w >> 2) & 0xff;
dst_uyvy[7] = (w >> 12) & 0xff;
dst_uyvy[8] = (w >> 22) & 0xff;
w = READWORD(src_v210 + 12);
dst_uyvy[9] = (w >> 2) & 0xff;
dst_uyvy[10] = (w >> 12) & 0xff;
dst_uyvy[11] = (w >> 22) & 0xff;
src_v210 += 16;
dst_uyvy += 12;
}
}
// Convert V210 to I420.
// V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels.
// Width is multiple of 48.
......
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_from.h"
#include <string.h> // For memcpy()
#include "libyuv/basic_types.h"
#include "libyuv/convert.h" // For I420Copy
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#include "libyuv/video_common.h"
#include "row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (height - 1) * dst_stride_u;
dst_v = dst_v + (height - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
// Copy Y plane
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
int halfwidth = (width + 1) >> 1;
// UpSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
memcpy(dst_u, src_u, halfwidth);
memcpy(dst_u + dst_stride_u, src_u, halfwidth);
src_u += src_stride_u;
dst_u += dst_stride_u * 2;
}
if (height & 1) {
memcpy(dst_u, src_u, halfwidth);
}
// UpSample V plane.
for (y = 0; y < height - 1; y += 2) {
memcpy(dst_v, src_v, halfwidth);
memcpy(dst_v + dst_stride_v, src_v, halfwidth);
src_v += src_stride_v;
dst_v += dst_stride_v * 2;
}
if (height & 1) {
memcpy(dst_v, src_v, halfwidth);
}
return 0;
}
// Forward declaration of the bilinear plane scaler; its definition is not in
// this file. I420ToI444 below uses it to upsample the chroma planes from
// half size to full size.
void ScalePlaneBilinear(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr);
// Convert I420 to I444.
// The Y plane is copied as is (skipped when dst_y is NULL). The U and V
// planes are scaled from half width / half height (rounded up) to
// width x height with ScalePlaneBilinear.
// A negative height flips the destination vertically. Always returns 0.
int I420ToI444(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    const int last_row = height - 1;
    dst_y = dst_y + last_row * dst_stride_y;
    dst_u = dst_u + last_row * dst_stride_u;
    dst_v = dst_v + last_row * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  // Copy Y plane
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // Upsample U plane.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_u, dst_stride_u, src_u, dst_u);

  // Upsample V plane.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_v, dst_stride_v, src_v, dst_v);
  return 0;
}
// Copy a Y plane to I400. Source can be I420,422,444,400,NV12,NV21.
// A negative height flips the image by reading the source bottom-up.
// Always returns 0.
int I400Copy(const uint8* src_y, int src_stride_y,
             uint8* dst_y, int dst_stride_y,
             int width, int height) {
  if (height < 0) {
    // Start at the last source row and walk upwards.
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}
// YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
// UYVY - Macro-pixel = 2 image pixels
// U0Y0V0Y1
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
// SSE2 (Visual C inline asm) row function: interleaves one row of Y with
// U and V into packed YUY2.
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes and writes
// 32 output bytes, then subtracts 16 from width.
// src_y and dst_frame are accessed with movdqa, so both must be 16-byte
// aligned; U and V are loaded with movq.
// The function is naked: arguments are read straight off the stack, and the
// "+ 8" in each offset skips the two registers pushed on entry.
__declspec(naked)
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
// edx becomes the offset from U to V so [esi + edx] addresses V and only
// esi needs to advance.
sub edx, esi
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
lea eax, [eax + 16]
movdqa xmm1, xmm0
punpcklbw xmm0, xmm2 // YUYV
punpckhbw xmm1, xmm2
movdqa [edi], xmm0
movdqa [edi + 16], xmm1
lea edi, [edi + 32]
// Loop while the remaining count is still above zero after subtracting 16.
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#define HAS_I42XTOUYVYROW_SSE2
// SSE2 (Visual C inline asm) row function: interleaves one row of Y with
// U and V into packed UYVY.
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes and writes
// 32 output bytes, then subtracts 16 from width.
// src_y and dst_frame are accessed with movdqa, so both must be 16-byte
// aligned; U and V are loaded with movq.
// The function is naked: arguments are read straight off the stack, and the
// "+ 8" in each offset skips the two registers pushed on entry.
__declspec(naked)
static void I42xToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_y
mov esi, [esp + 8 + 8] // src_u
mov edx, [esp + 8 + 12] // src_v
mov edi, [esp + 8 + 16] // dst_frame
mov ecx, [esp + 8 + 20] // width
// edx becomes the offset from U to V so [esi + edx] addresses V and only
// esi needs to advance.
sub edx, esi
convertloop:
movq xmm2, qword ptr [esi] // U
movq xmm3, qword ptr [esi + edx] // V
lea esi, [esi + 8]
punpcklbw xmm2, xmm3 // UV
movdqa xmm0, [eax] // Y
movdqa xmm1, xmm2
lea eax, [eax + 16]
// Chroma goes in the low byte of each pair here, the reverse of YUY2.
punpcklbw xmm1, xmm0 // UYVY
punpckhbw xmm2, xmm0
movdqa [edi], xmm1
movdqa [edi + 16], xmm2
lea edi, [edi + 32]
// Loop while the remaining count is still above zero after subtracting 16.
sub ecx, 16
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_I42XTOYUY2ROW_SSE2
// SSE2 (gcc inline asm) row function: interleaves one row of Y with U and V
// into packed YUY2.
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes and writes
// 32 output bytes, then subtracts 16 from width.
// src_y and dst_frame are accessed with movdqa, so both must be 16-byte
// aligned; U and V are loaded with movq.
static void I42xToYUY2Row_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
asm volatile (
// %2 becomes the offset from U to V so (%1,%2,1) addresses V and only %1
// needs to advance.
"sub %1,%2 \n"
"1: \n"
// Load 8 U and 8 V bytes and interleave them into UVUV...
"movq (%1),%%xmm2 \n"
"movq (%1,%2,1),%%xmm3 \n"
"lea 0x8(%1),%1 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
// Load 16 Y bytes and interleave with the UV stream, Y first.
"movdqa (%0),%%xmm0 \n"
"lea 0x10(%0),%0 \n"
"movdqa %%xmm0,%%xmm1 \n"
"punpcklbw %%xmm2,%%xmm0 \n"
"punpckhbw %%xmm2,%%xmm1 \n"
"movdqa %%xmm0,(%3) \n"
"movdqa %%xmm1,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
// Loop while the remaining count is still above zero after subtracting 16.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
// The xmm clobbers are only listed when the compiler has SSE2 enabled.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#define HAS_I42XTOUYVYROW_SSE2
// SSE2 (gcc inline asm) row function: interleaves one row of Y with U and V
// into packed UYVY.
// Each loop iteration reads 16 Y bytes, 8 U bytes and 8 V bytes and writes
// 32 output bytes, then subtracts 16 from width.
// src_y and dst_frame are accessed with movdqa, so both must be 16-byte
// aligned; U and V are loaded with movq.
static void I42xToUYVYRow_SSE2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_frame, int width) {
asm volatile (
// %2 becomes the offset from U to V so (%1,%2,1) addresses V and only %1
// needs to advance.
"sub %1,%2 \n"
"1: \n"
// Load 8 U and 8 V bytes and interleave them into UVUV...
"movq (%1),%%xmm2 \n"
"movq (%1,%2,1),%%xmm3 \n"
"lea 0x8(%1),%1 \n"
"punpcklbw %%xmm3,%%xmm2 \n"
// Load 16 Y bytes and interleave with the UV stream, chroma first.
"movdqa (%0),%%xmm0 \n"
"movdqa %%xmm2,%%xmm1 \n"
"lea 0x10(%0),%0 \n"
"punpcklbw %%xmm0,%%xmm1 \n"
"punpckhbw %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,(%3) \n"
"movdqa %%xmm2,0x10(%3) \n"
"lea 0x20(%3),%3 \n"
// Loop while the remaining count is still above zero after subtracting 16.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_y), // %0
"+r"(src_u), // %1
"+r"(src_v), // %2
"+r"(dst_frame), // %3
"+rm"(width) // %4
:
: "memory", "cc"
// The xmm clobbers are only listed when the compiler has SSE2 enabled.
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3"
#endif
);
}
#endif
// Portable C row function: packs one row of Y plus one row each of U and V
// into YUY2 (Y0 U Y1 V). Two pixels share one U/V sample. When width is odd,
// the final Y sample is written twice to fill the last macro-pixel.
static void I42xToYUY2Row_C(const uint8* src_y, const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_frame, int width) {
  // Full macro-pixels.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = *src_u++;
    dst_frame[2] = src_y[1];
    dst_frame[3] = *src_v++;
    dst_frame += 4;
    src_y += 2;
  }
  // Trailing single pixel.
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[0];  // duplicate last y
    dst_frame[3] = src_v[0];
  }
}
// Portable C row function: packs one row of Y plus one row each of U and V
// into UYVY (U Y0 V Y1). Two pixels share one U/V sample. When width is odd,
// the final Y sample is written twice to fill the last macro-pixel.
static void I42xToUYVYRow_C(const uint8* src_y, const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_frame, int width) {
  // Full macro-pixels.
  for (int remaining = width; remaining > 1; remaining -= 2) {
    dst_frame[0] = *src_u++;
    dst_frame[1] = src_y[0];
    dst_frame[2] = *src_v++;
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
  }
  // Trailing single pixel.
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[0];  // duplicate last y
  }
}
// Detect little-endian targets, first through the gcc provided macros.
#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && defined(__BIG_ENDIAN)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LIBYUV_LITTLE_ENDIAN
#endif
// Visual C for x86 defines these
#elif defined(_M_X64) || defined(_M_IX86)
#define LIBYUV_LITTLE_ENDIAN
#endif

// WRITEWORD(p, v) stores the 32-bit value v at p with the least significant
// byte first.
#ifdef LIBYUV_LITTLE_ENDIAN
// Known little-endian target: store the word directly.
#define WRITEWORD(p, v) (*((uint32*) (p))) = v
#else
// Otherwise emit the four bytes one at a time, lowest byte at p[0].
static inline void WRITEWORD(uint8* p, uint32 v) {
  for (int byte = 0; byte < 4; ++byte) {
    p[byte] = (uint8)((v >> (8 * byte)) & 255);
  }
}
#endif
// Expand an 8-bit sample to 10 bits by replicating its top two bits into the
// new low bits. The argument is parenthesized so that compound expressions
// expand correctly; note it is still evaluated twice.
#define EIGHTTOTEN(x) ((x) << 2 | (x) >> 6)

// Convert one row of UYVY to V210.
// Works in groups of 6 pixels: 12 UYVY bytes become 16 bytes of V210 (four
// 32-bit words, each holding three 10-bit samples at bit offsets 0, 10 and
// 20). If width is not a multiple of 6, a whole extra group is converted, so
// both buffers must have room for the rounded-up width.
static void UYVYToV210Row_C(const uint8* src_uyvy, uint8* dst_v210, int width) {
  for (int x = 0; x < width; x += 6) {
    WRITEWORD(dst_v210 + 0, (EIGHTTOTEN(src_uyvy[0])) |
                            (EIGHTTOTEN(src_uyvy[1]) << 10) |
                            (EIGHTTOTEN(src_uyvy[2]) << 20));
    WRITEWORD(dst_v210 + 4, (EIGHTTOTEN(src_uyvy[3])) |
                            (EIGHTTOTEN(src_uyvy[4]) << 10) |
                            (EIGHTTOTEN(src_uyvy[5]) << 20));
    WRITEWORD(dst_v210 + 8, (EIGHTTOTEN(src_uyvy[6])) |
                            (EIGHTTOTEN(src_uyvy[7]) << 10) |
                            (EIGHTTOTEN(src_uyvy[8]) << 20));
    WRITEWORD(dst_v210 + 12, (EIGHTTOTEN(src_uyvy[9])) |
                             (EIGHTTOTEN(src_uyvy[10]) << 10) |
                             (EIGHTTOTEN(src_uyvy[11]) << 20));
    src_uyvy += 12;
    dst_v210 += 16;
  }
}
// Convert I422 to YUY2.
// TODO(fbarchard): Deprecate, move or expand 422 support?
//
// I422 has one U row and one V row per Y row, so all three source pointers
// advance on every output row.
// A negative height writes the destination bottom-up (vertical flip).
// Returns 0 on success, or -1 if any source plane or dst_frame is NULL.
int I422ToYUY2(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }
  void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
  I42xToYUY2Row = I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
  // The SSE2 row needs width in multiples of 16 and 16-byte aligned Y and
  // destination rows.
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToYUY2Row = I42xToYUY2Row_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    // The third argument must be the V row; passing the Y row here would put
    // luma bytes into the V slots of the output.
    I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_frame += dst_stride_frame;
  }
  return 0;
}
// Convert I420 to YUY2.
// Each U/V row is shared by two consecutive Y rows, so the chroma pointers
// advance only after every second output row.
// A negative height writes the destination bottom-up (vertical flip).
// Returns 0 on success, or -1 if any source plane or dst_frame is NULL.
int I420ToYUY2(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame += (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }

  // Start with the portable row function; upgrade to SSE2 when possible.
  void (*I42xToYUY2Row)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width) =
      I42xToYUY2Row_C;
#if defined(HAS_I42XTOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    const bool aligned =
        IS_ALIGNED(width, 16) &&
        IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
        IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16);
    if (aligned) {
      I42xToYUY2Row = I42xToYUY2Row_SSE2;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    I42xToYUY2Row(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    dst_frame += dst_stride_frame;
    // Move to the next chroma row after the second Y row of each pair.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I422 to UYVY.
// TODO(fbarchard): Deprecate, move or expand 422 support?
//
// I422 has one U row and one V row per Y row, so all three source pointers
// advance on every output row.
// A negative height writes the destination bottom-up (vertical flip).
// Returns 0 on success, or -1 if any source plane or dst_frame is NULL.
int I422ToUYVY(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame = dst_frame + (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }
  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width);
  I42xToUYVYRow = I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
  // The SSE2 row needs width in multiples of 16 and 16-byte aligned Y and
  // destination rows.
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16)) {
    I42xToUYVYRow = I42xToUYVYRow_SSE2;
  }
#endif
  for (int y = 0; y < height; ++y) {
    // The third argument must be the V row; passing the Y row here would put
    // luma bytes into the V slots of the output.
    I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_frame += dst_stride_frame;
  }
  return 0;
}
// Convert I420 to UYVY.
// Each U/V row is shared by two consecutive Y rows, so the chroma pointers
// advance only after every second output row.
// A negative height writes the destination bottom-up (vertical flip).
// Returns 0 on success, or -1 if any source plane or dst_frame is NULL.
int I420ToUYVY(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame += (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }

  // Start with the portable row function; upgrade to SSE2 when possible.
  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width) =
      I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    const bool aligned =
        IS_ALIGNED(width, 16) &&
        IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
        IS_ALIGNED(dst_frame, 16) && IS_ALIGNED(dst_stride_frame, 16);
    if (aligned) {
      I42xToUYVYRow = I42xToUYVYRow_SSE2;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    I42xToUYVYRow(src_y, src_u, src_v, dst_frame, width);
    src_y += src_stride_y;
    dst_frame += dst_stride_frame;
    // Move to the next chroma row after the second Y row of each pair.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to V210.
// Each row is first packed to UYVY in a kMaxStride-byte stack buffer and then
// expanded to V210 in dst_frame. Each U/V row is shared by two consecutive
// Y rows.
// A negative height writes the destination bottom-up (vertical flip).
// Returns 0 on success, or -1 if width * 16 / 6 exceeds kMaxStride or any
// source plane or dst_frame is NULL.
int I420ToV210(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_frame, int dst_stride_frame,
               int width, int height) {
  // row buffer of V210 is required
  if (width * 16 / 6 > kMaxStride) {
    return -1;
  }
  if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_frame += (height - 1) * dst_stride_frame;
    dst_stride_frame = -dst_stride_frame;
  }

  SIMD_ALIGNED(uint8 row[kMaxStride]);
  void (*UYVYToV210Row)(const uint8* src_uyvy, uint8* dst_v210, int pix) =
      UYVYToV210Row_C;
  void (*I42xToUYVYRow)(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* dst_frame, int width) =
      I42xToUYVYRow_C;
#if defined(HAS_I42XTOUYVYROW_SSE2)
  // Only the Y source is checked here; the output of this stage is the
  // SIMD_ALIGNED row buffer.
  if (TestCpuFlag(kCpuHasSSE2)) {
    const bool aligned =
        IS_ALIGNED(width, 16) &&
        IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16);
    if (aligned) {
      I42xToUYVYRow = I42xToUYVYRow_SSE2;
    }
  }
#endif

  for (int y = 0; y < height; ++y) {
    I42xToUYVYRow(src_y, src_u, src_v, row, width);
    UYVYToV210Row(row, dst_frame, width);
    src_y += src_stride_y;
    dst_frame += dst_stride_frame;
    // Move to the next chroma row after the second Y row of each pair.
    if (y & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to ARGB.
// One output row is produced per Y row; the U/V pointers advance after every
// odd row so each chroma row serves two Y rows.
// A negative height writes the destination bottom-up (vertical flip).
// Always returns 0.
int I420ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  // Default to the C row function; a SIMD variant replaces it below when the
  // build and the CPU support one.
  void (*I420ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (IS_ALIGNED(width, 16)) {
      I420ToARGBRow = I420ToARGBRow_NEON;
    } else {
      I420ToARGBRow = I420ToARGBRow_Any_NEON;
    }
  }
#elif defined(HAS_I420TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      I420ToARGBRow = I420ToARGBRow_SSSE3;
    } else {
      I420ToARGBRow = I420ToARGBRow_Any_SSSE3;
    }
  }
#endif

  bool odd_row = false;
  for (int rows_left = height; rows_left > 0; --rows_left) {
    I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (odd_row) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
    odd_row = !odd_row;
  }
  return 0;
}
// Convert I420 to BGRA.
// One output row is produced per Y row; the U/V pointers advance after every
// odd row so each chroma row serves two Y rows.
// A negative height writes the destination bottom-up (vertical flip).
// Always returns 0.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_bgra += (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }

  // Default to the C row function; a SIMD variant replaces it below when the
  // build and the CPU support one.
  void (*I420ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (IS_ALIGNED(width, 16)) {
      I420ToBGRARow = I420ToBGRARow_NEON;
    } else {
      I420ToBGRARow = I420ToBGRARow_Any_NEON;
    }
  }
#elif defined(HAS_I420TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
      I420ToBGRARow = I420ToBGRARow_SSSE3;
    } else {
      I420ToBGRARow = I420ToBGRARow_Any_SSSE3;
    }
  }
#endif

  bool odd_row = false;
  for (int rows_left = height; rows_left > 0; --rows_left) {
    I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    if (odd_row) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
    odd_row = !odd_row;
  }
  return 0;
}
// Convert I420 to ABGR.
// One output row is produced per Y row; the U/V pointers advance after every
// odd row so each chroma row serves two Y rows.
// A negative height writes the destination bottom-up (vertical flip).
// Always returns 0.
int I420ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_abgr += (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }

  // Default to the C row function; a SIMD variant replaces it below when the
  // build and the CPU support one.
  void (*I420ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (IS_ALIGNED(width, 16)) {
      I420ToABGRRow = I420ToABGRRow_NEON;
    } else {
      I420ToABGRRow = I420ToABGRRow_Any_NEON;
    }
  }
#elif defined(HAS_I420TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    if (IS_ALIGNED(width, 8) &&
        IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
      I420ToABGRRow = I420ToABGRRow_SSSE3;
    } else {
      I420ToABGRRow = I420ToABGRRow_Any_SSSE3;
    }
  }
#endif

  bool odd_row = false;
  for (int rows_left = height; rows_left > 0; --rows_left) {
    I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    if (odd_row) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
    odd_row = !odd_row;
  }
  return 0;
}
// Convert I420 to RGB24.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
} else
#endif
{
ARGBToRGB24Row = ARGBToRGB24Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB24Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RAW.
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
} else
#endif
{
ARGBToRAWRow = ARGBToRAWRow_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRAWRow(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to RGB565.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb, int dst_stride_rgb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
dst_stride_rgb = -dst_stride_rgb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
} else
#endif
{
ARGBToRGB565Row = ARGBToRGB565Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB565Row(row, dst_rgb, width);
dst_rgb += dst_stride_rgb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB1555.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
} else
#endif
{
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB1555Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to ARGB4444.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
} else
#endif
{
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB4444Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
}
// Convert I420 to specified format
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format) {
if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) {
return -1;
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
I420ToYUY2(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_UYVY:
I420ToUYVY(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_V210:
I420ToV210(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride :
(width + 47) / 48 * 128,
width, height);
break;
case FOURCC_RGBP:
I420ToRGB565(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_RGBO:
I420ToARGB1555(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_R444:
I420ToARGB4444(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_24BG:
I420ToRGB24(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 3,
width, height);
break;
case FOURCC_RAW:
I420ToRAW(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 3,
width, height);
break;
case FOURCC_ARGB:
I420ToARGB(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_BGRA:
I420ToBGRA(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_ABGR:
I420ToABGR(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_BGGR:
I420ToBayerBGGR(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GBRG:
I420ToBayerGBRG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GRBG:
I420ToBayerGRBG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_RGGB:
I420ToBayerRGGB(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_I400:
I400Copy(y, y_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
// Triplanar formats
// TODO(fbarchard): halfstride instead of halfwidth
case FOURCC_I420:
case FOURCC_YV12: {
int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
uint8* dst_u;
uint8* dst_v;
if (format == FOURCC_I420) {
dst_u = dst_sample + width * height;
dst_v = dst_u + halfwidth * halfheight;
} else {
dst_v = dst_sample + width * height;
dst_u = dst_v + halfwidth * halfheight;
}
I420Copy(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample, width,
dst_u, halfwidth,
dst_v, halfwidth,
width, height);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
int halfwidth = (width + 1) / 2;
uint8* dst_u;
uint8* dst_v;
if (format == FOURCC_I422) {
dst_u = dst_sample + width * height;
dst_v = dst_u + halfwidth * height;
} else {
dst_v = dst_sample + width * height;
dst_u = dst_v + halfwidth * height;
}
I420ToI422(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample, width,
dst_u, halfwidth,
dst_v, halfwidth,
width, height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
uint8* dst_u;
uint8* dst_v;
if (format == FOURCC_I444) {
dst_u = dst_sample + width * height;
dst_v = dst_u + width * height;
} else {
dst_v = dst_sample + width * height;
dst_u = dst_v + width * height;
}
I420ToI444(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample, width,
dst_u, width,
dst_v, width,
width, height);
break;
}
// Formats not supported - MJPG, biplanar, some rgb formats.
default:
return -1; // unknown fourcc - return failure code.
}
return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert.h"
#include "libyuv/basic_types.h"
#include "libyuv/format_conversion.h"
#include "libyuv/planar_functions.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I420 to specified format
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format) {
if (y == NULL || u == NULL || v == NULL || dst_sample == NULL) {
return -1;
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
I420ToYUY2(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_UYVY:
I420ToUYVY(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_V210:
I420ToV210(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride :
(width + 47) / 48 * 128,
width, height);
break;
case FOURCC_RGBP:
I420ToRGB565(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_RGBO:
I420ToARGB1555(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_R444:
I420ToARGB4444(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 2,
width, height);
break;
case FOURCC_24BG:
I420ToRGB24(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 3,
width, height);
break;
case FOURCC_RAW:
I420ToRAW(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 3,
width, height);
break;
case FOURCC_ARGB:
I420ToARGB(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_BGRA:
I420ToBGRA(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_ABGR:
I420ToABGR(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width * 4,
width, height);
break;
case FOURCC_BGGR:
I420ToBayerBGGR(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GBRG:
I420ToBayerGBRG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_GRBG:
I420ToBayerGRBG(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_RGGB:
I420ToBayerRGGB(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
case FOURCC_I400:
I400Copy(y, y_stride,
dst_sample,
dst_sample_stride ? dst_sample_stride : width,
width, height);
break;
// Triplanar formats
// TODO(fbarchard): halfstride instead of halfwidth
case FOURCC_I420:
case FOURCC_YV12: {
int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
uint8* dst_u;
uint8* dst_v;
if (format == FOURCC_I420) {
dst_u = dst_sample + width * height;
dst_v = dst_u + halfwidth * halfheight;
} else {
dst_v = dst_sample + width * height;
dst_u = dst_v + halfwidth * halfheight;
}
I420Copy(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample, width,
dst_u, halfwidth,
dst_v, halfwidth,
width, height);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
int halfwidth = (width + 1) / 2;
uint8* dst_u;
uint8* dst_v;
if (format == FOURCC_I422) {
dst_u = dst_sample + width * height;
dst_v = dst_u + halfwidth * height;
} else {
dst_v = dst_sample + width * height;
dst_u = dst_v + halfwidth * height;
}
I420ToI422(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample, width,
dst_u, halfwidth,
dst_v, halfwidth,
width, height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
uint8* dst_u;
uint8* dst_v;
if (format == FOURCC_I444) {
dst_u = dst_sample + width * height;
dst_v = dst_u + width * height;
} else {
dst_v = dst_sample + width * height;
dst_u = dst_v + width * height;
}
I420ToI444(y, y_stride,
u, u_stride,
v, v_stride,
dst_sample, width,
dst_u, width,
dst_v, width,
width, height);
break;
}
// Formats not supported - MJPG, biplanar, some rgb formats.
default:
return -1; // unknown fourcc - return failure code.
}
return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
......@@ -20,116 +20,6 @@ namespace libyuv {
extern "C" {
#endif
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITUV_NEON
// De-interleaves a row of UV pairs with NEON: reads 16 pairs of UV per loop
// and writes the even bytes to dst_u and the odd bytes to dst_v.
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
// The loop test sits at the bottom, so one batch of 16 pairs is always
// processed, even when pix is smaller than 16.
static void SplitUV_NEON(const uint8* src_uv,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"1: \n"
"vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
"vst1.u8 {q0}, [%1]! \n" // store U
"vst1.u8 {q1}, [%2]! \n" // Store V
"bhi 1b \n" // loop on the flags set by the subs above
: "+r"(src_uv),
"+r"(dst_u),
"+r"(dst_v),
"+r"(pix) // Output registers
: // Input registers
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#elif defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITUV_SSE2
// De-interleaves a row of UV pairs with SSE2 (MSVC inline assembly).
// Each loop iteration reads 32 source bytes and writes 16 bytes to dst_u
// (the even source bytes) and 16 bytes to dst_v (the odd source bytes).
// The function is naked, so the arguments are read from the stack by hand.
// All loads and stores use movdqa; the loop test sits at the bottom, so one
// iteration always runs.
__declspec(naked)
static void SplitUV_SSE2(const uint8* src_uv,
uint8* dst_u, uint8* dst_v, int pix) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
sub edi, edx // edi = dst_v - dst_u, so [edx + edi] addresses dst_v
convertloop:
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm0, xmm5 // even bytes
pand xmm1, xmm5
packuswb xmm0, xmm1
psrlw xmm2, 8 // odd bytes
psrlw xmm3, 8
packuswb xmm2, xmm3
movdqa [edx], xmm0 // store 16 U bytes
movdqa [edx + edi], xmm2 // store 16 V bytes
lea edx, [edx + 16]
sub ecx, 16
ja convertloop
pop edi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITUV_SSE2
// De-interleaves a row of UV pairs with SSE2 (GCC-style inline assembly).
// Each loop iteration reads 32 source bytes and writes 16 bytes to dst_u
// (the even source bytes) and 16 bytes to dst_v (the odd source bytes).
// All loads and stores use movdqa; the loop test sits at the bottom, so one
// iteration always runs.
static void SplitUV_SSE2(const uint8* src_uv,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
// xmm5 = 0x00ff in every 16 bit lane: selects the even bytes.
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
// %2 becomes dst_v - dst_u so (%1,%2) addresses dst_v.
"sub %1,%2 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
// Even bytes -> xmm0.
"pand %%xmm5,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
// Odd bytes -> xmm2.
"psrlw $0x8,%%xmm2 \n"
"psrlw $0x8,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqa %%xmm0,(%1) \n"
"movdqa %%xmm2,(%1,%2) \n"
"lea 0x10(%1),%1 \n"
"sub $0x10,%3 \n"
"ja 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
// Portable de-interleave of one row: for each of the |pix| UV pairs the first
// byte goes to dst_u and the second byte goes to dst_v.
static void SplitUV_C(const uint8* src_uv,
                      uint8* dst_u, uint8* dst_v, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[2 * i];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
// CopyRows copies 'count' bytes using a 16 byte load/store, 64 bytes at a time
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_COPYROW_SSE2
......@@ -203,1159 +93,132 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
"+c"(width_tmp) // %2
:
: "memory", "cc"
);
}
#endif
// Portable row copy: copies |count| bytes from src to dst with memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t byte_count = (size_t)count;
  memcpy(dst, src, byte_count);
}
// Copy a plane of data
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
void (*CopyRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2;
} else
#endif
#if defined(HAS_COPYROW_X86)
if (IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
CopyRow = CopyRow_X86;
} else
#endif
{
CopyRow = CopyRow_C;
}
// Copy plane
for (int y = 0; y < height; ++y) {
CopyRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
// Copy I420 with optional flipping.
// Returns -1 if any plane pointer is NULL, width <= 0 or height == 0;
// returns 0 after copying. A negative height reads the source bottom-up, which
// flips the image vertically.
int I420Copy(const uint8* src_y, int src_stride_y,
             const uint8* src_u, int src_stride_u,
             const uint8* src_v, int src_stride_v,
             uint8* dst_y, int dst_stride_y,
             uint8* dst_u, int dst_stride_u,
             uint8* dst_v, int dst_stride_v,
             int width, int height) {
  const int missing_plane = !src_y || !src_u || !src_v ||
                            !dst_y || !dst_u || !dst_v;
  if (missing_plane || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    const int last_chroma_row = ((height + 1) >> 1) - 1;
    src_y += (height - 1) * src_stride_y;
    src_u += last_chroma_row * src_stride_u;
    src_v += last_chroma_row * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // Chroma planes are half size in both directions, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  // dst_y is known to be non-NULL here because of the validation above.
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u,
            chroma_width, chroma_height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v,
            chroma_width, chroma_height);
  return 0;
}
// Mirror a plane of data
void MirrorPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
void (*MirrorRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_NEON;
} else
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else
#endif
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSE2;
} else
#endif
{
MirrorRow = MirrorRow_C;
}
// Mirror plane
for (int y = 0; y < height; ++y) {
MirrorRow(src_y, dst_y, width);
src_y += src_stride_y;
dst_y += dst_stride_y;
}
}
// Mirror I420 with optional flipping.
// Returns -1 if any plane pointer is NULL, width <= 0 or height == 0;
// returns 0 after mirroring all three planes. A negative height reads the
// source bottom-up, which additionally flips the image vertically.
int I420Mirror(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  const int missing_plane = !src_y || !src_u || !src_v ||
                            !dst_y || !dst_u || !dst_v;
  if (missing_plane || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    const int last_chroma_row = ((height + 1) >> 1) - 1;
    src_y += (height - 1) * src_stride_y;
    src_u += last_chroma_row * src_stride_u;
    src_v += last_chroma_row * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // Chroma planes are half size in both directions, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  // dst_y is known to be non-NULL here because of the validation above.
  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u,
              chroma_width, chroma_height);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v,
              chroma_width, chroma_height);
  return 0;
}
// Copy ARGB with optional flipping.
// Returns -1 if either pointer is NULL, width <= 0 or height == 0; returns 0
// after copying. A negative height reads the source bottom-up, which flips the
// image vertically.
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int width, int height) {
  const int bad_args = !src_argb || !dst_argb || width <= 0 || height == 0;
  if (bad_args) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb += (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  // The image is copied as a single plane whose rows are width * 4 bytes.
  const int row_bytes = width * 4;
  CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
            row_bytes, height);
  return 0;
}
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
// Averages two rows with SSE2 (MSVC inline assembly): each destination byte
// is the pavgb average of src_uv[x] and src_uv[src_uv_stride + x].
// Processes 16 bytes per loop iteration using movdqa loads and stores; the
// loop test sits at the bottom, so one iteration always runs.
// The function is naked, so the arguments are read from the stack by hand.
__declspec(naked)
static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
__asm {
push edi
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // src_uv_stride
mov edi, [esp + 4 + 12] // dst_uv
mov ecx, [esp + 4 + 16] // pix
sub edi, eax // edi = dst_uv - src_uv, so [eax + edi] addresses dst_uv
convertloop:
movdqa xmm0, [eax]
pavgb xmm0, [eax + edx] // average with the row src_uv_stride bytes away
movdqa [eax + edi], xmm0
lea eax, [eax + 16]
sub ecx, 16
ja convertloop
pop edi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_HALFROW_SSE2
// Averages two rows with SSE2 (GCC-style inline assembly): each destination
// byte is the pavgb average of src_uv[x] and src_uv[src_uv_stride + x].
// Processes 16 bytes per loop iteration using movdqa loads and stores; the
// loop test sits at the bottom, so one iteration always runs.
static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix) {
asm volatile (
// %1 becomes dst_uv - src_uv so (%0,%1) addresses dst_uv.
"sub %0,%1 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
// Average with the row src_uv_stride bytes away.
"pavgb (%0,%3),%%xmm0 \n"
"movdqa %%xmm0,(%0,%1) \n"
"lea 0x10(%0),%0 \n"
"sub $0x10,%2 \n"
"ja 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_uv), // %1
"+r"(pix) // %2
: "r"(static_cast<intptr_t>(src_uv_stride)) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0"
#endif
);
}
#endif
// Portable two-row average: dst_uv[x] is the rounded-up mean of src_uv[x] and
// the byte src_uv_stride further on, for x in [0, pix).
void HalfRow_C(const uint8* src_uv, int src_uv_stride,
               uint8* dst_uv, int pix) {
  const uint8* other_row = src_uv + src_uv_stride;
  for (int i = 0; i < pix; ++i) {
    const int sum = src_uv[i] + other_row[i] + 1;  // +1 rounds halves up.
    dst_uv[i] = sum >> 1;
  }
}
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
int halfwidth = (width + 1) >> 1;
void (*HalfRow)(const uint8* src_uv, int src_uv_stride,
uint8* dst_uv, int pix);
#if defined(HAS_HALFROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(halfwidth, 16) &&
IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
HalfRow = HalfRow_SSE2;
} else
#endif
{
HalfRow = HalfRow_C;
}
// Copy Y plane
if (dst_y) {
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
// SubSample U plane.
int y;
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_u, src_stride_u, dst_u, halfwidth);
src_u += src_stride_u * 2;
dst_u += dst_stride_u;
}
if (height & 1) {
HalfRow(src_u, 0, dst_u, halfwidth);
}
// SubSample V plane.
for (y = 0; y < height - 1; y += 2) {
HalfRow(src_v, src_stride_v, dst_v, halfwidth);
src_v += src_stride_v * 2;
dst_v += dst_stride_v;
}
if (height & 1) {
HalfRow(src_v, 0, dst_v, halfwidth);
}
return 0;
}
// Convert I420 to I422: the Y plane is copied and every source chroma row is
// written to two consecutive destination chroma rows. A NULL dst_y skips the
// Y copy. A negative height writes the destination bottom-up. Always
// returns 0.
int I420ToI422(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    // All three destination planes are indexed with the full height here.
    const int last_row = height - 1;
    dst_y += last_row * dst_stride_y;
    dst_u += last_row * dst_stride_u;
    dst_v += last_row * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Copy Y plane
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  const int halfwidth = (width + 1) >> 1;
  const int row_pairs = height >> 1;           // complete pairs of output rows
  const int has_odd_row = (height & 1) != 0;   // trailing unpaired output row

  // UpSample U plane.
  for (int i = 0; i < row_pairs; ++i) {
    memcpy(dst_u, src_u, halfwidth);
    memcpy(dst_u + dst_stride_u, src_u, halfwidth);
    src_u += src_stride_u;
    dst_u += dst_stride_u * 2;
  }
  if (has_odd_row) {
    memcpy(dst_u, src_u, halfwidth);
  }

  // UpSample V plane.
  for (int i = 0; i < row_pairs; ++i) {
    memcpy(dst_v, src_v, halfwidth);
    memcpy(dst_v + dst_stride_v, src_v, halfwidth);
    src_v += src_stride_v;
    dst_v += dst_stride_v * 2;
  }
  if (has_odd_row) {
    memcpy(dst_v, src_v, halfwidth);
  }
  return 0;
}
// Blends 32x2 pixels to 16x1
// source in scale.cc
// Prototypes for the row scalers, which are not defined in this file.
// NOTE(review): the x86 branch below declares ScaleRowDown2Int_SSE2 but does
// not define HAS_SCALEROWDOWN2_SSE2, so code guarded by that macro stays
// disabled unless the macro is defined elsewhere - confirm this is intended.
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_SCALEROWDOWN2_NEON
void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
uint8* dst, int dst_width);
#elif (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
!defined(YUV_DISABLE_ASM)
void ScaleRowDown2Int_SSE2(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
#endif
void ScaleRowDown2Int_C(const uint8* src_ptr, int src_stride,
uint8* dst_ptr, int dst_width);
// Convert I444 to I420: the Y plane is copied and the chroma planes are
// reduced with ScaleRowDown2, which takes two source rows and produces one
// destination row of halfwidth pixels. A NULL dst_y skips the Y copy.
// A negative height reads the source bottom-up. Always returns 0.
int I444ToI420(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  int halfwidth = (width + 1) >> 1;
  void (*ScaleRowDown2)(const uint8* src_ptr, int src_stride,
                        uint8* dst_ptr, int dst_width);
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) &&
      IS_ALIGNED(halfwidth, 16)) {
    ScaleRowDown2 = ScaleRowDown2Int_NEON;
  } else
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(halfwidth, 16) &&
      IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
      IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
    ScaleRowDown2 = ScaleRowDown2Int_SSE2;
  } else  // Close the branch so the C fallback below is a true alternative.
#endif
  {
    ScaleRowDown2 = ScaleRowDown2Int_C;
  }

  // Copy Y plane
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // SubSample U plane.
  int y;
  for (y = 0; y < height - 1; y += 2) {
    ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth);
    src_u += src_stride_u * 2;
    dst_u += dst_stride_u;
  }
  if (height & 1) {
    // Stride 0 pairs the last row with itself.
    ScaleRowDown2(src_u, 0, dst_u, halfwidth);
  }

  // SubSample V plane.
  for (y = 0; y < height - 1; y += 2) {
    ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth);
    src_v += src_stride_v * 2;
    dst_v += dst_stride_v;
  }
  if (height & 1) {
    ScaleRowDown2(src_v, 0, dst_v, halfwidth);
  }
  return 0;
}
// use Bilinear for upsampling chroma
void ScalePlaneBilinear(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr);
// Convert I420 to I444: the Y plane is copied and each half-size chroma plane
// is scaled up to width x height with ScalePlaneBilinear. A NULL dst_y skips
// the Y copy. A negative height writes the destination bottom-up. Always
// returns 0.
int I420ToI444(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    // All three destination planes are indexed with the full height here.
    const int last_row = height - 1;
    dst_y += last_row * dst_stride_y;
    dst_u += last_row * dst_stride_u;
    dst_v += last_row * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Copy Y plane
  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // Source chroma dimensions: half size in both directions, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;

  // Upsample U plane.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_u, dst_stride_u, src_u, dst_u);

  // Upsample V plane.
  ScalePlaneBilinear(chroma_width, chroma_height, width, height,
                     src_stride_v, dst_stride_v, src_v, dst_v);
  return 0;
}
// Copies |height| rows of |width| bytes from a source whose row spacing
// alternates: src_stride_0 bytes separate the first and second row of each
// pair, and src_stride_1 bytes separate the second row from the next pair.
// Destination rows are dst_stride_frame bytes apart.
// Exactly |height| rows are copied: for an odd height the final row is copied
// on its own, so nothing is read or written past the last row.
// Nothing is copied when height <= 0.
static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
                       uint8* dst, int dst_stride_frame,
                       int width, int height) {
  // Copy the complete pairs of rows.
  for (int y = 0; y < height - 1; y += 2) {
    memcpy(dst, src, width);
    src += src_stride_0;
    dst += dst_stride_frame;
    memcpy(dst, src, width);
    src += src_stride_1;
    dst += dst_stride_frame;
  }
  // Copy the unpaired last row of an odd-height plane.
  if (height > 0 && (height & 1)) {
    memcpy(dst, src, width);
  }
}
// Support converting from FOURCC_M420
// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
// easy conversion to I420.
// M420 format description:
// M420 is row biplanar 420: 2 rows of Y and 1 row of VU.
// Chroma is half width / half height. (420)
// src_stride_m420 is row planar. Normally this will be the width in pixels.
// The UV plane is half width, but 2 values, so src_stride_m420 applies to
// this as well as the two Y planes.
// Shared worker for biplanar 4:2:0 sources: copies the Y rows and splits each
// interleaved UV row into separate U and V rows.
//   src_y          first Y row.
//   src_stride_y0  step from an even Y row to the following odd Y row.
//   src_stride_y1  step from an odd Y row to the following even Y row.
//   src_uv         first interleaved UV row, stepped by src_stride_uv.
//   dst_y/u/v      destination planes and strides; the Y copy is skipped when
//                  dst_y is NULL.
//   width, height  picture size; a negative height flips the output.
// Always returns 0.
static int X420ToI420(const uint8* src_y,
                      int src_stride_y0, int src_stride_y1,
                      const uint8* src_uv, int src_stride_uv,
                      uint8* dst_y, int dst_stride_y,
                      uint8* dst_u, int dst_stride_u,
                      uint8* dst_v, int dst_stride_v,
                      int width, int height) {
  // Negative height: point each destination plane at its last row and walk
  // upwards by negating the strides.
  if (height < 0) {
    height = -height;
    const int flipped_chroma_rows = (height + 1) >> 1;
    dst_y += (height - 1) * dst_stride_y;
    dst_u += (flipped_chroma_rows - 1) * dst_stride_u;
    dst_v += (flipped_chroma_rows - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }

  // Chroma dimensions are half the luma dimensions, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_rows = (height + 1) >> 1;

  // Start with the portable splitter; switch to a SIMD one when the build
  // provides it and the CPU / alignment checks pass.
  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
      SplitUV_C;
#if defined(HAS_SPLITUV_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(chroma_width, 16)) {
    SplitUV = SplitUV_NEON;
  }
#elif defined(HAS_SPLITUV_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(chroma_width, 16) &&
      IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
      IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
      IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
    SplitUV = SplitUV_SSE2;
  }
#endif

  // Luma.
  if (dst_y) {
    CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y,
               width, height);
  }

  // Chroma: one interleaved source row yields one U row and one V row.
  for (int row = 0; row < chroma_rows; ++row) {
    SplitUV(src_uv, dst_u, dst_v, chroma_width);
    src_uv += src_stride_uv;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// Convert M420 to I420.
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2,
src_m420 + src_stride_m420 * 2, src_stride_m420 * 3,
dst_y, dst_stride_y,
dst_u, dst_stride_u,
dst_v, dst_stride_v,
width, height);
}
// Convert NV12 to I420.
// NV12 keeps Y and interleaved UV in separate planes, so both Y row steps
// handed to X420ToI420 are simply src_stride_y.
int NV12ToI420(const uint8* src_y, int src_stride_y,
               const uint8* src_uv, int src_stride_uv,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  const int status = X420ToI420(src_y, src_stride_y, src_stride_y,
                                src_uv, src_stride_uv,
                                dst_y, dst_stride_y,
                                dst_u, dst_stride_u,
                                dst_v, dst_stride_v,
                                width, height);
  return status;
}
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// Splits one row of packed YUY2 into separate Y, U and V rows with SSE2
// (Visual C inline assembly; the function is naked, so arguments are read
// directly from the stack).
// Each loop pass reads 32 source bytes and stores 16 Y bytes, 8 U bytes and
// 8 V bytes, then subtracts 16 from pix and repeats while `ja` is taken.
// src_yuy2 and dst_y are accessed with movdqa (aligned 16-byte moves); dst_u
// and dst_v are written 8 bytes at a time with movq.
// NOTE(review): pix is presumably a multiple of 16 - the caller in
// Q420ToI420 checks IS_ALIGNED(width, 16) before selecting this function.
__declspec(naked)
static void SplitYUY2_SSE2(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
// esi and edi are saved here and restored before ret.
push esi
push edi
// Argument offsets: 8 bytes for the two pushes above plus the return address.
mov eax, [esp + 8 + 4] // src_yuy2
mov edx, [esp + 8 + 8] // dst_y
mov esi, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
convertloop:
// Load 32 bytes of YUYV and advance the source pointer.
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
// Work on copies so xmm0/xmm1 still hold the chroma bytes afterwards.
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm2, xmm5 // even bytes are Y
pand xmm3, xmm5
packuswb xmm2, xmm3 // 16 Y bytes
movdqa [edx], xmm2
lea edx, [edx + 16]
psrlw xmm0, 8 // YUYV -> UVUV
psrlw xmm1, 8
packuswb xmm0, xmm1 // 16 bytes of interleaved U,V
movdqa xmm1, xmm0 // keep a copy for V
pand xmm0, xmm5 // U
packuswb xmm0, xmm0
movq qword ptr [esi], xmm0 // store 8 U bytes
lea esi, [esi + 8]
psrlw xmm1, 8 // V
packuswb xmm1, xmm1
movq qword ptr [edi], xmm1 // store 8 V bytes
lea edi, [edi + 8]
sub ecx, 16 // 16 pixels consumed per pass
ja convertloop
pop edi
pop esi
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#define HAS_SPLITYUY2_SSE2
// Splits one row of packed YUY2 into separate Y, U and V rows with SSE2
// (GCC inline assembly for x86 / x64).
// Each loop pass reads 32 source bytes and stores 16 Y bytes, 8 U bytes and
// 8 V bytes, then subtracts 16 from pix and repeats while `ja` is taken.
// src_yuy2 and dst_y are accessed with movdqa (aligned 16-byte moves); dst_u
// and dst_v are written 8 bytes at a time with movq.
// NOTE(review): pix is presumably a multiple of 16 - the caller in
// Q420ToI420 checks IS_ALIGNED(width, 16) before selecting this function.
static void SplitYUY2_SSE2(const uint8* src_yuy2, uint8* dst_y,
uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
// xmm5 = 0x00ff in every 16-bit lane: keeps the even byte of each pair.
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"1: \n"
// Load 32 bytes of YUYV and advance the source pointer.
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
// Even bytes are Y: mask copies, pack to 16 bytes, store.
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"pand %%xmm5,%%xmm2 \n"
"pand %%xmm5,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
"movdqa %%xmm2,(%1) \n"
"lea 0x10(%1),%1 \n"
// Odd bytes are chroma: shift them down and pack to interleaved U,V.
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
"movdqa %%xmm0,%%xmm1 \n"
// U is the even byte of each U,V pair; store 8 bytes.
"pand %%xmm5,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
"movq %%xmm0,(%2) \n"
"lea 0x8(%2),%2 \n"
// V is the odd byte of each U,V pair; store 8 bytes.
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm1 \n"
"movq %%xmm1,(%3) \n"
"lea 0x8(%3),%3 \n"
// 16 pixels consumed per pass.
"sub $0x10,%4 \n"
"ja 1b \n"
: "+r"(src_yuy2), // %0
"+r"(dst_y), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(pix) // %4
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
// Portable splitter for one row of packed YUY2 (bytes Y0 U Y1 V per pair of
// pixels).
// Writes pix luma bytes to dst_y and one U and one V byte per pixel pair to
// dst_u / dst_v. When pix is odd the last pair contributes a single luma
// byte, so nothing is stored beyond dst_y[pix - 1]; its U and V bytes are
// still written.
static void SplitYUY2_C(const uint8* src_yuy2,
                        uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix) {
  int x = 0;
  // Complete pixel pairs.
  for (; x + 1 < pix; x += 2) {
    dst_y[0] = src_yuy2[0];
    dst_y[1] = src_yuy2[2];
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
    src_yuy2 += 4;
    dst_y += 2;
    dst_u += 1;
    dst_v += 1;
  }
  // Last pixel of an odd-width row: one luma byte plus the pair's chroma.
  if (x < pix) {
    dst_y[0] = src_yuy2[0];
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
  }
}
// Convert Q420 to I420.
// Format is rows of YY/YUYV
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
int halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
void (*SplitYUY2)(const uint8* src_yuy2,
uint8* dst_y, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITYUY2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16) &&
IS_ALIGNED(dst_u, 8) && IS_ALIGNED(dst_stride_u, 8) &&
IS_ALIGNED(dst_v, 8) && IS_ALIGNED(dst_stride_v, 8)) {
SplitYUY2 = SplitYUY2_SSE2;
} else
#endif
{
SplitYUY2 = SplitYUY2_C;
}
for (int y = 0; y < height; y += 2) {
memcpy(dst_y, src_y, width);
dst_y += dst_stride_y;
src_y += src_stride_y;
// Copy a row of YUY2.
SplitYUY2(src_yuy2, dst_y, dst_u, dst_v, width);
dst_y += dst_stride_y;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
src_yuy2 += src_stride_yuy2;
}
return 0;
}
// Convert I420 to ARGB.
// Converts row by row with the best available I420ToARGBRow variant. Each
// chroma row is used for two consecutive luma rows. A negative height flips
// the output vertically. Always returns 0.
int I420ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  // Negative height: write from the last destination row upwards.
  if (height < 0) {
    height = -height;
    dst_argb += (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  // Portable row converter unless a SIMD variant applies.
  void (*I420ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToARGBRow_C;
#if defined(HAS_I420TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The plain NEON row needs a width that is a multiple of 16.
    I420ToARGBRow = IS_ALIGNED(width, 16) ? I420ToARGBRow_NEON
                                          : I420ToARGBRow_Any_NEON;
  }
#elif defined(HAS_I420TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // The plain SSSE3 row needs width % 8 == 0 and a 16-byte aligned
    // destination pointer and stride.
    I420ToARGBRow = (IS_ALIGNED(width, 8) &&
                     IS_ALIGNED(dst_argb, 16) &&
                     IS_ALIGNED(dst_stride_argb, 16))
                        ? I420ToARGBRow_SSSE3
                        : I420ToARGBRow_Any_SSSE3;
  }
#endif

  for (int row = 0; row < height; ++row) {
    I420ToARGBRow(src_y, src_u, src_v, dst_argb, width);
    src_y += src_stride_y;
    dst_argb += dst_stride_argb;
    // Move to the next chroma row after every odd luma row.
    if (row & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to BGRA.
// Converts row by row with the best available I420ToBGRARow variant. Each
// chroma row is used for two consecutive luma rows. A negative height flips
// the output vertically. Always returns 0.
int I420ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  // Negative height: write from the last destination row upwards.
  if (height < 0) {
    height = -height;
    dst_bgra += (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }

  // Portable row converter unless a SIMD variant applies.
  void (*I420ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToBGRARow_C;
#if defined(HAS_I420TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The plain NEON row needs a width that is a multiple of 16.
    I420ToBGRARow = IS_ALIGNED(width, 16) ? I420ToBGRARow_NEON
                                          : I420ToBGRARow_Any_NEON;
  }
#elif defined(HAS_I420TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // The plain SSSE3 row needs width % 8 == 0 and a 16-byte aligned
    // destination pointer and stride.
    I420ToBGRARow = (IS_ALIGNED(width, 8) &&
                     IS_ALIGNED(dst_bgra, 16) &&
                     IS_ALIGNED(dst_stride_bgra, 16))
                        ? I420ToBGRARow_SSSE3
                        : I420ToBGRARow_Any_SSSE3;
  }
#endif

  for (int row = 0; row < height; ++row) {
    I420ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    src_y += src_stride_y;
    dst_bgra += dst_stride_bgra;
    // Move to the next chroma row after every odd luma row.
    if (row & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to ABGR.
// Converts row by row with the best available I420ToABGRRow variant. Each
// chroma row is used for two consecutive luma rows. A negative height flips
// the output vertically. Always returns 0.
int I420ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  // Negative height: write from the last destination row upwards.
  if (height < 0) {
    height = -height;
    dst_abgr += (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }

  // Portable row converter unless a SIMD variant applies.
  void (*I420ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I420ToABGRRow_C;
#if defined(HAS_I420TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    // The plain NEON row needs a width that is a multiple of 16.
    I420ToABGRRow = IS_ALIGNED(width, 16) ? I420ToABGRRow_NEON
                                          : I420ToABGRRow_Any_NEON;
  }
#elif defined(HAS_I420TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    // The plain SSSE3 row needs width % 8 == 0 and a 16-byte aligned
    // destination pointer and stride.
    I420ToABGRRow = (IS_ALIGNED(width, 8) &&
                     IS_ALIGNED(dst_abgr, 16) &&
                     IS_ALIGNED(dst_stride_abgr, 16))
                        ? I420ToABGRRow_SSSE3
                        : I420ToABGRRow_Any_SSSE3;
  }
#endif

  for (int row = 0; row < height; ++row) {
    I420ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    src_y += src_stride_y;
    dst_abgr += dst_stride_abgr;
    // Move to the next chroma row after every odd luma row.
    if (row & 1) {
      src_u += src_stride_u;
      src_v += src_stride_v;
    }
  }
  return 0;
}
// Convert I420 to RGB24.
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRGB24Row = ARGBToRGB24Row_SSSE3;
}
} else
);
}
#endif
{
ARGBToRGB24Row = ARGBToRGB24Row_C;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB24Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
return 0;
// Portable row copy: copies count bytes from src to dst with memcpy.
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  (void)memcpy(dst, src, (size_t)count);
}
// Convert I420 to RAW.
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
// Copy a plane of data
void CopyPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
void (*CopyRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(width, 32) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORAWROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3;
if (IS_ALIGNED(width, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGBToRAWRow = ARGBToRAWRow_SSSE3;
}
#if defined(HAS_COPYROW_X86)
if (IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
CopyRow = CopyRow_X86;
} else
#endif
{
ARGBToRAWRow = ARGBToRAWRow_C;
CopyRow = CopyRow_C;
}
// Copy plane
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRAWRow(row, dst_argb, width);
dst_argb += dst_stride_argb;
CopyRow(src_y, dst_y, width);
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
dst_y += dst_stride_y;
}
return 0;
}
// Convert I420 to RGB565.
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgb, int dst_stride_rgb,
// Mirror a plane of data
void MirrorPlane(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height) {
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
dst_stride_rgb = -dst_stride_rgb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
void (*MirrorRow)(const uint8* src, uint8* dst, int width);
#if defined(HAS_MIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
#endif
#if defined(HAS_MIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16)) {
MirrorRow = MirrorRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToRGB565Row)(const uint8* src_rgb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
}
#if defined(HAS_MIRRORROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
MirrorRow = MirrorRow_SSE2;
} else
#endif
{
ARGBToRGB565Row = ARGBToRGB565Row_C;
MirrorRow = MirrorRow_C;
}
// Mirror plane
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToRGB565Row(row, dst_rgb, width);
dst_rgb += dst_stride_rgb;
MirrorRow(src_y, dst_y, width);
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
dst_y += dst_stride_y;
}
return 0;
}
// Convert I420 to ARGB1555.
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
// Mirror I420 with optional flipping
int I420Mirror(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height) {
if (!src_y || !src_u || !src_v ||
!dst_y || !dst_u || !dst_v ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
}
} else
#endif
{
ARGBToARGB1555Row = ARGBToARGB1555Row_C;
int halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB1555Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (dst_y) {
MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
}
MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
return 0;
}
// Convert I420 to ARGB4444.
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
// Copy ARGB with optional flipping
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
void (*I420ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(HAS_I420TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
I420ToARGBRow = I420ToARGBRow_NEON;
} else
#elif defined(HAS_I420TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
I420ToARGBRow = I420ToARGBRow_SSSE3;
} else
#endif
{
I420ToARGBRow = I420ToARGBRow_C;
}
SIMD_ALIGNED(uint8 row[kMaxStride]);
void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix);
#if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
}
} else
#endif
{
ARGBToARGB4444Row = ARGBToARGB4444Row_C;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
for (int y = 0; y < height; ++y) {
I420ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToARGB4444Row(row, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width * 4, height);
return 0;
}
......@@ -1977,7 +840,7 @@ static void SetRows32_C(uint8* dst, uint32 v32, int width,
}
#endif
static void SetPlane(uint8* dst_y, int dst_stride_y,
void SetPlane(uint8* dst_y, int dst_stride_y,
int width, int height,
uint32 value) {
void (*SetRow)(uint8* dst, uint32 value, int pix);
......@@ -2068,40 +931,6 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
return 0;
}
// I400 is greyscale typically used in MJPG.
// Copies the Y plane unchanged and fills the U and V planes with 128.
// A negative height reads the source bottom-up, which flips the picture.
// Always returns 0.
int I400ToI420(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // Negative height: start at the last source row and walk upwards.
  if (height < 0) {
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);

  // Chroma planes are half size in each direction, rounded up.
  const int chroma_width = (width + 1) >> 1;
  const int chroma_height = (height + 1) >> 1;
  SetPlane(dst_u, dst_stride_u, chroma_width, chroma_height, 128);
  SetPlane(dst_v, dst_stride_v, chroma_width, chroma_height, 128);
  return 0;
}
// Copy to I400. Source can be I420,422,444,400,NV12,NV21
// Only the Y plane is copied. A negative height reads the source bottom-up,
// which flips the picture. Always returns 0.
int I400Copy(const uint8* src_y, int src_stride_y,
             uint8* dst_y, int dst_stride_y,
             int width, int height) {
  const int flip = height < 0;
  if (flip) {
    // Start at the last source row and walk upwards.
    height = -height;
    src_y += (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -11,6 +11,7 @@
#include "libyuv/rotate.h"
#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "rotate_priv.h"
#include "row.h"
......
......@@ -46,6 +46,7 @@ extern "C" {
#define HAS_I444TOARGBROW_SSSE3
#define HAS_MIRRORROW_SSSE3
#define HAS_MIRRORROW_SSE2
#define HAS_SPLITUV_SSE2
#define HAS_YUY2TOYROW_SSE2
#define HAS_UYVYTOYROW_SSE2
#define HAS_YUY2TOUVROW_SSE2
......@@ -67,6 +68,7 @@ extern "C" {
// The following are available on Neon platforms
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define HAS_MIRRORROW_NEON
#define HAS_SPLITUV_NEON
#define HAS_I420TOARGBROW_NEON
#define HAS_I420TOBGRAROW_NEON
#define HAS_I420TOABGRROW_NEON
......@@ -125,6 +127,10 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
void MirrorRow_C(const uint8* src, uint8* dst, int width);
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
......
......@@ -350,9 +350,7 @@ void I444ToARGBRow_C(const uint8* y_buf,
}
}
void YToARGBRow_C(const uint8* y_buf,
uint8* rgb_buf,
int width) {
void YToARGBRow_C(const uint8* y_buf, uint8* rgb_buf, int width) {
for (int x = 0; x < width; ++x) {
YuvPixel(y_buf[0], 128, 128, rgb_buf, 24, 16, 8, 0);
y_buf += 1;
......@@ -368,6 +366,17 @@ void MirrorRow_C(const uint8* src, uint8* dst, int width) {
}
}
// Portable de-interleave of one UV row: for each of the pix byte pairs, the
// first byte goes to dst_u and the second to dst_v.
void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[2 * i];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
// Filter 2 rows of YUY2 UV's (422) into U and V (420)
void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix) {
......
......@@ -15,6 +15,9 @@ namespace libyuv {
extern "C" {
#endif
// This module is for GCC Neon
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
#define YUVTORGB \
"vld1.u8 {d0}, [%0]! \n" \
"vld1.u32 {d2[0]}, [%1]! \n" \
......@@ -160,6 +163,29 @@ YUVTORGB
}
#endif
#if defined(HAS_SPLITUV_NEON)
// Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
// NEON de-interleave of one UV row (GCC inline assembly).
// Each loop pass loads 16 UV pairs, stores 16 bytes to dst_u and 16 bytes to
// dst_v, and subtracts 16 from pix; the loop repeats while `bhi` is taken.
// All three pointers are post-incremented by the load/store instructions.
// NOTE(review): pix is presumably a multiple of 16 - the caller in
// X420ToI420 checks IS_ALIGNED(halfwidth, 16) before selecting this function.
void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
"1: \n"
"vld2.u8 {q0,q1}, [%0]! \n" // load 16 pairs of UV
"subs %3, %3, #16 \n" // 16 processed per loop
"vst1.u8 {q0}, [%1]! \n" // store U
"vst1.u8 {q1}, [%2]! \n" // Store V
"bhi 1b \n" // loop on the flags set by subs above
: "+r"(src_uv),
"+r"(dst_u),
"+r"(dst_v),
"+r"(pix) // Output registers
: // Input registers
: "memory", "cc", "q0", "q1" // Clobber List
);
}
#endif
#endif // __ARM_NEON__
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -17,6 +17,9 @@ namespace libyuv {
extern "C" {
#endif
// This module is for GCC x86 and x64
#if (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
#ifdef __APPLE__
#define CONST
#else
......@@ -816,7 +819,7 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
"lea -0x10(%0),%0 \n"
"1: \n"
"movdqu (%0,%2),%%xmm0 \n"
"movdqu %%xmm0,%%xmm1 \n"
"movdqa %%xmm0,%%xmm1 \n"
"psllw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm1,%%xmm0 \n"
......@@ -839,6 +842,43 @@ void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
}
#endif
#ifdef HAS_SPLITUV_SSE2
// SSE2 de-interleave of one UV row (GCC inline assembly for x86 / x64).
// Each loop pass reads 32 source bytes (16 UV pairs) and stores 16 bytes to
// dst_u and 16 bytes to dst_v, then subtracts 16 from pix and repeats while
// `ja` is taken. All loads and stores use movdqa (aligned 16-byte moves).
// NOTE(review): pix is presumably a multiple of 16 and the pointers 16-byte
// aligned - the caller in X420ToI420 checks both before selecting this
// function.
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
asm volatile (
// xmm5 = 0x00ff in every 16-bit lane: keeps the even byte of each pair.
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
// Turn dst_v into an offset from dst_u so that advancing %1 alone moves both
// store addresses.
"sub %1,%2 \n"
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
"lea 0x20(%0),%0 \n"
"movdqa %%xmm0,%%xmm2 \n"
"movdqa %%xmm1,%%xmm3 \n"
// Even bytes (U): mask and pack to 16 bytes.
"pand %%xmm5,%%xmm0 \n"
"pand %%xmm5,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
// Odd bytes (V): shift down and pack to 16 bytes.
"psrlw $0x8,%%xmm2 \n"
"psrlw $0x8,%%xmm3 \n"
"packuswb %%xmm3,%%xmm2 \n"
// Store U at dst_u and V at dst_u + (dst_v - dst_u).
"movdqa %%xmm0,(%1) \n"
"movdqa %%xmm2,(%1,%2) \n"
"lea 0x10(%1),%1 \n"
"sub $0x10,%3 \n"
"ja 1b \n"
: "+r"(src_uv), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(pix) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
);
}
#endif
#ifdef HAS_YUY2TOYROW_SSE2
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile (
......@@ -1099,9 +1139,10 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
#endif
);
}
#endif // HAS_YUY2TOYROW_SSE2
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
......@@ -15,6 +15,9 @@ namespace libyuv {
extern "C" {
#endif
// This module is for Visual C x86
#if defined(_M_IX86) && !defined(YUV_DISABLE_ASM)
#ifdef HAS_ARGBTOYROW_SSSE3
// Constant multiplication table for converting ARGB to I400.
......@@ -1503,7 +1506,7 @@ __asm {
#ifdef HAS_MIRRORROW_SSE2
// SSE2 version has movdqu so it can be used on misaligned buffers when SSSE3
// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3
// version can not.
__declspec(naked)
void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) {
......@@ -1514,7 +1517,7 @@ __asm {
lea eax, [eax - 16]
convertloop:
movdqu xmm0, [eax + ecx]
movdqu xmm1, xmm0 // swap bytes
movdqa xmm1, xmm0 // swap bytes
psllw xmm0, 8
psrlw xmm1, 8
por xmm0, xmm1
......@@ -1530,6 +1533,42 @@ __asm {
}
#endif
#ifdef HAS_SPLITUV_SSE2
// SSE2 de-interleave of one UV row (Visual C inline assembly; the function is
// naked, so arguments are read directly from the stack).
// Each loop pass reads 32 source bytes (16 UV pairs) and stores 16 bytes to
// dst_u and 16 bytes to dst_v, then subtracts 16 from pix and repeats while
// `ja` is taken. All loads and stores use movdqa (aligned 16-byte moves).
// NOTE(review): pix is presumably a multiple of 16 and the pointers 16-byte
// aligned - the caller in X420ToI420 checks both before selecting this
// function.
__declspec(naked)
void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
__asm {
// edi is saved here and restored before ret.
push edi
// Argument offsets: 4 bytes for the push above plus the return address.
mov eax, [esp + 4 + 4] // src_uv
mov edx, [esp + 4 + 8] // dst_u
mov edi, [esp + 4 + 12] // dst_v
mov ecx, [esp + 4 + 16] // pix
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff
psrlw xmm5, 8
// edi becomes dst_v - dst_u, so advancing edx alone moves both store
// addresses.
sub edi, edx
convertloop:
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
lea eax, [eax + 32]
movdqa xmm2, xmm0
movdqa xmm3, xmm1
pand xmm0, xmm5 // even bytes
pand xmm1, xmm5
packuswb xmm0, xmm1
psrlw xmm2, 8 // odd bytes
psrlw xmm3, 8
packuswb xmm2, xmm3
movdqa [edx], xmm0 // store 16 U bytes
movdqa [edx + edi], xmm2 // store 16 V bytes
lea edx, [edx + 16]
sub ecx, 16 // 16 pairs consumed per pass
ja convertloop
pop edi
ret
}
}
#endif
#ifdef HAS_YUY2TOYROW_SSE2
__declspec(naked)
void YUY2ToYRow_SSE2(const uint8* src_yuy2,
......@@ -1800,6 +1839,8 @@ void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
}
#endif // HAS_YUY2TOYROW_SSE2
#endif // _M_IX86
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment