Commit aed1cc94 authored by mikhal@webrtc.org's avatar mikhal@webrtc.org

first draft


git-svn-id: http://libyuv.googlecode.com/svn/trunk@2 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 81a7628e
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_COMMON_BASIC_TYPES_H_
#define LIBYUV_COMMON_BASIC_TYPES_H_
#include <stddef.h> // for NULL, size_t
#ifndef WIN32
#include <stdint.h> // for uintptr_t
#endif
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "constructor_magic.h"
// Fixed-width integer aliases. Guarded by INT_TYPES_DEFINED so that projects
// embedding libyuv next to other Google-style code do not redefine them.
#ifndef INT_TYPES_DEFINED
#define INT_TYPES_DEFINED
#ifdef COMPILER_MSVC
typedef __int64 int64;
typedef unsigned __int64 uint64;
#ifndef INT64_C
#define INT64_C(x) x ## I64
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## UI64
#endif
#define INT64_F "I64"
#else
typedef long long int64;
typedef unsigned long long uint64;
#ifndef INT64_C
#define INT64_C(x) x ## LL
#endif
#ifndef UINT64_C
#define UINT64_C(x) x ## ULL
#endif
#define INT64_F "ll"
#endif /* COMPILER_MSVC */
typedef int int32;
typedef short int16;
// Plain 'char' may be unsigned on some platforms (e.g. ARM ABIs); a type
// named int8 must be signed, so use 'signed char' explicitly.
typedef signed char int8;
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;
#endif // INT_TYPES_DEFINED
#ifdef WIN32
typedef int socklen_t;
#endif
namespace libyuv {
// Minimal min/max helpers — presumably named with a leading underscore to
// sidestep the <windows.h> min/max macros; confirm against Windows builds.
template<class T> inline T _min(T a, T b) {
  if (a > b) {
    return b;
  }
  return a;
}
template<class T> inline T _max(T a, T b) {
  if (a < b) {
    return b;
  }
  return a;
}
// Wait functions that take a number of milliseconds treat kForever as
// "block with no time limit".
const int kForever = -1;
}
// Detect compiler is for x86 or x64.
#if defined(__x86_64__) || defined(_M_X64) || \
defined(__i386__) || defined(_M_IX86)
#define CPU_X86 1
#endif
// Map alignof(t) onto the compiler-specific alignment query.
// NOTE(review): C++11 makes alignof a keyword; this macro will need a
// version guard if the code is ever built as C++11 or later.
#ifdef WIN32
#define alignof(t) __alignof(t)
#else // !WIN32
#define alignof(t) __alignof__(t)
#endif // !WIN32
// True when pointer p is aligned to a bytes; a must be a power of two.
#define IS_ALIGNED(p, a) (0==(reinterpret_cast<uintptr_t>(p) & ((a)-1)))
// Rounds pointer p up to the next multiple of t (t must be a power of two);
// result is returned as a uint8*.
#define ALIGNP(p, t) \
(reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
((t)-1)) & ~((t)-1))))
// Silence unused-variable warnings by passing the address to a no-op sink.
#ifndef UNUSED
#define UNUSED(x) Unused(static_cast<const void *>(&x))
#define UNUSED2(x,y) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y))
#define UNUSED3(x,y,z) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y)); Unused(static_cast<const void *>(&z))
#define UNUSED4(x,y,z,a) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y)); Unused(static_cast<const void *>(&z)); Unused(static_cast<const void *>(&a))
#define UNUSED5(x,y,z,a,b) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y)); Unused(static_cast<const void *>(&z)); Unused(static_cast<const void *>(&a)); Unused(static_cast<const void *>(&b))
// No-op sink used by the UNUSED* macros above.
inline void Unused(const void *) { }
#endif // UNUSED
// Expands to a GCC __attribute__ on GCC-compatible compilers, nothing elsewhere.
#if defined(__GNUC__)
#define GCC_ATTR(x) __attribute__ ((x))
#else // !__GNUC__
#define GCC_ATTR(x)
#endif // !__GNUC__
#endif // LIBYUV_COMMON_BASIC_TYPES_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_SOURCE_COMMON_H_
#define LIBYUV_SOURCE_COMMON_H_
#include "constructor_magic.h"
#if defined(_MSC_VER)
// warning C4355: 'this' : used in base member initializer list
#pragma warning(disable:4355)
#endif
//////////////////////////////////////////////////////////////////////
// General Utilities
//////////////////////////////////////////////////////////////////////
// Unused-variable silencers. NOTE(review): this is a duplicate of the block
// in basic_types.h; the #ifndef guard keeps the two copies from colliding.
#ifndef UNUSED
#define UNUSED(x) Unused(static_cast<const void *>(&x))
#define UNUSED2(x,y) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y))
#define UNUSED3(x,y,z) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y)); Unused(static_cast<const void *>(&z))
#define UNUSED4(x,y,z,a) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y)); Unused(static_cast<const void *>(&z)); Unused(static_cast<const void *>(&a))
#define UNUSED5(x,y,z,a,b) Unused(static_cast<const void *>(&x)); Unused(static_cast<const void *>(&y)); Unused(static_cast<const void *>(&z)); Unused(static_cast<const void *>(&a)); Unused(static_cast<const void *>(&b))
// No-op sink used by the UNUSED* macros above.
inline void Unused(const void *) { }
#endif // UNUSED
// Portability shims: map the MSVC-style case-insensitive compares onto their
// POSIX equivalents, and route stdmax to std::max off Windows.
// NOTE(review): <algorithm> is not included here for std::max — presumably
// pulled in transitively; confirm.
#ifndef WIN32
#define strnicmp(x,y,n) strncasecmp(x,y,n)
#define stricmp(x,y) strcasecmp(x,y)
// TODO(sergeyu): Remove this. std::max should be used everywhere in the code.
// NOMINMAX must be defined where we include <windows.h>.
#define stdmax(x,y) std::max(x,y)
#else
#define stdmax(x,y) libyuv::_max(x,y)
#endif
// Number of elements in a statically-sized array, as an int.
#define ARRAY_SIZE(x) (static_cast<int>((sizeof(x)/sizeof(x[0]))))
/////////////////////////////////////////////////////////////////////////////
// Assertions
/////////////////////////////////////////////////////////////////////////////
// ENABLE_DEBUG defaults to the compiler's _DEBUG setting unless overridden.
#ifndef ENABLE_DEBUG
#define ENABLE_DEBUG _DEBUG
#endif // !defined(ENABLE_DEBUG)
#if ENABLE_DEBUG
namespace libyuv {
// Break causes the debugger to stop executing, or the program to abort.
void Break();
// LogAssert writes information about a failed assertion to the log.
void LogAssert(const char * function, const char * file, int line,
               const char * expression);
// Returns |result|; on failure, also logs the assertion and traps into the
// debugger. Shared implementation behind ASSERT and VERIFY below.
inline bool Assert(bool result, const char * function, const char * file,
                   int line, const char * expression) {
  if (!result) {
    LogAssert(function, file, line, expression);
    Break();
    return false;
  }
  return true;
}
} // namespace libyuv
// VC++ 6.0 and earlier lack __FUNCTION__; substitute an empty string.
#if defined(_MSC_VER) && _MSC_VER < 1300
#define __FUNCTION__ ""
#endif
#ifndef ASSERT
#define ASSERT(x) (void)libyuv::Assert((x),__FUNCTION__,__FILE__,__LINE__,#x)
#endif
#ifndef VERIFY
#define VERIFY(x) libyuv::Assert((x),__FUNCTION__,__FILE__,__LINE__,#x)
#endif
#else // !ENABLE_DEBUG
namespace libyuv {
// In release builds VERIFY(x) still evaluates x; this helper just passes the
// value through. (It was mistakenly named 'libyuv', which left the
// libyuv::ImplicitCastToBool referenced by VERIFY below undefined.)
inline bool ImplicitCastToBool(bool result) { return result; }
} // namespace libyuv
#ifndef ASSERT
#define ASSERT(x) (void)0
#endif
#ifndef VERIFY
#define VERIFY(x) libyuv::ImplicitCastToBool(x)
#endif
#endif // !ENABLE_DEBUG
// Compile-time assertion: declares a char array whose size is -1 (a hard
// compile error) when |expr| is false. The expression is parenthesized and
// mapped to 1/-1 so that a false expression cannot silently become a
// zero-length array (which GCC accepts as an extension).
#define COMPILE_TIME_ASSERT(expr) char CTA_UNIQUE_NAME[(expr) ? 1 : -1]
// Builds a per-line unique identifier (constraint_<line>) for the array name.
#define CTA_UNIQUE_NAME CTA_MAKE_NAME(__LINE__)
#define CTA_MAKE_NAME(line) CTA_MAKE_NAME2(line)
#define CTA_MAKE_NAME2(line) constraint_ ## line
#ifdef __GNUC__
// Forces compiler to inline, even against its better judgement. Use wisely.
#define FORCE_INLINE __attribute__((always_inline))
#else
#define FORCE_INLINE
#endif
#endif // LIBYUV_SOURCE_COMMON_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_COMMON_CONSTRUCTOR_MAGIC_H_
#define LIBYUV_COMMON_CONSTRUCTOR_MAGIC_H_
// A macro to disallow the assignment operator. Declares (without defining)
// operator=; place it in the private: section of a class.
#define DISALLOW_ASSIGN(TypeName) \
void operator=(const TypeName&)
// A macro to disallow the evil copy constructor and operator= functions
// This should be used in the private: declarations for a class
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
DISALLOW_ASSIGN(TypeName)
// Alternative, less-accurate legacy name.
#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \
DISALLOW_COPY_AND_ASSIGN(TypeName)
// A macro to disallow all the implicit constructors, namely the
// default constructor, copy constructor and operator= functions.
//
// This should be used in the private: declarations for a class
// that wants to prevent anyone from instantiating it. This is
// especially useful for classes containing only static methods.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
TypeName(); \
DISALLOW_EVIL_CONSTRUCTORS(TypeName)
#endif // LIBYUV_COMMON_CONSTRUCTOR_MAGIC_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_INCLUDE_CONVERT_H_
#define LIBYUV_INCLUDE_CONVERT_H_

#include "basic_types.h"

namespace libyuv
{

// Color-format conversion routines.
//
// Planar images are passed as one pointer + stride (in bytes) per plane;
// packed frames as a single pointer + stride. The visible implementations
// return 0 if OK and < 0 on invalid arguments — confirm for each function.

int ConvertI420ToRGB24(const uint8* src_yplane, int src_ystride,
                       const uint8* src_uplane, int src_ustride,
                       const uint8* src_vplane, int src_vstride,
                       uint8* dst_frame, int dst_stride,
                       int src_width,
                       int src_height);

int ConvertI420ToARGB(const uint8* src_yplane, int src_ystride,
                      const uint8* src_uplane, int src_ustride,
                      const uint8* src_vplane, int src_vstride,
                      uint8* dst_frame, int dst_stride,
                      int src_width,
                      int src_height);

int ConvertI420ToARGB4444(const uint8* src_yplane, int src_ystride,
                          const uint8* src_uplane, int src_ustride,
                          const uint8* src_vplane, int src_vstride,
                          uint8* dst_frame, int dst_stride,
                          int src_width,
                          int src_height);

int ConvertI420ToRGB565(const uint8* src_yplane, int src_ystride,
                        const uint8* src_uplane, int src_ustride,
                        const uint8* src_vplane, int src_vstride,
                        uint8* dst_frame, int dst_stride,
                        int src_width,
                        int src_height);

int ConvertI420ToRGB565Android(const uint8* src_yplane, int src_ystride,
                               const uint8* src_uplane, int src_ustride,
                               const uint8* src_vplane, int src_vstride,
                               uint8* dst_frame, int dst_stride,
                               int src_width,
                               int src_height);

int ConvertI420ToARGB1555(const uint8* src_yplane, int src_ystride,
                          const uint8* src_uplane, int src_ustride,
                          const uint8* src_vplane, int src_vstride,
                          uint8* dst_frame, int dst_stride,
                          int src_width,
                          int src_height);

int ConvertYV12ToARGB(const uint8* src_yplane, int src_ystride,
                      const uint8* src_uplane, int src_ustride,
                      const uint8* src_vplane, int src_vstride,
                      uint8* dst_frame, int dst_stride,
                      int src_width,
                      int src_height);

int ConvertYV12ToRGBA(const uint8* src_yplane, int src_ystride,
                      const uint8* src_uplane, int src_ustride,
                      const uint8* src_vplane, int src_vstride,
                      uint8* dst_frame, int dst_stride,
                      int src_width,
                      int src_height);

// NOTE(review): this declaration appeared twice in the original header; the
// redundant second copy has been removed.
int ConvertI420ToYUY2(const uint8* src_yplane, int src_ystride,
                      const uint8* src_uplane, int src_ustride,
                      const uint8* src_vplane, int src_vstride,
                      uint8* dst_frame, int dst_stride,
                      int src_width,
                      int src_height);

int ConvertUYVYToI420(const uint8* src_frame, int src_stride,
                      uint8* dst_yplane, int dst_ystride,
                      uint8* dst_uplane, int dst_ustride,
                      uint8* dst_vplane, int dst_vstride,
                      int src_width,
                      int src_height);

int ConvertI420ToYV12(const uint8* src_yplane, int src_ystride,
                      const uint8* src_uplane, int src_ustride,
                      const uint8* src_vplane, int src_vstride,
                      uint8* dst_frame, int dst_stride,
                      int src_width,
                      int src_height);

int ConvertRGB24ToARGB(const uint8* src_frame, int src_stride,
                       uint8* dst_frame, int dst_stride,
                       int src_width, int src_height);

int ConvertRGB24ToI420(const uint8* src_frame, int src_stride,
                       uint8* dst_yplane, int dst_ystride,
                       uint8* dst_uplane, int dst_ustride,
                       uint8* dst_vplane, int dst_vstride,
                       int src_width,
                       int src_height);

int ConvertABGRToI420(const uint8* src_frame, int src_stride,
                      uint8* dst_yplane, int dst_ystride,
                      uint8* dst_uplane, int dst_ustride,
                      uint8* dst_vplane, int dst_vstride,
                      int src_width,
                      int src_height);

int ConvertNv12ToRGB565(const uint8* src_yplane, int src_ystride,
                        const uint8* src_uvplane, int src_uvstride,
                        uint8* dst_frame, int dst_stride,
                        int src_width,
                        int src_height);

int ConvertI420ToABGR(const uint8* src_yplane, int src_ystride,
                      const uint8* src_uplane, int src_ustride,
                      const uint8* src_vplane, int src_vstride,
                      uint8* dst_frame, int dst_stride,
                      int src_width,
                      int src_height);

} // namespace libyuv

#endif // LIBYUV_INCLUDE_CONVERT_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_INCLUDE_FORMAT_CONVERSION_H_
#define LIBYUV_INCLUDE_FORMAT_CONVERSION_H_

#include "basic_types.h"

namespace libyuv {

// Converts an image in any Bayer RGB layout (selected by src_fourcc_bayer)
// into planar I420.
void BayerRGBToI420(const uint8* src_bayer, int src_pitch_bayer,
                    uint32 src_fourcc_bayer,
                    uint8* dst_y, int dst_pitch_y,
                    uint8* dst_u, int dst_pitch_u,
                    uint8* dst_v, int dst_pitch_v,
                    int width, int height);

// Converts a 32-bit ARGB image (selected by src_fourcc_rgb) into any Bayer
// RGB layout (selected by dst_fourcc_bayer).
void RGB32ToBayerRGB(const uint8* src_rgb, int src_pitch_rgb,
                     uint32 src_fourcc_rgb,
                     uint8* dst_bayer, int dst_pitch_bayer,
                     uint32 dst_fourcc_bayer,
                     int width, int height);

} // namespace libyuv

#endif // LIBYUV_INCLUDE_FORMAT_CONVERSION_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* General operations on YUV images.
*/
#ifndef LIBYUV_INCLUDE_GENERAL_H_
#define LIBYUV_INCLUDE_GENERAL_H_

#include "basic_types.h"

namespace libyuv {

// Supported rotations, expressed in degrees; positive values are clockwise.
// No comma after the final enumerator: a trailing comma in an
// enumerator-list is ill-formed in C++03 (only C++11 allows it).
enum VideoRotationMode
{
  kRotateNone = 0,
  kRotateClockwise = 90,
  kRotateAntiClockwise = -90,
  kRotate180 = 180
};

// Mirrors an I420 image left/right.
// Input:
//    - src_width  : Image width in pixels.
//    - src_height : Image height in pixels.
//    - src_frame  : Pointer to the input image.
//    - dst_frame  : Pointer to the mirrored image.
// Return value: 0 if OK, < 0 otherwise.
int MirrorI420LeftRight(const uint8* src_frame, int src_stride,
                        uint8* dst_frame, int dst_stride,
                        int src_width, int src_height);

// Cut/Pad I420 frame to match desired dimensions.
int CutPadI420Frame(const uint8* inFrame, int inWidth,
                    int inHeight, uint8* outFrame,
                    int outWidth, int outHeight);

// I420 Cut - make a center cut (in place, in the given buffer).
int CutI420Frame(uint8* frame, int fromWidth,
                 int fromHeight, int toWidth,
                 int toHeight);

} // namespace libyuv

#endif // LIBYUV_INCLUDE_GENERAL_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_INCLUDE_PLANAR_FUNCTIONS_H_
#define LIBYUV_INCLUDE_PLANAR_FUNCTIONS_H_

#include "basic_types.h"

namespace libyuv {

// Collection of static planar-image helpers; the class is never instantiated.
class PlanarFunctions {
 public:
  // Copy I420 to I420.
  static void I420Copy(const uint8* src_y, int src_pitch_y,
                       const uint8* src_u, int src_pitch_u,
                       const uint8* src_v, int src_pitch_v,
                       uint8* dst_y, int dst_pitch_y,
                       uint8* dst_u, int dst_pitch_u,
                       uint8* dst_v, int dst_pitch_v,
                       int width, int height);

  // Convert I422 to I420. Used by MJPG.
  static void I422ToI420(const uint8* src_y, int src_pitch_y,
                         const uint8* src_u, int src_pitch_u,
                         const uint8* src_v, int src_pitch_v,
                         uint8* dst_y, int dst_pitch_y,
                         uint8* dst_u, int dst_pitch_u,
                         uint8* dst_v, int dst_pitch_v,
                         int width, int height);

  // Convert M420 to I420.
  static void M420ToI420(uint8* dst_y, int dst_pitch_y,
                         uint8* dst_u, int dst_pitch_u,
                         uint8* dst_v, int dst_pitch_v,
                         const uint8* m420, int pitch_m420,
                         int width, int height);

  // Convert NV12 to I420. Also used for NV21.
  static void NV12ToI420(uint8* dst_y, int dst_pitch_y,
                         uint8* dst_u, int dst_pitch_u,
                         uint8* dst_v, int dst_pitch_v,
                         const uint8* src_y,
                         const uint8* src_uv,
                         int src_pitch,
                         int width, int height);

 private:
  // Static-only class: disallow construction and copying. The macro must be
  // in a private section to have any effect; it previously sat under
  // public:, which left the declared constructors publicly accessible.
  DISALLOW_IMPLICIT_CONSTRUCTORS(PlanarFunctions);
};

} // namespace libyuv

#endif // LIBYUV_INCLUDE_PLANAR_FUNCTIONS_H_
#ifndef LIBYUV_INCLUDE_SCALE_H_
#define LIBYUV_INCLUDE_SCALE_H_
// NOTE(review): unlike the other headers in this tree, this file is missing
// the standard license banner.
#include "basic_types.h"
// ALIGN16 marks a variable as 16-byte aligned, using the compiler-specific
// spelling (MSVC prefix form vs. GCC suffix attribute).
#if defined(_MSC_VER)
#define ALIGN16(var) __declspec(align(16)) var
#else
#define ALIGN16(var) var __attribute__((aligned(16)))
#endif
namespace libyuv
{
// Static-only collection of YUV 4:2:0 scaling routines.
class YuvScaler {
public:
  // Scales a YUV 4:2:0 image from the input width and height to the
  // output width and height. If outh_offset is nonzero, the image is
  // offset by that many pixels and stretched to (outh - outh_offset * 2)
  // pixels high, instead of outh.
  // If interpolate is not set, a simple nearest-neighbor algorithm is
  // used. This produces basic (blocky) quality at the fastest speed.
  // If interpolate is set, interpolation is used to produce a better
  // quality image, at the expense of speed.
  // Returns true if successful.
  static bool Scale(const uint8 *in, int32 inw, int32 inh,
uint8 *out, int32 outw, int32 outh, int32 outh_offset,
bool interpolate);
  // Same, but specified in terms of each plane location and stride.
  static bool Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV,
int32 istrideY, int32 istrideU, int32 istrideV,
int32 iwidth, int32 iheight,
uint8 *outY, uint8 *outU, uint8 *outV,
int32 ostrideY, int32 ostrideU, int32 ostrideV,
int32 owidth, int32 oheight,
bool interpolate);
  // For testing, allow disabling of optimizations.
  static void SetUseReferenceImpl(bool use) { use_reference_impl_ = use; }
private:
  // When true, both Scale overloads use the unoptimized reference path.
  static bool use_reference_impl_;
  DISALLOW_IMPLICIT_CONSTRUCTORS(YuvScaler);
};
} // namespace libyuv
#endif // LIBYUV_INCLUDE_SCALE_H_
# Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
  # Single static-library target containing all libyuv code.
'targets': [
{
'target_name': 'libyuv',
'type': 'static_library',
'dependencies': [
],
'include_dirs': [
'include',
'common',
],
'sources': [
# includes
'include/convert.h',
'include/general.h',
'include/scale.h',
'include/planar_functions.h',
# NOTE(review): include/format_conversion.h exists in this tree but is not
# listed here - confirm whether it should be added.
# headers
'common/basic_types.h',
'common/common.h',
'common/constructor_magic.h',
# NOTE(review): conversion_tables.h (included by source/convert.cc) is not
# listed - confirm its path and whether it should appear here.
'source/cpu_id.h',
'source/row.h',
'source/video_common.h',
# sources
'source/convert.cc',
'source/general.cc',
'source/scale.cc',
'source/cpu_id.cc',
'source/format_conversion.cc',
'source/planar_functions.cc',
'source/row_posix.cc',
'source/row_table.cc',
'source/video_common.cc',
],
},
], # targets
}
# Local Variables:
# tab-width:2
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=2 shiftwidth=2:
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/**************************************************************
* conversion_tables.h
*
* Pre-compiled definitions of the conversion equations: YUV -> RGB.
*
***************************************************************/
#ifndef LIBYUV_COMMON_CONVERSION_TABLES_H_
#define LIBYUV_COMMON_CONVERSION_TABLES_H_
// (Guard renamed from the WEBRTC_COMMON_VIDEO_VPLIB_* leftover to match the
// LIBYUV_* convention used by every other header in this tree.)
namespace libyuv
{
/******************************************************************************
* YUV TO RGB approximation
*
* R = clip( (298 * (Y - 16) + 409 * (V - 128) + 128 ) >> 8 )
* G = clip( (298 * (Y - 16) - 100 * (U - 128) - 208 * (V - 128) + 128 ) >> 8 )
* B = clip( (298 * (Y - 16) + 516 * (U - 128) + 128 ) >> 8 )
*******************************************************************************/
// Per-component fixed-point contributions. Arguments are fully parenthesized
// so the generator macros below can pass compound expressions like (i) + 1.
#define Yc(i) static_cast<int>( 298 * ((i) - 16))   // Y contribution
#define Ucg(i) static_cast<int>(-100 * ((i) - 128)) // U contribution to G
#define Ucb(i) static_cast<int>( 516 * ((i) - 128)) // U contribution to B
#define Vcr(i) static_cast<int>( 409 * ((i) - 128)) // V contribution to R
#define Vcg(i) static_cast<int>(-208 * ((i) - 128)) // V contribution to G
// Helpers expanding F(i) for 4 / 16 / 64 / 256 consecutive indices; used to
// build the 256-entry lookup tables without spelling out every entry.
#define LIBYUV_TBL4(F, i)  F(i), F((i) + 1), F((i) + 2), F((i) + 3)
#define LIBYUV_TBL16(F, i) LIBYUV_TBL4(F, i), LIBYUV_TBL4(F, (i) + 4), \
                           LIBYUV_TBL4(F, (i) + 8), LIBYUV_TBL4(F, (i) + 12)
#define LIBYUV_TBL64(F, i) LIBYUV_TBL16(F, i), LIBYUV_TBL16(F, (i) + 16), \
                           LIBYUV_TBL16(F, (i) + 32), LIBYUV_TBL16(F, (i) + 48)
#define LIBYUV_TBL256(F)   LIBYUV_TBL64(F, 0), LIBYUV_TBL64(F, 64), \
                           LIBYUV_TBL64(F, 128), LIBYUV_TBL64(F, 192)
// Pre-computed contribution tables, indexed by the raw 8-bit Y/U/V sample.
static const int mapYc[256]  = { LIBYUV_TBL256(Yc) };
static const int mapUcg[256] = { LIBYUV_TBL256(Ucg) };
static const int mapUcb[256] = { LIBYUV_TBL256(Ucb) };
static const int mapVcr[256] = { LIBYUV_TBL256(Vcr) };
static const int mapVcg[256] = { LIBYUV_TBL256(Vcg) };
// The generator helpers are internal to this header; Yc/Ucg/Ucb/Vcr/Vcg stay
// defined, as they did originally.
#undef LIBYUV_TBL256
#undef LIBYUV_TBL64
#undef LIBYUV_TBL16
#undef LIBYUV_TBL4
} // namespace libyuv
#endif // LIBYUV_COMMON_CONVERSION_TABLES_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "convert.h"
#include <string.h> // memcpy(), memset()
#include <assert.h>
#include <stdlib.h> // abs
//#define SCALEOPT //Currently for windows only. June 2010
#ifdef SCALEOPT
#include <emmintrin.h>
#endif
#include "conversion_tables.h"
namespace libyuv
{
// Clip value to [0,255]
inline uint8 Clip(int32 val);
#ifdef SCALEOPT
void *memcpy_16(void * dest, const void * src, size_t n);
void *memcpy_8(void * dest, const void * src, size_t n);
#endif
// Converts an I420 frame (planar YUV, chroma subsampled 2x2) to 24-bit
// RGB (memory order B,G,R), written bottom-up (vertically flipped).
// Two source rows are processed per pass so each chroma sample serves
// its whole 2x2 pixel group.  Strides are in pixels.
// Returns 0 on success, -1 when any dimension or stride is zero.
// Fixed vs. the first draft: the end-of-row-pair adjustment was
// "out -= dst_stride * 9", which is only correct when
// dst_stride == src_width; the general form backs up this pair's
// output (3 * src_width bytes) plus two full rows (6 * dst_stride).
int
ConvertI420ToRGB24(const uint8* src_yplane, int src_ystride,
                   const uint8* src_uplane, int src_ustride,
                   const uint8* src_vplane, int src_vstride,
                   uint8* dst_frame, int dst_stride,
                   int src_width,
                   int src_height
                   )
{
  if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
      src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
  {
    return -1;
  }
  // RGB orientation - bottom up: start at the first byte of the last
  // output row (3 bytes per pixel).
  uint8* out = dst_frame + dst_stride * src_height * 3 - dst_stride * 3;
  uint8* out2 = out - dst_stride * 3;
  int h, w;
  int tmpR, tmpG, tmpB;
  const uint8 *y1, *y2, *u, *v;
  y1 = src_yplane;
  y2 = y1 + src_ystride;
  u = src_uplane;
  v = src_vplane;
  for (h = (src_height >> 1); h > 0; h--)
  {
    // 2 rows at a time, 2 y's at a time
    for (w = 0; w < (src_width >> 1); w++)
    {
      // Vertical and horizontal sub-sampling.  The map* tables are 8.8
      // fixed point; +128 rounds before the >>8, Clip saturates to
      // [0,255].
      tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
      tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
             + 128) >> 8);
      tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
      out[2] = Clip(tmpR);
      out[1] = Clip(tmpG);
      out[0] = Clip(tmpB);
      tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
      tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
             + 128) >> 8);
      tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
      out2[2] = Clip(tmpR);
      out2[1] = Clip(tmpG);
      out2[0] = Clip(tmpB);
      tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
      tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
             + 128) >> 8);
      tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
      out[5] = Clip(tmpR);
      out[4] = Clip(tmpG);
      out[3] = Clip(tmpB);
      tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
      tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
             + 128) >> 8);
      tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
      out2[5] = Clip(tmpR);
      out2[4] = Clip(tmpG);
      out2[3] = Clip(tmpB);
      out += 6;
      out2 += 6;
      y1 += 2;
      y2 += 2;
      u++;
      v++;
    }
    // Skip the row handled by the other luma pointer plus any padding.
    y1 += 2 * src_ystride - src_width;
    y2 += 2 * src_ystride - src_width;
    u += src_ustride - (src_width >> 1);
    v += src_vstride - (src_width >> 1);
    // Step the output pointers up two rows: undo this pair's advance
    // (3 * src_width bytes) and move past two destination rows.
    out -= 6 * dst_stride + 3 * src_width;
    out2 -= 6 * dst_stride + 3 * src_width;
  } // end height for
  return 0;
}
int
ConvertI420ToARGB(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
uint8* out1 = dst_frame;
uint8* out2 = out1 + dst_stride * 4;
const uint8 *y1,*y2, *u, *v;
y1 = src_yplane;
y2 = src_yplane + src_ystride;
u = src_uplane;
v = src_vplane;
int h, w;
int tmpR, tmpG, tmpB;
for (h = (src_height >> 1); h > 0; h--)
{
// Do 2 rows at the time
for (w = 0; w < (src_width >> 1); w++)
{ // Vertical and horizontal sub-sampling
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out1[3] = 0xff;
out1[2] = Clip(tmpR);
out1[1] = Clip(tmpG);
out1[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[3] = 0xff;
out2[2] = Clip(tmpR);
out2[1] = Clip(tmpG);
out2[0] = Clip(tmpB);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out1[7] = 0xff;
out1[6] = Clip(tmpR);
out1[5] = Clip(tmpG);
out1[4] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[7] = 0xff;
out2[6] = Clip(tmpR);
out2[5] = Clip(tmpG);
out2[4] = Clip(tmpB);
out1 += 8;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out1 += (2 * dst_stride - src_width) * 4;
out2 += (2 * dst_stride - src_width) * 4;
} // end height for
return 0;
}
int
ConvertYV12ToRGBA(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
int32 diff = dst_stride - src_width;
uint8 * out = dst_frame;
uint8 * out2 = out + dst_stride * 4;
const uint8 *y1,*y2, *u, *v;
int tmpG, tmpB, tmpR;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
v = src_vplane;
u = src_uplane;
for (h = (src_height >> 1); h > 0; h--)
{
// Do 2 rows at the time
for (w = 0; w < (src_width >> 1); w++)
{
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128 ) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128 ) >> 8);
out[1] = Clip(tmpR);
out[2] = Clip(tmpG);
out[3] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = Clip(tmpR);
out2[2] = Clip(tmpG);
out2[3] = Clip(tmpB);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[5] = Clip(tmpR);
out[6] = Clip(tmpG);
out[7] = Clip(tmpB);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[5] = Clip(tmpR);
out2[6] = Clip(tmpG);
out2[7] = Clip(tmpB);
out[0] = 0xff;
out[4] = 0xff;
out += 8;
out2[0] = 0xff;
out2[4] = 0xff;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out += (src_width + diff * 2) * 4;
out2 += (src_width + diff * 2) * 4;
}
return 0;
}
// Little Endian...
int
ConvertI420ToARGB4444(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// RGB orientation - bottom up
uint8* out = dst_frame + dst_stride * (src_height - 1) * 2;
uint8* out2 = out - (2 * dst_stride);
int tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
int h, w;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// Convert to RGB888 and re-scale to 4 bits
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] =(uint8)((Clip(tmpG) & 0xf0) + (Clip(tmpB) >> 4));
out[1] = (uint8)(0xf0 + (Clip(tmpR) >> 4));
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint8)((Clip(tmpG) & 0xf0 ) + (Clip(tmpB) >> 4));
out2[1] = (uint8) (0xf0 + (Clip(tmpR) >> 4));
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[2] = (uint8)((Clip(tmpG) & 0xf0 ) + (Clip(tmpB) >> 4));
out[3] = (uint8)(0xf0 + (Clip(tmpR) >> 4));
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[2] = (uint8)((Clip(tmpG) & 0xf0 ) + (Clip(tmpB) >> 4));
out2[3] = (uint8)(0xf0 + (Clip(tmpR) >> 4));
out += 4;
out2 += 4;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= (2 * dst_stride + src_width) * 2;
out2 -= (2 * dst_stride + src_width) * 2;
} // end height for
return 0;
}
// Converts an I420 frame to 16-bit RGB565 (5-6-5 packing), writing the
// destination bottom-up (vertically flipped).  Two source rows share
// each chroma sample.  Strides appear to be measured in pixels
// (dst_stride counts uint16 units here) - TODO confirm with callers.
// Returns 0 on success, -1 when any dimension or stride is zero.
int
ConvertI420ToRGB565(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// Bottom-up output: out is the start of the last row, out2 the row above.
uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1);
uint16* out2 = out - dst_stride;
int tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
int h, w;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565
// The map* tables are 8.8 fixed point; +128 rounds before the >>8 and
// Clip saturates to [0,255].
// NOTE(review): the (uint16) cast below binds only to the red term;
// the full sum is evaluated as int and truncated on assignment, so
// the stored value is the same - but keep the expression as-is.
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB ) >> 3);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
// Hop over the row handled by the other pointer plus any padding.
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
// Back up over this pair's output (src_width pixels) plus two rows.
out -= 2 * dst_stride + src_width;
out2 -= 2 * dst_stride + src_width;
} // end height for
return 0;
}
//Same as ConvertI420ToRGB565 but doesn't flip vertically.
// Same pixel math as ConvertI420ToRGB565 but written top-down (no
// vertical flip), as expected by Android surfaces.  Strides appear to
// be in pixels (dst_stride counts uint16 units) - TODO confirm.
// Returns 0 on success, -1 when any dimension or stride is zero.
int
ConvertI420ToRGB565Android(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// Top-down: out starts at the first row, out2 at the second.
uint16* out = (uint16*)(dst_frame);
uint16* out2 = out + dst_stride;
int tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565
// 8.8 fixed-point table lookups; +128 rounds, Clip saturates.
// NOTE(review): the (uint16) cast binds only to the red term; the
// value stored is unchanged because assignment truncates anyway.
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB ) >> 3);
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
& 0xfc) << 3) + (Clip(tmpB) >> 3);
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
// Advance past the partner row plus padding; output moves down
// two rows (top-down, hence += rather than the flipped -=).
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out += 2 * dst_stride - src_width;
out2 += 2 * dst_stride - src_width;
} // end height for
return 0;
}
// Converts an I420 frame to 16-bit ARGB1555 (alpha bit forced to 1,
// 5 bits per color channel), writing the destination bottom-up.
// Strides appear to be in pixels (dst_stride counts uint16 units) -
// TODO confirm with callers.
// Returns 0 on success, -1 when any dimension or stride is zero.
int
ConvertI420ToARGB1555(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// Bottom-up output: out is the last row, out2 the row above it.
uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1);
uint16* out2 = out - dst_stride ;
int32 tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{ // 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
// 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 555
// 3. Add 1 for alpha value
// 8.8 fixed-point table lookups; +128 rounds, Clip saturates.
// 0x8000 sets the single alpha bit of the 1-5-5-5 word.
tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
out[0] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
out2[0] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
out[1] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
+ 128) >> 8);
tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
out2[1] = (uint16)(0x8000 + ((Clip(tmpR) & 0xf8) << 10) +
((Clip(tmpG) & 0xf8) << 3) + (Clip(tmpB) >> 3));
y1 += 2;
y2 += 2;
out += 2;
out2 += 2;
u++;
v++;
}
// Hop over the partner row plus padding; back the output pointers
// up over this pair's pixels plus two full rows.
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= 2 * dst_stride + src_width;
out2 -= 2 * dst_stride + src_width;
} // end height for
return 0;
}
// Converts planar I420 to packed YUY2 (macro-pixel Y0 U0 Y1 V0, two
// image pixels per 4 bytes), duplicating each chroma sample into both
// rows of its 2x2 group.  Strides are in pixels.
// Returns 0 on success, -1 when any dimension or stride is zero.
// NOTE(review): the SCALEOPT branch below is Windows-only inline SSE2
// assembly and references identifiers (WebRtc_UWord32, height, width,
// strideOut) that do not exist in this function's scope, so it will
// not compile if SCALEOPT is defined - needs porting to the renamed
// parameters before enabling.
int
ConvertI420ToYUY2(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
const uint8* in1 = src_yplane;
const uint8* in2 = src_yplane + src_ystride ;
const uint8* inU = src_uplane;
const uint8* inV = src_vplane;
uint8* out1 = dst_frame;
// Each output row is 2 * dst_stride bytes (2 bytes per pixel).
uint8* out2 = dst_frame + 2 * dst_stride;
// YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
#ifndef SCALEOPT
for (int i = 0; i < (src_height >> 1);i++)
{
// Two rows per pass; both reuse the same chroma row.
for (int j = 0; j < (src_width >> 1);j++)
{
out1[0] = in1[0];
out1[1] = *inU;
out1[2] = in1[1];
out1[3] = *inV;
out2[0] = in2[0];
out2[1] = *inU;
out2[2] = in2[1];
out2[3] = *inV;
out1 += 4;
out2 += 4;
inU++;
inV++;
in1 += 2;
in2 += 2;
}
// Skip the partner row plus padding; output hops two packed rows.
in1 += 2 * src_ystride - src_width;
in2 += 2 * src_ystride - src_width;
inU += src_ustride - (src_width >> 1);
inV += src_vstride - (src_width >> 1);
out1 += 2 * dst_stride + 2 * (dst_stride - src_width);
out2 += 2 * dst_stride + 2 * (dst_stride - src_width);
}
#else
// SSE2 path: interleaves 16 luma bytes with 8 U and 8 V bytes per
// iteration using punpcklbw/punpckhbw.  See NOTE(review) above - this
// branch does not currently compile.
for (WebRtc_UWord32 i = 0; i < (height >> 1);i++)
{
int32 width__ = (width >> 4);
_asm
{
;pusha
mov eax, DWORD PTR [in1] ;1939.33
mov ecx, DWORD PTR [in2] ;1939.33
mov ebx, DWORD PTR [inU] ;1939.33
mov edx, DWORD PTR [inV] ;1939.33
loop0:
movq xmm6, QWORD PTR [ebx] ;inU
movq xmm0, QWORD PTR [edx] ;inV
punpcklbw xmm6, xmm0 ;inU, inV mix
;movdqa xmm1, xmm6
;movdqa xmm2, xmm6
;movdqa xmm4, xmm6
movdqu xmm3, XMMWORD PTR [eax] ;in1
movdqa xmm1, xmm3
punpcklbw xmm1, xmm6 ;in1, inU, in1, inV
mov esi, DWORD PTR [out1]
movdqu XMMWORD PTR [esi], xmm1 ;write to out1
movdqu xmm5, XMMWORD PTR [ecx] ;in2
movdqa xmm2, xmm5
punpcklbw xmm2, xmm6 ;in2, inU, in2, inV
mov edi, DWORD PTR [out2]
movdqu XMMWORD PTR [edi], xmm2 ;write to out2
punpckhbw xmm3, xmm6 ;in1, inU, in1, inV again
movdqu XMMWORD PTR [esi+16], xmm3 ;write to out1 again
add esi, 32
mov DWORD PTR [out1], esi
punpckhbw xmm5, xmm6 ;inU, in2, inV again
movdqu XMMWORD PTR [edi+16], xmm5 ;write to out2 again
add edi, 32
mov DWORD PTR [out2], edi
add ebx, 8
add edx, 8
add eax, 16
add ecx, 16
mov esi, DWORD PTR [width__]
sub esi, 1
mov DWORD PTR [width__], esi
jg loop0
mov DWORD PTR [in1], eax ;1939.33
mov DWORD PTR [in2], ecx ;1939.33
mov DWORD PTR [inU], ebx ;1939.33
mov DWORD PTR [inV], edx ;1939.33
;popa
emms
}
in1 += 2 * src_ystride - src_width;
in2 += 2 * src_ystride - src_width;
out1 += 2 * strideOut + 2 * (strideOut - width);
out2 += 2 * strideOut + 2 * (strideOut - width);
}
#endif
return 0;
}
// Converts planar I420 to packed UYVY (macro-pixel U0 Y0 V0 Y1, two
// image pixels per 4 bytes), duplicating each chroma sample into both
// rows of its 2x2 group.  Strides are in pixels.
// Returns 0 on success, -1 when any dimension or stride is zero.
// NOTE(review): the SCALEOPT branch below is Windows-only inline SSE2
// assembly and references identifiers (height, width, in1, in2, inU,
// inV, strideOut) that do not exist in this function's scope (the
// locals are y1, y2, u, v, out1, out2), so it will not compile if
// SCALEOPT is defined.
int
ConvertI420ToUYVY(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
int i = 0;
const uint8* y1 = src_yplane;
const uint8* y2 = y1 + src_ystride;
const uint8* u = src_uplane;
const uint8* v = src_vplane;
uint8* out1 = dst_frame;
// Each output row is 2 * dst_stride bytes (2 bytes per pixel).
uint8* out2 = dst_frame + 2 * dst_stride;
// Macro-pixel = 2 image pixels
// U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
#ifndef SCALEOPT
for (; i < (src_height >> 1);i++)
{
// Two rows per pass; both reuse the same chroma row.
for (uint32 j = 0; j < (src_width >> 1) ;j++)
{
out1[0] = *u;
out1[1] = y1[0];
out1[2] = *v;
out1[3] = y1[1];
out2[0] = *u;
out2[1] = y2[0];
out2[2] = *v;
out2[3] = y2[1];
out1 += 4;
out2 += 4;
u++;
v++;
y1 += 2;
y2 += 2;
}
// Skip the partner row plus padding; output hops two packed rows.
y1 += 2 * src_ystride - src_width;
y2 += 2 * src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out1 += 2 * (dst_stride + (dst_stride - src_width));
out2 += 2 * (dst_stride + (dst_stride - src_width));
}
#else
// SSE2 path: interleaves chroma-first with 16 luma bytes per
// iteration.  See NOTE(review) above - does not currently compile.
for (; i < (height >> 1);i++)
{
int32 width__ = (width >> 4);
_asm
{
;pusha
mov eax, DWORD PTR [in1] ;1939.33
mov ecx, DWORD PTR [in2] ;1939.33
mov ebx, DWORD PTR [inU] ;1939.33
mov edx, DWORD PTR [inV] ;1939.33
loop0:
movq xmm6, QWORD PTR [ebx] ;inU
movq xmm0, QWORD PTR [edx] ;inV
punpcklbw xmm6, xmm0 ;inU, inV mix
movdqa xmm1, xmm6
movdqa xmm2, xmm6
movdqa xmm4, xmm6
movdqu xmm3, XMMWORD PTR [eax] ;in1
punpcklbw xmm1, xmm3 ;inU, in1, inV
mov esi, DWORD PTR [out1]
movdqu XMMWORD PTR [esi], xmm1 ;write to out1
movdqu xmm5, XMMWORD PTR [ecx] ;in2
punpcklbw xmm2, xmm5 ;inU, in2, inV
mov edi, DWORD PTR [out2]
movdqu XMMWORD PTR [edi], xmm2 ;write to out2
punpckhbw xmm4, xmm3 ;inU, in1, inV again
movdqu XMMWORD PTR [esi+16], xmm4 ;write to out1 again
add esi, 32
mov DWORD PTR [out1], esi
punpckhbw xmm6, xmm5 ;inU, in2, inV again
movdqu XMMWORD PTR [edi+16], xmm6 ;write to out2 again
add edi, 32
mov DWORD PTR [out2], edi
add ebx, 8
add edx, 8
add eax, 16
add ecx, 16
mov esi, DWORD PTR [width__]
sub esi, 1
mov DWORD PTR [width__], esi
jg loop0
mov DWORD PTR [in1], eax ;1939.33
mov DWORD PTR [in2], ecx ;1939.33
mov DWORD PTR [inU], ebx ;1939.33
mov DWORD PTR [inV], edx ;1939.33
;popa
emms
}
in1 += width;
in2 += width;
out1 += 2 * (strideOut + (strideOut - width));
out2 += 2 * (strideOut + (strideOut - width));
}
#endif
return 0;
}
// Copies an I420 frame into a YV12 destination.  Both are planar
// 4:2:0; they differ only in chroma-plane order (I420 stores U then V,
// YV12 stores V then U), so this is three plane copies with the chroma
// destinations swapped.  dst_stride is the destination luma stride;
// the destination chroma planes are assumed to use dst_stride / 2.
// Returns 0 on success, -1 when any dimension or stride is zero.
// Fixed vs. the first draft: the V copy continued reading from wherever
// the U copy left off instead of from src_vplane (so src_vplane was
// ignored whenever the source planes are not contiguous), the SCALEOPT
// branch referenced an undefined "width", and the loop counters mixed
// uint32 with int.
int
ConvertI420ToYV12(const uint8* src_yplane, int src_ystride,
                  const uint8* src_uplane, int src_ustride,
                  const uint8* src_vplane, int src_vstride,
                  uint8* dst_frame, int dst_stride,
                  int src_width,
                  int src_height
                  )
{
  if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
      src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
  {
    return -1;
  }
  const uint8* inFrame = src_yplane;
  uint8* outFrame = dst_frame;
  // Copy Y
  for (int i = 0; i < src_height; i++)
  {
#ifndef SCALEOPT
    memcpy(outFrame, inFrame, src_width);
#else
    memcpy_16(outFrame, inFrame, src_width);
#endif
    inFrame += src_ystride;
    outFrame += dst_stride;
  }
  // Copy U into the *second* chroma plane (YV12 stores V first):
  // skip over the V plane, (dst_stride / 2) * (src_height / 2) bytes.
  inFrame = src_uplane;
  outFrame += ((dst_stride >> 1) * src_height) >> 1;
  for (int i = 0; i < (src_height >> 1); i++)
  {
#ifndef SCALEOPT
    memcpy(outFrame, inFrame, src_width >> 1);
#else
    memcpy_8(outFrame, inFrame, src_width >> 1);
#endif
    inFrame += src_ustride;
    outFrame += dst_stride >> 1;
  }
  // Rewind to the start of the first chroma plane and copy V there.
  outFrame -= (dst_stride * src_height) >> 1;
  inFrame = src_vplane;
  for (int i = 0; i < (src_height >> 1); i++)
  {
#ifndef SCALEOPT
    memcpy(outFrame, inFrame, src_width >> 1);
#else
    memcpy_8(outFrame, inFrame, src_width >> 1);
#endif
    inFrame += src_vstride;
    outFrame += dst_stride >> 1;
  }
  return 0;
}
// Converts NV12 (bi-planar: full-resolution Y plane followed by an
// interleaved U/V plane at quarter resolution) to 16-bit RGB565,
// writing the destination bottom-up.  Strides are in pixels
// (dst_stride counts uint16 units).
// Returns 0 on success, -1 on invalid arguments.
// Fixed vs. the first draft: src_height was validated twice while
// src_width and dst_stride were never checked; the output pointer was
// derived from src_yplane (clobbering the *source*) instead of
// dst_frame; the UV-plane row advance used src_width / 2 although a
// row pair consumes src_width bytes of the interleaved plane; and the
// row step-up (4 * dst_stride - src_width) was only correct when
// dst_stride == src_width.
int
ConvertNv12ToRGB565(const uint8* src_yplane, int src_ystride,
                    const uint8* src_uvplane, int src_uvstride,
                    uint8* dst_frame, int dst_stride,
                    int src_width,
                    int src_height
                    )
{
  if (src_width < 1 || src_height < 1 || src_ystride < 1 ||
      src_uvstride < 1 || dst_stride < 1)
  {
    return -1;
  }
  // Bi-Planar: Y plane followed by an interleaved U and V plane.
  const uint8* interlacedSrc = src_uvplane;
  // Bottom-up output: out is the last row, out2 the row above it.
  uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1);
  uint16* out2 = out - dst_stride;
  int32 tmpR, tmpG, tmpB;
  const uint8 *y1, *y2;
  y1 = src_yplane;
  y2 = y1 + src_ystride;
  int h, w;
  for (h = (src_height >> 1); h > 0; h--)
  {
    // 2 rows at a time, 2 y's at a time
    for (w = 0; w < (src_width >> 1); w++)
    {
      // Vertical and horizontal sub-sampling:
      // 1. Convert to RGB888 via the 8.8 fixed-point tables
      //    (interlacedSrc[0] = U, interlacedSrc[1] = V).
      // 2. Pack into a 16-bit 5-6-5 word.
      tmpR = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]]
             + 128) >> 8);
      tmpG = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]]
             + mapVcg[interlacedSrc[1]] + 128) >> 8);
      tmpB = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]]
             + 128) >> 8);
      out[0] = (uint16)(((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
               & 0xfc) << 3) + (Clip(tmpB) >> 3));
      tmpR = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]]
             + 128) >> 8);
      tmpG = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]]
             + mapVcg[interlacedSrc[1]] + 128) >> 8);
      tmpB = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]]
             + 128) >> 8);
      out2[0] = (uint16)(((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
                & 0xfc) << 3) + (Clip(tmpB) >> 3));
      tmpR = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]]
             + 128) >> 8);
      tmpG = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]]
             + mapVcg[interlacedSrc[1]] + 128) >> 8);
      tmpB = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]]
             + 128) >> 8);
      out[1] = (uint16)(((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
               & 0xfc) << 3) + (Clip(tmpB) >> 3));
      tmpR = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]]
             + 128) >> 8);
      tmpG = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]]
             + mapVcg[interlacedSrc[1]] + 128) >> 8);
      tmpB = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]]
             + 128) >> 8);
      out2[1] = (uint16)(((Clip(tmpR) & 0xf8) << 8) + ((Clip(tmpG)
                & 0xfc) << 3) + (Clip(tmpB) >> 3));
      y1 += 2;
      y2 += 2;
      out += 2;
      out2 += 2;
      interlacedSrc += 2;
    }
    // Skip the partner luma row plus padding.
    y1 += 2 * src_ystride - src_width;
    y2 += 2 * src_ystride - src_width;
    // The interleaved UV plane yields 2 bytes per 2x2 block, so a row
    // pair consumes src_width bytes of it.
    interlacedSrc += src_uvstride - src_width;
    // Step up two rows: undo this pair's advance plus two row strides.
    out -= 2 * dst_stride + src_width;
    out2 -= 2 * dst_stride + src_width;
  } // end height for
  return 0;
}
int
ConvertI420ToABGR(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride,
int src_width,
int src_height
)
{
if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
{
return -1;
}
// RGB orientation - bottom up
uint8* out = dst_frame + 4 * dst_stride * (src_height - 1);
uint8* out2 = out - dst_stride * 4;
int32 tmpR, tmpG, tmpB;
const uint8 *y1,*y2, *u, *v;
int h, w;
y1 = src_yplane;
y2 = y1 + src_ystride;
u = src_uplane;
v = src_vplane;
for (h = (src_height >> 1); h > 0; h--)
{
// 2 rows at a time, 2 y's at a time
for (w = 0; w < (src_width >> 1); w++)
{
// Vertical and horizontal sub-sampling
tmpR = (int32)((298 * (y1[0] - 16) + 409 * (v[0] - 128)
+ 128) >> 8);
tmpG = (int32)((298 * (y1[0] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128 ) >> 8);
tmpB = (int32)((298 * (y1[0] - 16) + 516 * (u[0] - 128)
+ 128 ) >> 8);
out[3] = 0xff;
out[0] = Clip(tmpR);
out[1] = Clip(tmpG);
out[2] = Clip(tmpB);
tmpR = (int32)((298 * (y2[0] - 16) + 409 * (v[0] - 128)
+ 128) >> 8);
tmpG = (int32)((298 * (y2[0] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128) >> 8);
tmpB = (int32)((298 * (y2[0] - 16) + 516 * (u[0] - 128)
+ 128) >> 8);
out2[3] = 0xff;
out2[0] = Clip(tmpR);
out2[1] = Clip(tmpG);
out2[2] = Clip(tmpB);
tmpR = (int32)((298 * (y1[1] - 16) + 409 * (v[0] - 128)
+ 128 ) >> 8);
tmpG = (int32)((298 * (y1[1] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128 ) >> 8);
tmpB = (int32)((298 * (y1[1] - 16) + 516 * (u[0] - 128)
+ 128) >> 8);
out[7] = 0xff;
out[4] = Clip(tmpR);
out[5] = Clip(tmpG);
out[6] = Clip(tmpB);
tmpR = (int32)((298 * (y2[1] - 16) + 409 * (v[0] - 128)
+ 128) >> 8);
tmpG = (int32)((298 * (y2[1] - 16) - 100 * (u[0] - 128)
- 208 * (v[0] - 128) + 128) >> 8);
tmpB = (int32)((298 * (y2[1] - 16) + 516 * (u[0] - 128)
+ 128 ) >> 8);
out2[7] = 0xff;
out2[4] = Clip(tmpR);
out2[5] = Clip(tmpG);
out2[6] = Clip(tmpB);
out += 8;
out2 += 8;
y1 += 2;
y2 += 2;
u++;
v++;
}
y1 += src_ystride + src_ystride - src_width;
y2 += src_ystride + src_ystride - src_width;
u += src_ustride - (src_width >> 1);
v += src_vstride - (src_width >> 1);
out -= (2 * dst_stride + src_width) * 4;
out2 -= (2 * dst_stride + src_width) * 4;
} // end height for
return 0;
}
// Converts packed UYVY (macro-pixel U0 Y0 V0 Y1) to planar I420.
// Chroma is taken from the even rows only (vertical subsampling by
// dropping, not averaging).  Strides are in pixels.
// Returns 0 on success, -1 when any dimension or stride is zero.
// Fixed vs. the first draft: the chroma row adjustments read
// "dst_ustride - src_width << 1", which parses as
// (dst_ustride - src_width) << 1 and is wrong under either reading -
// the chroma planes advance src_width / 2 per row.  The luma and
// source strides are now also honoured between the two rows of each
// pair instead of once per pair.
int
ConvertUYVYToI420(const uint8* src_frame, int src_stride,
                  uint8* dst_yplane, int dst_ystride,
                  uint8* dst_uplane, int dst_ustride,
                  uint8* dst_vplane, int dst_vstride,
                  int src_width,
                  int src_height
                  )
{
  if (src_width == 0 || src_height == 0 || dst_ystride == 0 ||
      dst_ustride == 0 || dst_vstride == 0 || src_stride == 0)
  {
    return -1;
  }
  int i, j;
  uint8* outI = dst_yplane;
  uint8* outU = dst_uplane;
  uint8* outV = dst_vplane;
  // U0Y0V0Y1..U2Y2V2Y3.....
  for (i = 0; i < (src_height >> 1); i++)
  {
    // Even row: keep luma and chroma.
    for (j = 0; j < (src_width >> 1); j++)
    {
      outI[0] = src_frame[1];
      *outU = src_frame[0];
      outI[1] = src_frame[3];
      *outV = src_frame[2];
      src_frame += 4;
      outI += 2;
      outU++;
      outV++;
    }
    // Skip any packed-source row padding (2 bytes per pixel) and any
    // luma destination padding before the next row.
    src_frame += (src_stride - src_width) * 2;
    outI += dst_ystride - src_width;
    // Odd row: keep luma only (chroma is dropped).
    for (j = 0; j < (src_width >> 1); j++)
    {
      outI[0] = src_frame[1];
      outI[1] = src_frame[3];
      src_frame += 4;
      outI += 2;
    }
    src_frame += (src_stride - src_width) * 2;
    outI += dst_ystride - src_width;
    // Chroma planes advance src_width / 2 samples per output row.
    outU += dst_ustride - (src_width >> 1);
    outV += dst_vstride - (src_width >> 1);
  }
  return 0;
}
// Expands 24-bit RGB to 32-bit ARGB (alpha forced to 0xff) while
// flipping vertically: the first source row lands on the last
// destination row.  Strides are in pixels.
// Returns 0 on success, -1 on invalid arguments.
// Fixed vs. the first draft: the destination row step was
// "-4 * (dst_stride - src_width)", which is zero when stride == width
// (so every source row overwrote the bottom destination row) - the
// pointer must step up one full row; the source padding skip also
// ignored the 3 bytes per pixel, and src_stride was never validated.
int
ConvertRGB24ToARGB(const uint8* src_frame, int src_stride,
                   uint8* dst_frame, int dst_stride,
                   int src_width, int src_height
                   )
{
  if (src_width < 1 || src_height < 1 || src_stride < 1 || dst_stride < 1)
  {
    return -1;
  }
  int i, j, offset;
  uint8* outFrame = dst_frame;
  const uint8* inFrame = src_frame;
  outFrame += dst_stride * (src_height - 1) * 4;   // last output row
  for (i = 0; i < src_height; i++)
  {
    for (j = 0; j < src_width; j++)
    {
      offset = j * 4;
      outFrame[0 + offset] = inFrame[0];
      outFrame[1 + offset] = inFrame[1];
      outFrame[2 + offset] = inFrame[2];
      outFrame[3 + offset] = 0xff;                 // opaque alpha
      inFrame += 3;
    }
    outFrame -= dst_stride * 4;              // step up one full row
    inFrame += (src_stride - src_width) * 3; // skip source row padding
  }
  return 0;
}
// Converts bottom-up 24-bit RGB (memory order B,G,R) to planar I420
// using integer BT.601 coefficients scaled by 256.  Chroma is sampled
// from one pixel of each 2x2 block (no averaging).  Strides are in
// pixels.  Returns 0 on success, -1 when any dimension or stride is 0.
// Fixed vs. the first draft: the initial source offset subtracted
// 3 * src_height instead of one full row (3 * src_stride), and the
// chroma pointers advanced by two chroma strides per luma row pair
// even though chroma has only one row per two luma rows.
int
ConvertRGB24ToI420(const uint8* src_frame, int src_stride,
                   uint8* dst_yplane, int dst_ystride,
                   uint8* dst_uplane, int dst_ustride,
                   uint8* dst_vplane, int dst_vstride,
                   int src_width,
                   int src_height
                   )
{
  if (src_width == 0 || src_height == 0 || dst_ystride == 0 ||
      dst_ustride == 0 || dst_vstride == 0 || src_stride == 0)
  {
    return -1;
  }
  uint8* yStartPtr;
  uint8* yStartPtr2;
  uint8* uStartPtr;
  uint8* vStartPtr;
  const uint8* inpPtr;
  const uint8* inpPtr2;
  int h, w;
  // Assuming RGB in a bottom up orientation.
  yStartPtr = dst_yplane;
  yStartPtr2 = yStartPtr + dst_ystride;
  uStartPtr = dst_uplane;
  vStartPtr = dst_vplane;
  // Start at the beginning of the last source row; inpPtr2 is the
  // row above it.
  inpPtr = src_frame + src_stride * src_height * 3 - 3 * src_stride;
  inpPtr2 = inpPtr - 3 * src_stride;
  for (h = 0; h < (src_height >> 1); h++)
  {
    for (w = 0; w < (src_width >> 1); w++)
    {
      // Y = ((66 R + 129 G + 25 B + 128) >> 8) + 16
      yStartPtr[0] = (uint8)((66 * inpPtr[2] + 129 * inpPtr[1]
                     + 25 * inpPtr[0] + 128) >> 8) + 16;
      yStartPtr2[0] = (uint8)((66 * inpPtr2[2] + 129 * inpPtr2[1]
                      + 25 * inpPtr2[0] + 128) >> 8) + 16;
      // Moving to next column
      yStartPtr[1] = (uint8)((66 * inpPtr[5] + 129 * inpPtr[4]
                     + 25 * inpPtr[3] + 128) >> 8) + 16;
      yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[4]
                      + 25 * inpPtr2[3] + 128) >> 8) + 16;
      // U = ((-38 R - 74 G + 112 B + 128) >> 8) + 128
      uStartPtr[0] = (uint8)((-38 * inpPtr[2] - 74 * inpPtr[1] +
                     112 * inpPtr[0] + 128) >> 8) + 128;
      // V = ((112 R - 94 G - 18 B + 128) >> 8) + 128
      vStartPtr[0] = (uint8)((112 * inpPtr[2] - 94 * inpPtr[1] -
                     18 * inpPtr[0] + 128) >> 8) + 128;
      yStartPtr += 2;
      yStartPtr2 += 2;
      uStartPtr++;
      vStartPtr++;
      inpPtr += 6;
      inpPtr2 += 6;
    } // end for w
    // Each luma pointer covers every other row: advance two strides.
    yStartPtr += 2 * dst_ystride - src_width;
    yStartPtr2 += 2 * dst_ystride - src_width;
    // Chroma has one row per luma row pair: advance one stride.
    uStartPtr += dst_ustride - (src_width >> 1);
    vStartPtr += dst_vstride - (src_width >> 1);
    // Move two source rows up (bottom-up orientation).
    inpPtr -= 3 * (2 * src_stride + src_width);
    inpPtr2 -= 3 * (2 * src_stride + src_width);
  } // end for h
  return 0;
}
int
ConvertYV12ToARGB(const uint8* src_yplane, int src_ystride,
                  const uint8* src_uplane, int src_ustride,
                  const uint8* src_vplane, int src_vstride,
                  uint8* dst_frame, int dst_stride,
                  int src_width,
                  int src_height
                  )
{
  // Converts planar YV12 (Y plane + V plane + U plane) to packed 32-bit
  // BGRA-in-memory ARGB with opaque alpha. dst_stride is in pixels (each
  // output pixel is 4 bytes). Two rows are processed per iteration so each
  // chroma sample covers a 2x2 block of luma, using the precomputed
  // fixed-point tables mapYc/mapUcg/mapUcb/mapVcr/mapVcg.
  // Returns 0 on success, -1 on invalid arguments.
  if (src_width == 0 || src_height == 0 || src_ystride == 0 ||
      src_ustride == 0 || src_vstride == 0 || dst_stride == 0)
  {
    return -1;
  }
  // NOTE: removed the unused local 'diff' (dst_stride - src_width); it was
  // computed but never read.
  uint8* out = dst_frame;
  uint8* out2 = out + dst_stride * 4;
  const uint8 *y1,*y2, *u, *v;
  int h, w;
  y1 = src_yplane;
  y2 = y1 + src_ystride;
  v = src_vplane;
  u = src_uplane;
  int32 tmpG, tmpB, tmpR;
  for (h = (src_height >> 1); h > 0; h--)
  {
    // 2 rows at a time
    for (w = 0; w < (src_width >> 1); w++)
    {
      // Top-left pixel of the 2x2 block.
      tmpR = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128 )>> 8);
      tmpG = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]]
                      + 128) >> 8);
      tmpB = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128 )>> 8);
      out[2] = Clip(tmpR);
      out[1] = Clip(tmpG);
      out[0] = Clip(tmpB);
      // Bottom-left pixel (second row, same chroma sample).
      tmpR = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
      tmpG = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]]
                      + 128) >> 8);
      tmpB = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
      out2[2] = Clip(tmpR);
      out2[1] = Clip(tmpG);
      out2[0] = Clip(tmpB);
      // Top-right pixel.
      tmpR = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128)>> 8);
      tmpG = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]
                      + 128) >> 8);
      tmpB = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
      out[6] = Clip(tmpR);
      out[5] = Clip(tmpG);
      out[4] = Clip(tmpB);
      // Bottom-right pixel.
      tmpR = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
      tmpG = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]]
                      + 128) >> 8);
      tmpB = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
      out2[6] = Clip(tmpR);
      out2[5] = Clip(tmpG);
      out2[4] = Clip(tmpB);
      // Opaque alpha for all four pixels.
      out[3] = 0xff;
      out[7] = 0xff;
      out += 8;
      out2[3] = 0xff;
      out2[7] = 0xff;
      out2 += 8;
      y1 += 2;
      y2 += 2;
      u++;
      v++;
    }
    // Advance every pointer to the start of the next pair of rows.
    y1 += 2 * src_ystride - src_width;
    y2 += 2 * src_ystride - src_width;
    u += src_ustride - (src_width >> 1);
    v += src_vstride - (src_width >> 1);
    out += 4 * (2 * dst_stride - src_width);
    out2 += 4 * (2 * dst_stride - src_width);
  }
  return 0;
}
int
ConvertABGRToI420(const uint8* src_frame, int src_stride,
                  uint8* dst_yplane, int dst_ystride,
                  uint8* dst_uplane, int dst_ustride,
                  uint8* dst_vplane, int dst_vstride,
                  int src_width,
                  int src_height
                  )
{
  // Converts a packed 4-byte-per-pixel frame to planar I420, sampling
  // chroma once per 2x2 block. src_stride is in pixels (4 bytes each).
  // NOTE(review): the channel offsets used below treat byte 1 as R,
  // byte 2 as G and byte 3 as B (Y = 66R + 129G + 25B) — confirm this
  // matches the intended ABGR memory layout.
  // Returns 0 on success, -1 on invalid arguments.
  if (src_width == 0 || src_height == 0 || dst_ystride == 0 ||
      dst_ustride == 0 || dst_vstride == 0 || src_stride == 0)
  {
    return -1;
  }
  uint8* yStartPtr;
  uint8* yStartPtr2;
  uint8* uStartPtr;
  uint8* vStartPtr;
  const uint8* inpPtr;
  const uint8* inpPtr2;
  yStartPtr = dst_yplane;
  yStartPtr2 = yStartPtr + dst_ystride;
  uStartPtr = dst_uplane;
  vStartPtr = dst_vplane;
  inpPtr = src_frame;
  inpPtr2 = inpPtr + 4 * src_stride;
  int h, w;
  for (h = 0; h < (src_height >> 1); h++)
  {
    // BUG FIX: the inner loop previously iterated (src_height >> 1) times;
    // it must walk (src_width >> 1) column pairs per row.
    for (w = 0; w < (src_width >> 1); w++)
    {
      // Y
      yStartPtr[0] = (uint8)((66 * inpPtr[1] + 129 * inpPtr[2]
                              + 25 * inpPtr[3] + 128) >> 8) + 16;
      yStartPtr2[0] = (uint8)((66 * inpPtr2[1] + 129 * inpPtr2[2]
                               + 25 * inpPtr2[3] + 128) >> 8) + 16;
      // Moving to next column
      yStartPtr[1] = (uint8)((66 * inpPtr[5] + 129 * inpPtr[6]
                              + 25 * inpPtr[7] + 128) >> 8) + 16;
      yStartPtr2[1] = (uint8)((66 * inpPtr2[5] + 129 * inpPtr2[6]
                               + 25 * inpPtr2[7] + 128) >> 8) + 16;
      // U (from the top-left pixel of the 2x2 block only)
      uStartPtr[0] = (uint8)((-38 * inpPtr[1] - 74 * inpPtr[2]
                              + 112 * inpPtr[3] + 128) >> 8) + 128;
      // V
      vStartPtr[0] = (uint8)((112 * inpPtr[1] - 94 * inpPtr[2]
                              - 18 * inpPtr[3] + 128) >> 8) + 128;
      yStartPtr += 2;
      yStartPtr2 += 2;
      uStartPtr++;
      vStartPtr++;
      inpPtr += 8;
      inpPtr2 += 8;
    }
    // Step to the next pair of rows on every plane.
    yStartPtr += 2 * dst_ystride - src_width;
    yStartPtr2 += 2 * dst_ystride - src_width;
    uStartPtr += dst_ustride + dst_ustride - (src_width >> 1);
    vStartPtr += dst_vstride + dst_vstride - (src_width >> 1);
    inpPtr += 4 * (2 * src_stride - src_width);
    inpPtr2 += 4 * (2 * src_stride - src_width);
  }
  return 0;
}
inline
uint8 Clip(int32 val)
{
  // Saturates val to the representable range of a uint8, i.e. [0, 255].
  if (val > 255)
  {
    return (uint8)255;
  }
  if (val < 0)
  {
    return (uint8)0;
  }
  return (uint8)val;
}
#ifdef SCALEOPT
// memcpy_16 assumes that width is an integer multiple of 16!
// Copies n bytes (n % 16 == 0, n > 0) using unaligned SSE2 16-byte moves.
// MSVC x86 inline assembly only.
void
*memcpy_16(void * dest, const void * src, size_t n)
{
  _asm
  {
    mov eax, dword ptr [src]
    mov ebx, dword ptr [dest]
    mov ecx, dword ptr [n]
  loop0:
    movdqu xmm0, XMMWORD PTR [eax]
    movdqu XMMWORD PTR [ebx], xmm0
    add eax, 16
    add ebx, 16
    sub ecx, 16
    jg loop0
  }
  // BUG FIX: the function is declared to return void* (like memcpy) but
  // previously fell off the end without returning a value.
  return dest;
}
// memcpy_8 assumes that width is an integer multiple of 8!
// Copies n bytes (n % 8 == 0, n > 0) using MMX 8-byte moves; emms restores
// the FPU state afterwards. MSVC x86 inline assembly only.
void
*memcpy_8(void * dest, const void * src, size_t n)
{
  _asm
  {
    mov eax, dword ptr [src]
    mov ebx, dword ptr [dest]
    mov ecx, dword ptr [n]
  loop0:
    movq mm0, QWORD PTR [eax]
    movq QWORD PTR [ebx], mm0
    add eax, 8
    add ebx, 8
    sub ecx, 8
    jg loop0
    emms
  }
  // BUG FIX: the function is declared to return void* (like memcpy) but
  // previously fell off the end without returning a value.
  return dest;
}
#endif
} // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "cpu_id.h"
#ifdef _MSC_VER
#include <intrin.h>
#elif LINUX
#include "linux.h"
#endif
// TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux.
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
// PIC builds on 32-bit x86 reserve EBX for the GOT pointer, so EBX must be
// preserved around CPUID: save it in EDI, run CPUID, then swap back so the
// "=D" output carries CPUID's EBX result.
static inline void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type)
  );
}
#elif defined(__i386__) || defined(__x86_64__)
// Non-PIC x86 / x86-64: CPUID may clobber EBX directly, so no save/restore
// dance is needed.
static inline void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type)
  );
}
#endif
namespace libyuv {
// CPU detect function for SIMD instruction sets.
// Cached detection state; filled in lazily by InitCpuFlags().
bool CpuInfo::cpu_info_initialized_ = false;
int CpuInfo::cpu_info_ = 0;
// NOTE(review): despite the comment below, there is no actual lock here;
// initialization is not synchronized.
// Global lock for cpu initialization.
#ifdef CPU_X86
// Thin public wrapper (declared in cpu_id.h) around the platform-specific
// __cpuid helper above.
void cpuid(int cpu_info[4], int info_type) {
  __cpuid(cpu_info, info_type);
}
#endif
// Detects the available SIMD instruction sets once and caches the result
// in cpu_info_.
void CpuInfo::InitCpuFlags() {
#ifdef CPU_X86
  int cpu_info[4];
  // CPUID leaf 1: SSSE3 is ECX bit 9, SSE2 is EDX bit 26.
  __cpuid(cpu_info, 1);
  cpu_info_ = (cpu_info[2] & 0x00000200 ? kCpuHasSSSE3 : 0) |
              (cpu_info[3] & 0x04000000 ? kCpuHasSSE2 : 0);
#elif defined(__ARM_NEON__)
  // gcc -mfpu=neon defines __ARM_NEON__
  // if code is specifically built for Neon-only, enable the flag.
  cpu_info_ |= kCpuHasNEON;
#elif LINUX && defined(__arm__)
  cpu_info_ = 0;
  // Look for NEON support in /proc/cpuinfo
  ProcCpuInfo proc_info;
  size_t section_count;
  if (proc_info.LoadFromSystem() &&
      proc_info.GetSectionCount(&section_count)) {
    for (size_t i = 0; i < section_count; ++i) {
      std::string out_features;
      if (proc_info.GetSectionStringValue(i, "Features", &out_features)) {
        if (out_features.find("neon") != std::string::npos) {
          cpu_info_ |= kCpuHasNEON;
        }
        // Only the first section containing a "Features" line is examined.
        break;
      }
    }
  }
#else
  // Unknown platform: assume no SIMD support.
  cpu_info_ = 0;
#endif
  cpu_info_initialized_ = true;
}
// For testing: recompute the real CPU flags, then clear every flag not set
// in enable_flags. Order matters — InitCpuFlags() overwrites cpu_info_, so
// the mask must be applied after it.
void CpuInfo::MaskCpuFlagsForTest(int enable_flags) {
  InitCpuFlags();
  cpu_info_ &= enable_flags;
}
// Returns whether the given kCpuHas* flag is set, lazily running CPU
// detection on the first query.
bool CpuInfo::TestCpuFlag(int flag) {
  if (!cpu_info_initialized_) {
    InitCpuFlags();
  }
  return (cpu_info_ & flag) != 0;
}
// Returns the vendor string from the cpu, e.g. "GenuineIntel", "AuthenticAMD".
// See "Intel Processor Identification and the CPUID Instruction"
// (Intel document number: 241618)
std::string CpuInfo::GetCpuVendor() {
#ifdef CPU_X86
  int cpu_info[4];
  // CPUID leaf 0 places the 12-byte vendor id in EBX, EDX, ECX order;
  // rearrange the registers in the array so they read as a C string.
  cpuid(cpu_info, 0);
  cpu_info[0] = cpu_info[1];  // Reorder output
  cpu_info[1] = cpu_info[3];
  cpu_info[2] = cpu_info[2];  // ECX is already in place; kept for symmetry.
  cpu_info[3] = 0;  // NUL terminator for the string constructor below.
  return std::string(reinterpret_cast<char *>(&cpu_info[0]));
#else
  return std::string("Undefined");
#endif
}
} // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_SOURCE_CPU_ID_H_
#define LIBYUV_SOURCE_CPU_ID_H_
#include <string>
#include "basic_types.h"
namespace libyuv {
#ifdef CPU_X86
void cpuid(int cpu_info[4], int info_type);
#endif
// Static-only holder for cached CPU capability flags.
class CpuInfo {
 public:
  // These flags are only valid on x86 processors
  static const int kCpuHasSSE2 = 1;
  static const int kCpuHasSSSE3 = 2;
  // SIMD support on ARM processors
  static const int kCpuHasNEON = 4;
  // Detect CPU has SSE2 etc. Lazily initializes the cached flags on the
  // first call.
  static bool TestCpuFlag(int flag);
  // Detect CPU vendor: "GenuineIntel" or "AuthenticAMD"
  static std::string GetCpuVendor();
  // For testing, allow CPU flags to be disabled.
  static void MaskCpuFlagsForTest(int enable_flags);
 private:
  // Global lock for the cpu initialization
  // NOTE(review): no actual lock exists; initialization is unsynchronized.
  static bool cpu_info_initialized_;
  static int cpu_info_;
  static void InitCpuFlags();
  DISALLOW_IMPLICIT_CONSTRUCTORS(CpuInfo);
};
} // namespace libyuv
#endif // LIBYUV_SOURCE_CPU_ID_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "format_conversion.h"
#include "common.h"
#include "video_common.h"
namespace libyuv {
// Colour of a pixel within a 2x2 Bayer grid. RED and BLUE must remain the
// two smallest values: IsRedBlue() below tests colour <= BLUE.
enum {
  RED = 0,
  BLUE = 1,
  GREEN_BETWEEN_RED = 2,   // Green pixel flanked by red in its row.
  GREEN_BETWEEN_BLUE = 3,  // Green pixel flanked by blue in its row.
};
// Location of a pixel within the image. LEFT/RIGHT/TOP/BOTTOM/CENTER are
// intermediate classification values; GetPosition() always returns the sum
// of one x-class and one y-class (the named *_EDGE/corner/MIDDLE values).
enum Position {
  LEFT = 0,
  RIGHT = 1,
  TOP = 2,
  BOTTOM = 4,
  CENTER = 6,
  // Due to the choice of the above values, these are all distinct and the
  // corner values and edge values are each contiguous. This allows us to
  // figure out the position type of a pixel with a single addition operation
  // using the above values, rather than having to use a 3x3 nested switch
  // statement.
  TOP_LEFT = TOP + LEFT,  // 2
  TOP_RIGHT = TOP + RIGHT,  // 3
  BOTTOM_LEFT = BOTTOM + LEFT,  // 4
  BOTTOM_RIGHT = BOTTOM + RIGHT,  // 5
  LEFT_EDGE = CENTER + LEFT,  // 6
  RIGHT_EDGE = CENTER + RIGHT,  // 7
  TOP_EDGE = TOP + CENTER,  // 8
  BOTTOM_EDGE = BOTTOM + CENTER,  // 10
  MIDDLE = CENTER + CENTER,  // 12
};
// Classifies pixel (x, y) as a corner, edge, or middle pixel by summing its
// horizontal and vertical position classes (see the Position enum).
static FORCE_INLINE Position GetPosition(int x, int y, int width, int height) {
  const Position xpos = (x == 0) ? LEFT
                                 : (x == width - 1) ? RIGHT : CENTER;
  const Position ypos = (y == 0) ? TOP
                                 : (y == height - 1) ? BOTTOM : CENTER;
  return static_cast<Position>(xpos + ypos);
}
// True when the pixel carries a red or blue sample (greens are the two
// GREEN_BETWEEN_* values, which are larger than BLUE).
static FORCE_INLINE bool IsRedBlue(uint8 colour) {
  return colour == RED || colour == BLUE;
}
static FORCE_INLINE uint32 FourCcToBayerPixelColourMap(uint32 fourcc) {
  // The colour map is a 4-byte array-as-uint32 containing the colours for the
  // four pixels in each 2x2 grid, in left-to-right and top-to-bottom order.
  switch (fourcc) {
    default:
      // Unknown FOURCC: assert in debug builds, then deliberately fall
      // through and treat it as RGGB in release builds.
      ASSERT(false);
    case FOURCC_RGGB:
      return FOURCC(RED, GREEN_BETWEEN_RED, GREEN_BETWEEN_BLUE, BLUE);
    case FOURCC_BGGR:
      return FOURCC(BLUE, GREEN_BETWEEN_BLUE, GREEN_BETWEEN_RED, RED);
    case FOURCC_GRBG:
      return FOURCC(GREEN_BETWEEN_RED, RED, BLUE, GREEN_BETWEEN_BLUE);
    case FOURCC_GBRG:
      return FOURCC(GREEN_BETWEEN_BLUE, BLUE, RED, GREEN_BETWEEN_RED);
  }
}
// Converts one RGB pixel to YUV using 8-bit fixed-point coefficients
// (the same 66/129/25 etc. integer approximation used by the planar
// converters earlier in this file).
static FORCE_INLINE void RGBToYUV(uint8 r, uint8 g, uint8 b,
                                  uint8* y, uint8* u, uint8* v) {
  // Taken from http://en.wikipedia.org/wiki/YUV
  *y = (( 66 * r + 129 * g +  25 * b + 128) >> 8) +  16;
  *u = ((-38 * r -  74 * g + 112 * b + 128) >> 8) + 128;
  *v = ((112 * r -  94 * g -  18 * b + 128) >> 8) + 128;
}
// Demosaics one corner pixel of a Bayer image: the two missing colour
// channels are reconstructed from the only available neighbours (one
// adjacent pixel along each axis plus one diagonal pixel).
static FORCE_INLINE void InterpolateBayerRGBCorner(uint8* r,
                                                   uint8* g,
                                                   uint8* b,
                                                   const uint8* src,
                                                   int src_pitch,
                                                   Position pos,
                                                   uint8 colour) {
  // Compute the offsets to use for fetching the adjacent pixels.
  // Offsets always point into the image (down/right for top-left, etc).
  int adjacent_row;
  int adjacent_column;
  switch (pos) {
    case TOP_LEFT:
      adjacent_row = src_pitch;
      adjacent_column = 1;
      break;
    case TOP_RIGHT:
      adjacent_row = src_pitch;
      adjacent_column = -1;
      break;
    case BOTTOM_LEFT:
      adjacent_row = -src_pitch;
      adjacent_column = 1;
      break;
    case BOTTOM_RIGHT:
    default:
      adjacent_row = -src_pitch;
      adjacent_column = -1;
      break;
  }
  // Now interpolate.
  if (IsRedBlue(colour)) {
    uint8 current_pixel = src[0];
    // Average of the adjacent green pixels (there's only two).
    *g = (src[adjacent_column] + src[adjacent_row]) / 2;
    // Average of the oppositely-coloured corner pixels (there's only one).
    uint8 corner_average = src[adjacent_row + adjacent_column];
    if (colour == RED) {
      *r = current_pixel;
      *b = corner_average;
    } else {  // i.e., BLUE
      *b = current_pixel;
      *r = corner_average;
    }
  } else {  // i.e., GREEN_BETWEEN_*
    *g = src[0];
    // Average of the adjacent same-row pixels (there's only one).
    uint8 row_average = src[adjacent_column];
    // Average of the adjacent same-column pixels (there's only one).
    uint8 column_average = src[adjacent_row];
    if (colour == GREEN_BETWEEN_RED) {
      *r = row_average;
      *b = column_average;
    } else {  // i.e., GREEN_BETWEEN_BLUE
      *b = row_average;
      *r = column_average;
    }
  }
}
// Demosaics one non-corner edge pixel of a Bayer image: the missing
// channels are reconstructed from the five neighbours that exist on an
// edge (two along the edge, one inward, two inward diagonals).
static FORCE_INLINE void InterpolateBayerRGBEdge(uint8* r,
                                                 uint8* g,
                                                 uint8* b,
                                                 const uint8* src,
                                                 int src_pitch,
                                                 Position pos,
                                                 uint8 colour) {
  // Compute the offsets to use for fetching the adjacent pixels.
  // Goes one pixel "in" to the image (i.e. towards the center)
  int inner;
  // Goes one pixel to the side (i.e. along the edge) in either the clockwise or
  // counter-clockwise direction, and its negative value goes in the other
  // direction.
  int side;
  switch (pos) {
    case TOP_EDGE:
      inner = src_pitch;
      side = 1;
      break;
    case RIGHT_EDGE:
      inner = -1;
      side = src_pitch;
      break;
    case BOTTOM_EDGE:
      inner = -src_pitch;
      side = 1;
      break;
    case LEFT_EDGE:
    default:
      inner = 1;
      side = src_pitch;
      break;
  }
  // Now interpolate.
  if (IsRedBlue(colour)) {
    uint8 current_pixel = src[0];
    // Average of the adjacent green pixels (there's only three).
    *g = (src[inner] + src[side] + src[-side]) / 3;
    // Average of the oppositely-coloured corner pixels (there's only two).
    uint8 corner_average = (src[inner + side] + src[inner - side]) / 2;
    if (colour == RED) {
      *r = current_pixel;
      *b = corner_average;
    } else {  // i.e., BLUE
      *b = current_pixel;
      *r = corner_average;
    }
  } else {  // i.e., GREEN_BETWEEN_*
    *g = src[0];
    // Average of the adjacent side-ways pixels (there's only two).
    uint8 side_average = (src[side] + src[-side]) / 2;
    // Average of the adjacent inner-ways pixels (there's only one).
    uint8 inner_pixel = src[inner];
    // Including && side == 1 effectively transposes the colour logic for
    // processing the left/right sides, which is needed since the "T" shape
    // formed by the pixels is transposed.
    if (colour == GREEN_BETWEEN_RED && side == 1) {
      *r = side_average;
      *b = inner_pixel;
    } else {  // i.e., GREEN_BETWEEN_BLUE || side != 1
      *b = side_average;
      *r = inner_pixel;
    }
  }
}
// We inline this one because it runs 99% of the time, so inlining it is
// probably beneficial.
// Demosaics one interior pixel, where all eight neighbours exist.
static FORCE_INLINE void InterpolateBayerRGBCenter(uint8* r,
                                                   uint8* g,
                                                   uint8* b,
                                                   const uint8* src,
                                                   int src_pitch,
                                                   uint8 colour) {
  if (IsRedBlue(colour)) {
    uint8 current_pixel = src[0];
    // Average of the adjacent green pixels (there's four).
    // NOTE(tschmelcher): The material at
    // http://www.siliconimaging.com/RGB%20Bayer.htm discusses a way to improve
    // quality here by using only two of the green pixels based on the
    // correlation to the nearby red/blue pixels, but that is slower and would
    // result in more edge cases.
    *g = (src[1] + src[-1] + src[src_pitch] + src[-src_pitch]) / 4;
    // Average of the oppositely-coloured corner pixels (there's four).
    uint8 corner_average = (src[src_pitch + 1] +
                            src[src_pitch - 1] +
                            src[-src_pitch + 1] +
                            src[-src_pitch - 1]) / 4;
    if (colour == RED) {
      *r = current_pixel;
      *b = corner_average;
    } else {  // i.e., BLUE
      *b = current_pixel;
      *r = corner_average;
    }
  } else {  // i.e., GREEN_BETWEEN_*
    *g = src[0];
    // Average of the adjacent same-row pixels (there's two).
    uint8 row_adjacent = (src[1] + src[-1]) / 2;
    // Average of the adjacent same-column pixels (there's two).
    uint8 column_adjacent = (src[src_pitch] + src[-src_pitch]) / 2;
    if (colour == GREEN_BETWEEN_RED) {
      *r = row_adjacent;
      *b = column_adjacent;
    } else {  // i.e., GREEN_BETWEEN_BLUE
      *b = row_adjacent;
      *r = column_adjacent;
    }
  }
}
// Converts any Bayer RGB format to I420.
// Walks the image in 2x2 grids (matching the Bayer pattern period): each of
// the four pixels is demosaiced to full RGB and converted to YUV; the four
// Y values are stored per pixel while U and V are averaged over the grid.
// Width and height must be even.
void BayerRGBToI420(const uint8* src, int src_pitch, uint32 src_fourcc,
                    uint8* y, int y_pitch,
                    uint8* u, int u_pitch,
                    uint8* v, int v_pitch,
                    int width, int height) {
  ASSERT(width % 2 == 0);
  ASSERT(height % 2 == 0);
  uint32 colour_map = FourCcToBayerPixelColourMap(src_fourcc);
  // Per-2-row pointer advances (the inner loops advance by 2 columns).
  int src_row_inc = src_pitch * 2 - width;
  int y_row_inc = y_pitch * 2 - width;
  int u_row_inc = u_pitch - width / 2;
  int v_row_inc = v_pitch - width / 2;
  // Iterate over the 2x2 grids.
  for (int y1 = 0; y1 < height; y1 += 2) {
    for (int x1 = 0; x1 < width; x1 += 2) {
      uint32 colours = colour_map;
      int total_u = 0;
      int total_v = 0;
      // Iterate over the four pixels within them.
      for (int y2 = 0; y2 < 2; ++y2) {
        for (int x2 = 0; x2 < 2; ++x2) {
          uint8 r, g, b;
          // The low-order byte of the colour map is the current colour.
          uint8 current_colour = static_cast<uint8>(colours);
          colours >>= 8;
          Position pos = GetPosition(x1 + x2, y1 + y2, width, height);
          const uint8* src_pixel = &src[y2 * src_pitch + x2];
          uint8* y_pixel = &y[y2 * y_pitch + x2];
          // Convert from Bayer RGB to regular RGB.
          if (pos == MIDDLE) {
            // 99% of the image is the middle.
            InterpolateBayerRGBCenter(&r, &g, &b,
                                      src_pixel, src_pitch,
                                      current_colour);
          } else if (pos >= LEFT_EDGE) {
            // Next most frequent is edges.
            InterpolateBayerRGBEdge(&r, &g, &b,
                                    src_pixel, src_pitch, pos,
                                    current_colour);
          } else {
            // Last is the corners. There are only 4.
            InterpolateBayerRGBCorner(&r, &g, &b,
                                      src_pixel, src_pitch, pos,
                                      current_colour);
          }
          // Convert from RGB to YUV.
          uint8 tmp_u, tmp_v;
          RGBToYUV(r, g, b, y_pixel, &tmp_u, &tmp_v);
          total_u += tmp_u;
          total_v += tmp_v;
        }
      }
      src += 2;
      y += 2;
      // One chroma sample per 2x2 grid: the average of the four pixels.
      *u = total_u / 4;
      *v = total_v / 4;
      ++u;
      ++v;
    }
    src += src_row_inc;
    y += y_row_inc;
    u += u_row_inc;
    v += v_row_inc;
  }
}
// Note: to do this with Neon vld4.8 would load ARGB values into 4 registers
// and vst would select which 2 components to write. The low level would need
// to be ARGBToBG, ARGBToGB, ARGBToRG, ARGBToGR
#if defined(WIN32) && !defined(COVERAGE_ENABLED)
#define HAS_ARGBTOBAYERROW_SSSE3
// SSSE3 row converter: pshufb selects one byte from each of 4 ARGB pixels
// per iteration (selector broadcast into xmm0). Requires pix % 4 == 0 and
// 16-byte-aligned src_argb (movdqa); callers check this before selecting it.
__declspec(naked)
static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
                                 uint8* dst_bayer, uint32 selector, int pix) {
  __asm {
    mov        eax, [esp + 4]    // src_argb
    mov        edx, [esp + 8]    // dst_bayer
    movd       xmm0, [esp + 12]  // selector
    mov        ecx, [esp + 16]   // pix
    pshufd     xmm0, xmm0, 0
  wloop:
    movdqa     xmm1, [eax]
    lea        eax, [eax + 16]
    pshufb     xmm1, xmm0
    movd       [edx], xmm1
    lea        edx, [edx + 4]
    sub        ecx, 4
    ja         wloop
    ret
  }
}
#elif defined(__i386__) && !defined(COVERAGE_ENABLED) && \
    !TARGET_IPHONE_SIMULATOR
#define HAS_ARGBTOBAYERROW_SSSE3
extern "C" void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
                                     uint32 selector, int pix);
// GCC/AT&T translation of the MSVC routine above: broadcast the selector
// into xmm0, then pshufb 4 ARGB pixels down to 4 Bayer bytes per iteration.
// OSX prefixes C symbols with an underscore, hence the two labels.
asm(
    ".text\n"
#if defined(OSX)
    ".globl _ARGBToBayerRow_SSSE3\n"
"_ARGBToBayerRow_SSSE3:\n"
#else
    ".global ARGBToBayerRow_SSSE3\n"
"ARGBToBayerRow_SSSE3:\n"
#endif
    "mov    0x4(%esp),%eax\n"
    "mov    0x8(%esp),%edx\n"
    "movd   0xc(%esp),%xmm0\n"
    "mov    0x10(%esp),%ecx\n"
    "pshufd $0x0,%xmm0,%xmm0\n"
"1:"
    "movdqa (%eax),%xmm1\n"
    "lea    0x10(%eax),%eax\n"
    "pshufb %xmm0,%xmm1\n"
    "movd   %xmm1,(%edx)\n"
    "lea    0x4(%edx),%edx\n"
    "sub    $0x4,%ecx\n"
    "ja     1b\n"
    "ret\n"
);
#endif
// Portable fallback: extracts one Bayer byte from each ARGB pixel. The two
// low-order bytes of the selector are the byte offsets of the channels to
// pick from the even and odd pixel of each pair.
static void ARGBToBayerRow_C(const uint8* src_argb,
                             uint8* dst_bayer, uint32 selector, int pix) {
  const int even_index = selector & 0xff;
  const int odd_index = (selector >> 8) & 0xff;
  // Copy a row of Bayer, two pixels (8 ARGB bytes) per step.
  for (int x = 0; x < pix; x += 2, src_argb += 8, dst_bayer += 2) {
    dst_bayer[0] = src_argb[even_index];
    dst_bayer[1] = src_argb[odd_index];
  }
}
// generate a selector mask useful for pshufb
// Packs four byte offsets into a little-endian word: output byte k selects
// channel select0/select1 (alternating) from input pixel k (pixels are 4
// bytes apart, hence the +4/+8/+12).
static uint32 GenerateSelector(int select0, int select1) {
  const int byte0 = select0;
  const int byte1 = select1 + 4;
  const int byte2 = select0 + 8;
  const int byte3 = select1 + 12;
  return static_cast<uint32>(byte0) |
         static_cast<uint32>(byte1 << 8) |
         static_cast<uint32>(byte2 << 16) |
         static_cast<uint32>(byte3 << 24);
}
// Converts any 32 bit ARGB to any Bayer RGB format.
// Rows alternate between the two selector patterns of the 2x2 Bayer grid
// (index_map[y & 1]). Picks the SSSE3 row routine when the CPU supports it
// and the buffers/strides meet its alignment requirements.
void RGB32ToBayerRGB(const uint8* src_rgb, int src_pitch_rgb,
                     uint32 src_fourcc_rgb,
                     uint8* dst_bayer, int dst_pitch_bayer,
                     uint32 dst_fourcc_bayer,
                     int width, int height) {
  ASSERT(width % 2 == 0);
  void (*ARGBToBayerRow)(const uint8* src_argb,
                         uint8* dst_bayer, uint32 selector, int pix);
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
  if (CpuInfo::TestCpuFlag(CpuInfo::kCpuHasSSSE3) &&
      (width % 4 == 0) &&
      IS_ALIGNED(src_rgb, 16) && (src_pitch_rgb % 16 == 0) &&
      IS_ALIGNED(dst_bayer, 4) && (dst_pitch_bayer % 4 == 0)) {
    ARGBToBayerRow = ARGBToBayerRow_SSSE3;
  } else
#endif
  {
    ARGBToBayerRow = ARGBToBayerRow_C;
  }
  // Only ARGB input is implemented.
  ASSERT(src_fourcc_rgb == FOURCC_ARGB);
  int blue_index = 0;
  int green_index = 1;
  int red_index = 2;
  // Now build a lookup table containing the indices for the four pixels in each
  // 2x2 Bayer grid.
  uint32 index_map[2];
  switch (dst_fourcc_bayer) {
    default:
      ASSERT(false);
    case FOURCC_RGGB:
      index_map[0] = GenerateSelector(red_index, green_index);
      index_map[1] = GenerateSelector(green_index, blue_index);
      break;
    case FOURCC_BGGR:
      index_map[0] = GenerateSelector(blue_index, green_index);
      index_map[1] = GenerateSelector(green_index, red_index);
      break;
    case FOURCC_GRBG:
      index_map[0] = GenerateSelector(green_index, red_index);
      index_map[1] = GenerateSelector(blue_index, green_index);
      break;
    case FOURCC_GBRG:
      index_map[0] = GenerateSelector(green_index, blue_index);
      index_map[1] = GenerateSelector(red_index, green_index);
      break;
  }
  // Now convert.
  for (int y = 0; y < height; ++y) {
    ARGBToBayerRow(src_rgb, dst_bayer, index_map[y & 1], width);
    src_rgb += src_pitch_rgb;
    dst_bayer += dst_pitch_bayer;
  }
}
} // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "general.h"
#include <assert.h>
#include <string.h> // memcpy(), memset()
#include "video_common.h"
namespace libyuv {
int
MirrorI420LeftRight( const uint8* src_frame,uint8* dst_frame,
                     int src_width, int src_height)
{
  // Mirrors an I420 frame horizontally (left <-> right) into dst_frame.
  // Each iteration swaps a symmetric pair of samples, so the routine also
  // works in place when src_frame == dst_frame. Width and height must be
  // even. Returns 0 on success, -1 on invalid dimensions.
  if (src_width < 1 || src_height < 1)
  {
    return -1;
  }
  assert(src_width % 2 == 0 && src_height % 2 == 0);
  int indO = 0;
  int indS = 0;
  int wind, hind;
  uint8 tmpVal;
  // Will swap two values per iteration
  const int halfW = src_width >> 1;
  // NOTE: removed the unused local 'halfStride'; it duplicated halfW and
  // was never referenced.
  // Y
  for (wind = 0; wind < halfW; wind++ )
  {
    for (hind = 0; hind < src_height; hind++ )
    {
      indO = hind * src_width + wind;
      indS = hind * src_width + (src_width - wind - 1); // swapping index
      tmpVal = src_frame[indO];
      dst_frame[indO] = src_frame[indS];
      dst_frame[indS] = tmpVal;
    } // end for (height)
  } // end for(width)
  // Chroma planes are half width/height; swap lengthW pairs per row.
  const int lengthW = src_width >> 2;
  const int lengthH = src_height >> 1;
  // V
  int zeroInd = src_width * src_height;
  for (wind = 0; wind < lengthW; wind++ )
  {
    for (hind = 0; hind < lengthH; hind++ )
    {
      indO = zeroInd + hind * halfW + wind;
      indS = zeroInd + hind * halfW + (halfW - wind - 1); // swapping index
      tmpVal = src_frame[indO];
      dst_frame[indO] = src_frame[indS];
      dst_frame[indS] = tmpVal;
    } // end for (height)
  } // end for(width)
  // U
  zeroInd += src_width * src_height >> 2;
  for (wind = 0; wind < lengthW; wind++ )
  {
    for (hind = 0; hind < lengthH; hind++ )
    {
      indO = zeroInd + hind * halfW + wind;
      indS = zeroInd + hind * halfW + (halfW - wind - 1); // swapping index
      tmpVal = src_frame[indO];
      dst_frame[indO] = src_frame[indS];
      dst_frame[indS] = tmpVal;
    } // end for (height)
  } // end for(width)
  return 0;
}
// Make a center cut
int
CutI420Frame(uint8* frame,
             int fromWidth, int fromHeight,
             int toWidth, int toHeight)
{
  // Crops an I420 frame in place to a centered toWidth x toHeight window,
  // compacting the cut planes towards the front of the buffer.
  // Returns the byte size of the cut frame, or -1 on bad arguments.
  if (toWidth < 1 || fromWidth < 1 || toHeight < 1 || fromHeight < 1 )
  {
    return -1;
  }
  if (toWidth == fromWidth && toHeight == fromHeight)
  {
    // Nothing to do
    return 3 * toHeight * toWidth / 2;
  }
  if (toWidth > fromWidth || toHeight > fromHeight)
  {
    // error
    return -1;
  }
  int i = 0;
  int m = 0;
  int loop = 0;
  int halfToWidth = toWidth / 2;
  int halfToHeight = toHeight / 2;
  int halfFromWidth = fromWidth / 2;
  int halfFromHeight= fromHeight / 2;
  int cutHeight = ( fromHeight - toHeight ) / 2;
  int cutWidth = ( fromWidth - toWidth ) / 2;
  // Y: copy each cropped luma row towards the front of the buffer.
  // BUG FIX: use memmove instead of memcpy — source and destination are in
  // the same buffer and can overlap when the crop margins are small.
  for (i = fromWidth * cutHeight + cutWidth; loop < toHeight ;
      loop++, i += fromWidth)
  {
    memmove(&frame[m],&frame[i],toWidth);
    m += toWidth;
  }
  i = fromWidth * fromHeight; // ilum
  loop = 0;
  // First chroma plane: skip its top/left crop margins, then copy rows.
  for ( i += (halfFromWidth * cutHeight / 2 + cutWidth / 2);
       loop < halfToHeight; loop++,i += halfFromWidth)
  {
    memmove(&frame[m],&frame[i],halfToWidth);
    m += halfToWidth;
  }
  loop = 0;
  i = fromWidth * fromHeight + halfFromHeight * halfFromWidth; // ilum + Cr
  // Second chroma plane.
  for ( i += (halfFromWidth * cutHeight / 2 + cutWidth / 2);
       loop < halfToHeight; loop++, i += halfFromWidth)
  {
    memmove(&frame[m],&frame[i],halfToWidth);
    m += halfToWidth;
  }
  return halfToWidth * toHeight * 3;
}
int
CutPadI420Frame(const uint8* inFrame, int inWidth,
                int inHeight, uint8* outFrame,
                int outWidth, int outHeight)
{
  // Converts an I420 frame of one size into another by symmetrically
  // padding (luma pad value 0, chroma pad value 127) or cutting each
  // dimension. Width is padded or cut in both branches; the two branches
  // differ in whether the height is padded or cut.
  // NOTE(review): the case inHeight > outHeight combined with
  // inWidth < outWidth is handled by the second branch (cut height, pad
  // width); a pad-height/cut-width mix goes through the first branch.
  // Returns the byte size of the output frame (3 * outWidth * outHeight/2).
  if (inWidth < 1 || outWidth < 1 || inHeight < 1 || outHeight < 1 )
  {
    return -1;
  }
  if (inWidth == outWidth && inHeight == outHeight)
  {
    // Same geometry: straight copy of all three planes.
    memcpy(outFrame, inFrame, 3 * outWidth * (outHeight >> 1));
  }
  else
  {
    if ( inHeight < outHeight)
    {
      // pad height
      int padH = outHeight - inHeight;
      int i = 0;
      int padW = 0;
      int cutW = 0;
      int width = inWidth;
      if (inWidth < outWidth)
      {
        // pad width
        padW = outWidth - inWidth;
      }
      else
      {
        // cut width
        cutW = inWidth - outWidth;
        width = outWidth;
      }
      // Luma: top pad rows (value 0).
      if (padH)
      {
        memset(outFrame, 0, outWidth * (padH >> 1));
        outFrame += outWidth * (padH >> 1);
      }
      // Luma: copy each input row, with left/right pad or cut.
      for (i = 0; i < inHeight;i++)
      {
        if (padW)
        {
          memset(outFrame, 0, padW / 2);
          outFrame += padW / 2;
        }
        inFrame += cutW >> 1; // in case we have a cut
        memcpy(outFrame,inFrame ,width);
        inFrame += cutW >> 1;
        outFrame += width;
        inFrame += width;
        if (padW)
        {
          memset(outFrame, 0, padW / 2);
          outFrame += padW / 2;
        }
      }
      // Luma: bottom pad rows (value 0).
      if (padH)
      {
        memset(outFrame, 0, outWidth * (padH >> 1));
        outFrame += outWidth * (padH >> 1);
      }
      // First chroma plane: top pad (value 127; chroma neutral is assumed
      // to be ~128 — 127 is what this code uses throughout).
      if (padH)
      {
        memset(outFrame, 127, (outWidth >> 2) * (padH >> 1));
        outFrame += (outWidth >> 2) * (padH >> 1);
      }
      for (i = 0; i < (inHeight >> 1); i++)
      {
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
        inFrame += cutW >> 2; // in case we have a cut
        memcpy(outFrame, inFrame,width >> 1);
        inFrame += cutW >> 2;
        outFrame += width >> 1;
        inFrame += width >> 1;
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
      }
      // Pad between the chroma planes: bottom of plane 1 plus top of
      // plane 2, written as one block of full chroma rows.
      if (padH)
      {
        memset(outFrame, 127, (outWidth >> 1) * (padH >> 1));
        outFrame += (outWidth >> 1) * (padH >> 1);
      }
      // Second chroma plane rows.
      for (i = 0; i < (inHeight >> 1); i++)
      {
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
        inFrame += cutW >> 2; // in case we have a cut
        memcpy(outFrame, inFrame,width >> 1);
        inFrame += cutW >> 2;
        outFrame += width >> 1;
        inFrame += width >> 1;
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
      }
      // Second chroma plane: bottom pad.
      if (padH)
      {
        memset(outFrame, 127, (outWidth >> 2) * (padH >> 1));
        outFrame += (outWidth >> 2) * (padH >> 1);
      }
    }
    else
    {
      // cut height
      int i = 0;
      int padW = 0;
      int cutW = 0;
      int width = inWidth;
      if (inWidth < outWidth)
      {
        // pad width
        padW = outWidth - inWidth;
      } else
      {
        // cut width
        cutW = inWidth - outWidth;
        width = outWidth;
      }
      int diffH = inHeight - outHeight;
      inFrame += inWidth * (diffH >> 1); // skip top I
      // Luma rows, with left/right pad or cut.
      for (i = 0; i < outHeight; i++)
      {
        if (padW)
        {
          memset(outFrame, 0, padW / 2);
          outFrame += padW / 2;
        }
        inFrame += cutW >> 1; // in case we have a cut
        memcpy(outFrame,inFrame ,width);
        inFrame += cutW >> 1;
        outFrame += width;
        inFrame += width;
        if (padW)
        {
          memset(outFrame, 0, padW / 2);
          outFrame += padW / 2;
        }
      }
      inFrame += inWidth * (diffH >> 1); // skip end I
      inFrame += (inWidth >> 2) * (diffH >> 1); // skip top of Cr
      // First chroma plane rows.
      for (i = 0; i < (outHeight >> 1); i++)
      {
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
        inFrame += cutW >> 2; // in case we have a cut
        memcpy(outFrame, inFrame,width >> 1);
        inFrame += cutW >> 2;
        outFrame += width >> 1;
        inFrame += width >> 1;
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
      }
      inFrame += (inWidth >> 2) * (diffH >> 1); // skip end of Cr
      inFrame += (inWidth >> 2) * (diffH >> 1); // skip top of Cb
      // Second chroma plane rows.
      for (i = 0; i < (outHeight >> 1); i++)
      {
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
        inFrame += cutW >> 2; // in case we have a cut
        memcpy(outFrame, inFrame, width >> 1);
        inFrame += cutW >> 2;
        outFrame += width >> 1;
        inFrame += width >> 1;
        if (padW)
        {
          memset(outFrame, 127, padW >> 2);
          outFrame += padW >> 2;
        }
      }
    }
  }
  return 3 * outWidth * (outHeight >> 1);
}
} // nmaespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#if defined(LINUX) || defined(ANDROID)
#include "linux.h"
#include <ctype.h>
#include <errno.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <cstdio>
#include <set>
namespace libyuv {
// Paths read by ProcCpuInfo; kCpuMaxFreqFile is declared for callers that
// query the max CPU frequency.
static const char kCpuInfoFile[] = "/proc/cpuinfo";
static const char kCpuMaxFreqFile[] =
    "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq";
// No state to set up or tear down beyond the members' own defaults.
ProcCpuInfo::ProcCpuInfo() {
}
ProcCpuInfo::~ProcCpuInfo() {
}
// Reads and parses /proc/cpuinfo into sections_.
// Returns false if the file cannot be opened or parsing yields no sections.
bool ProcCpuInfo::LoadFromSystem() {
  ConfigParser procfs;
  if (!procfs.Open(kCpuInfoFile)) {
    return false;
  }
  return procfs.Parse(&sections_);
}
// NOTE: removed a stray ';' that followed the closing brace (an extraneous
// empty declaration at namespace scope).
// Reports how many cpuinfo sections were parsed. Fails if nothing has been
// loaded yet. |count| may be NULL to merely query success.
bool ProcCpuInfo::GetSectionCount(size_t* count) {
  if (sections_.empty()) {
    return false;
  }
  if (count != NULL) {
    *count = sections_.size();
  }
  return true;
}
// Counts logical CPUs from the parsed cpuinfo sections. |num| may be NULL.
// Fails only if nothing has been loaded yet.
bool ProcCpuInfo::GetNumCpus(int* num) {
  if (sections_.empty()) {
    return false;
  }
  int total_cpus = 0;
#if defined(__arm__)
  // Count the number of blocks that have a "processor" key defined. On ARM,
  // there may be extra blocks of information that aren't per-processor.
  size_t section_count = sections_.size();
  for (size_t i = 0; i < section_count; ++i) {
    int processor_id;
    if (GetSectionIntValue(i, "processor", &processor_id)) {
      ++total_cpus;
    }
  }
  // Single core ARM systems don't include "processor" keys at all, so return
  // that we have a single core if we didn't find any explicitly above.
  if (total_cpus == 0) {
    total_cpus = 1;
  }
#else
  // On X86, there is exactly one info section per processor.
  total_cpus = static_cast<int>(sections_.size());
#endif
  if (num) {
    *num = total_cpus;
  }
  return true;
}
// Counts physical cores (as opposed to hyperthreads). |num| may be NULL.
// Fails only if nothing has been loaded yet.
bool ProcCpuInfo::GetNumPhysicalCpus(int* num) {
  if (sections_.empty()) {
    return false;
  }
  // TODO(noahric): /proc/cpuinfo only reports cores that are currently
  // _online_, so this may underreport the number of physical cores.
#if defined(__arm__)
  // ARM (currently) has no hyperthreading, so just return the same value
  // as GetNumCpus.
  return GetNumCpus(num);
#else
  int total_cores = 0;
  std::set<int> physical_ids;
  size_t section_count = sections_.size();
  for (size_t i = 0; i < section_count; ++i) {
    int physical_id;
    int cores;
    // Count the cores for the physical id only if we have not counted the id.
    if (GetSectionIntValue(i, "physical id", &physical_id) &&
        GetSectionIntValue(i, "cpu cores", &cores) &&
        physical_ids.find(physical_id) == physical_ids.end()) {
      physical_ids.insert(physical_id);
      total_cores += cores;
    }
  }
  if (num) {
    *num = total_cores;
  }
  return true;
#endif
}
// Retrieves the CPU family id. Defaults to 0 when the key is absent; |id|
// may be NULL. Always returns true.
bool ProcCpuInfo::GetCpuFamily(int* id) {
  int cpu_family = 0;
  // shh {
#if defined(__arm__)
  // On ChromeOS seaboard, there is no 'cpu family' in '/proc/cpuinfo'. But
  // there is 'CPU Architecture' which can be used as 'cpu family'.
  // See http://en.wikipedia.org/wiki/ARM_architecture for a good list of
  // ARM cpu families, architectures, and their mappings.
  // There may be multiple sessions that aren't per-processor. We need to scan
  // through each session until we find the first 'CPU architecture'.
  size_t section_count = sections_.size();
  for (size_t i = 0; i < section_count; ++i) {
    if (GetSectionIntValue(i, "CPU architecture", &cpu_family)) {
      // We return the first one (if there are multiple entries).
      break;
    };
  }
#else
  // shh }
  GetSectionIntValue(0, "cpu family", &cpu_family);
  // shh {
#endif
  // shh }
  if (id) {
    *id = cpu_family;
  }
  return true;
}
bool ProcCpuInfo::GetSectionStringValue(size_t section_num,
const std::string& key,
std::string* result) {
if (section_num >= sections_.size()) {
return false;
}
ConfigParser::SimpleMap::iterator iter = sections_[section_num].find(key);
if (iter == sections_[section_num].end()) {
return false;
}
*result = iter->second;
return true;
}
bool ProcCpuInfo::GetSectionIntValue(size_t section_num,
const std::string& key,
int* result) {
if (section_num >= sections_.size()) {
return false;
}
ConfigParser::SimpleMap::iterator iter = sections_[section_num].find(key);
if (iter == sections_[section_num].end()) {
return false;
}
return FromString(iter->second, result);
}
// A stream is attached later via Open() or Attach(); nothing to do here.
ConfigParser::ConfigParser() {}
ConfigParser::~ConfigParser() {}
bool ConfigParser::Open(const std::string& filename) {
FileStream* fs = new FileStream();
if (!fs->Open(filename, "r", NULL)) {
return false;
}
instream_.reset(fs);
return true;
}
// Attaches an already-open stream as the parser's input. Takes ownership of
// |stream| (it is stored in the scoped_ptr and freed with the parser).
void ConfigParser::Attach(StreamInterface* stream) {
  instream_.reset(stream);
}
// Parses the attached stream and appends one key/value map per section to
// |key_val_pairs|. Returns true if the vector ends up non-empty.
bool ConfigParser::Parse(MapVector* key_val_pairs) {
  SimpleMap current_section;
  for (;;) {
    if (!ParseSection(&current_section)) {
      break;
    }
    key_val_pairs->push_back(current_section);
    current_section.clear();
  }
  return !key_val_pairs->empty();
}
// Reads key/value lines into |key_val_pair| until ParseLine() fails (section
// separator or end of stream). Returns true if any pair was collected.
bool ConfigParser::ParseSection(SimpleMap* key_val_pair) {
  std::string key;
  std::string value;
  while (ParseLine(&key, &value)) {
    (*key_val_pair)[key] = value;
  }
  return !key_val_pair->empty();
}
bool ConfigParser::ParseLine(std::string* key, std::string* value) {
// Parses the next line in the filestream and places the found key-value
// pair into key and val.
std::string line;
if ((instream_->ReadLine(&line)) == EOF) {
return false;
}
std::vector<std::string> tokens;
if (2 != split(line, ':', &tokens)) {
return false;
}
// Removes whitespace at the end of Key name
size_t pos = tokens[0].length() - 1;
while ((pos > 0) && isspace(tokens[0][pos])) {
pos--;
}
tokens[0].erase(pos + 1);
// Removes whitespace at the start of value
pos = 0;
while (pos < tokens[1].length() && isspace(tokens[1][pos])) {
pos++;
}
tokens[1].erase(0, pos);
*key = tokens[0];
*value = tokens[1];
return true;
}
// Reads exactly one line from |stream| into |out|. Logs and returns false if
// the stream ended early or reported an error.
static bool ExpectLineFromStream(FileStream* stream,
                                 std::string* out) {
  StreamResult result = stream->ReadLine(out);
  if (result == SR_SUCCESS) {
    return true;
  }
  if (result == SR_EOS) {
    LOG(LS_ERROR) << "Incorrect number of lines in stream";
  } else {
    LOG(LS_ERROR) << "Error when reading from stream";
  }
  return false;
}
// Verifies that |stream| is exhausted; logs a warning (but otherwise ignores
// it) when extra data or a read error is encountered.
static void ExpectEofFromStream(FileStream* stream) {
  std::string dummy;
  switch (stream->ReadLine(&dummy)) {
    case SR_SUCCESS:
      LOG(LS_WARNING) << "Ignoring unexpected extra lines from stream";
      break;
    case SR_EOS:
      break;  // Expected: nothing left to read.
    default:
      LOG(LS_WARNING) << "Error when checking for extra lines from stream";
      break;
  }
}
// For caching the lsb_release output (reading it invokes a sub-process and
// hence is somewhat expensive).
// Both the cache and reads/writes of it are guarded by the critical section
// below; see ReadLinuxLsbRelease().
static std::string lsb_release_string;
static CriticalSection lsb_release_string_critsec;
// Runs "lsb_release -idrcs" and folds its four output lines into a single
// "DISTRIB_ID=... DISTRIB_DESCRIPTION=... DISTRIB_RELEASE=...
// DISTRIB_CODENAME=..." string. The result is cached in
// |lsb_release_string|; on any failure an empty string is returned (and
// nothing is cached, so a later call retries).
std::string ReadLinuxLsbRelease() {
  CritScope cs(&lsb_release_string_critsec);
  if (!lsb_release_string.empty()) {
    // Have cached result from previous call.
    return lsb_release_string;
  }
  // No cached result. Run lsb_release and parse output.
  POpenStream lsb_release_output;
  if (!lsb_release_output.Open("lsb_release -idrcs", "r", NULL)) {
    LOG_ERR(LS_ERROR) << "Can't run lsb_release";
    return lsb_release_string;  // empty
  }
  // Read in the command's output and build the string.
  // The -idrcs flags emit exactly four lines: id, description, release,
  // codename, in that order.
  std::ostringstream sstr;
  std::string line;
  int wait_status;
  if (!ExpectLineFromStream(&lsb_release_output, &line)) {
    return lsb_release_string;  // empty
  }
  sstr << "DISTRIB_ID=" << line;
  if (!ExpectLineFromStream(&lsb_release_output, &line)) {
    return lsb_release_string;  // empty
  }
  sstr << " DISTRIB_DESCRIPTION=\"" << line << '"';
  if (!ExpectLineFromStream(&lsb_release_output, &line)) {
    return lsb_release_string;  // empty
  }
  sstr << " DISTRIB_RELEASE=" << line;
  if (!ExpectLineFromStream(&lsb_release_output, &line)) {
    return lsb_release_string;  // empty
  }
  sstr << " DISTRIB_CODENAME=" << line;
  // Should not be anything left.
  ExpectEofFromStream(&lsb_release_output);
  lsb_release_output.Close();
  // A non-zero exit only produces a warning; the collected output is still
  // cached and returned.
  wait_status = lsb_release_output.GetWaitStatus();
  if (wait_status == -1 ||
      !WIFEXITED(wait_status) ||
      WEXITSTATUS(wait_status) != 0) {
    LOG(LS_WARNING) << "Unexpected exit status from lsb_release";
  }
  lsb_release_string = sstr.str();
  return lsb_release_string;
}
// Returns "sysname release version machine" from uname(2), or an empty
// string if the call fails.
std::string ReadLinuxUname() {
  struct utsname info;
  if (uname(&info) < 0) {
    LOG_ERR(LS_ERROR) << "Can't call uname()";
    return std::string();
  }
  std::ostringstream result;
  result << info.sysname << " "
         << info.release << " "
         << info.version << " "
         << info.machine;
  return result.str();
}
// Reads the maximum CPU frequency from the sysfs file named by
// kCpuMaxFreqFile. Returns the parsed integer, or -1 on open/read/parse
// failure.
int ReadCpuMaxFreq() {
  FileStream fs;
  if (!fs.Open(kCpuMaxFreqFile, "r", NULL)) {
    return -1;
  }
  std::string line;
  if (fs.ReadLine(&line) != SR_SUCCESS) {
    return -1;
  }
  int freq = -1;
  if (!FromString(line, &freq)) {
    return -1;
  }
  return freq;
}
} // namespace libyuv
#endif // defined(LINUX) || defined(ANDROID)
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_SOURCE_LINUX_H_
#define LIBYUV_SOURCE_LINUX_H_
#if defined(LINUX) || defined(ANDROID)
#include <string>
#include <map>
#include <vector>
namespace libyuv {
//////////////////////////////////////////////////////////////////////////////
// ConfigParser parses a FileStream of an ".ini"-type format into a map.
//////////////////////////////////////////////////////////////////////////////
// Sample Usage:
// ConfigParser parser;
// ConfigParser::MapVector key_val_pairs;
// if (parser.Open(inifile) && parser.Parse(&key_val_pairs)) {
//   for (size_t section_num = 0; section_num < key_val_pairs.size(); ++section_num) {
// std::string val1 = key_val_pairs[section_num][key1];
// std::string val2 = key_val_pairs[section_num][key2];
// // Do something with valn;
// }
// }
class ConfigParser {
 public:
  // One section's key/value pairs.
  typedef std::map<std::string, std::string> SimpleMap;
  // All sections of a parsed file, in file order.
  typedef std::vector<SimpleMap> MapVector;
  ConfigParser();
  virtual ~ConfigParser();
  // Opens |filename| as the input stream; returns false if it can't be read.
  virtual bool Open(const std::string& filename);
  // Attaches an already-open stream; takes ownership of |stream|.
  virtual void Attach(StreamInterface* stream);
  // Parses the whole stream into one map per section.
  virtual bool Parse(MapVector* key_val_pairs);
  // Parses a single section into |key_val_pair|; false when empty.
  virtual bool ParseSection(SimpleMap* key_val_pair);
  // Parses one "key : value" line; false at end of section/stream.
  virtual bool ParseLine(std::string* key, std::string* value);
 private:
  scoped_ptr<StreamInterface> instream_;  // Owned input stream.
};
//////////////////////////////////////////////////////////////////////////////
// ProcCpuInfo reads CPU info from the /proc subsystem on any *NIX platform.
//////////////////////////////////////////////////////////////////////////////
// Sample Usage:
// ProcCpuInfo proc_info;
// int no_of_cpu;
// if (proc_info.LoadFromSystem()) {
// std::string out_str;
// proc_info.GetNumCpus(&no_of_cpu);
//    proc_info.GetSectionStringValue(0, "vendor_id", &out_str);
//  }
class ProcCpuInfo {
 public:
  ProcCpuInfo();
  virtual ~ProcCpuInfo();
  // Reads the proc subsystem's cpu info into memory. If this fails, this
  // returns false; if it succeeds, it returns true.
  virtual bool LoadFromSystem();
  // Obtains the number of logical CPU threads and places the value num.
  virtual bool GetNumCpus(int* num);
  // Obtains the number of physical CPU cores and places the value num.
  virtual bool GetNumPhysicalCpus(int* num);
  // Obtains the CPU family id.
  virtual bool GetCpuFamily(int* id);
  // Obtains the number of sections in /proc/cpuinfo, which may be greater
  // than the number of CPUs (e.g. on ARM)
  virtual bool GetSectionCount(size_t* count);
  // Looks for the CPU proc item with the given name for the given section
  // number and places the string value in result.
  virtual bool GetSectionStringValue(size_t section_num, const std::string& key,
                                     std::string* result);
  // Looks for the CPU proc item with the given name for the given section
  // number and places the int value in result.
  virtual bool GetSectionIntValue(size_t section_num, const std::string& key,
                                  int* result);
 private:
  // One key/value map per section ("processor" block) of /proc/cpuinfo,
  // filled in by LoadFromSystem().
  ConfigParser::MapVector sections_;
};
// Builds a string containing the info from lsb_release on a single line.
std::string ReadLinuxLsbRelease();
// Returns the output of "uname".
std::string ReadLinuxUname();
// Returns the content (int) of
// /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
// Returns -1 on error.
int ReadCpuMaxFreq();
} // namespace libyuv
#endif // defined(LINUX) || defined(ANDROID)
#endif // LIBYUV_SOURCE_LINUX_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "planar_functions.h"
#include <string.h>
#include "cpu_id.h"
namespace libyuv {
#if defined(__ARM_NEON__) && !defined(COVERAGE_ENABLED)
#define HAS_SPLITUV_NEON
// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
// Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
// De-interleaves |pix| UV pairs: U bytes go to dst_u, V bytes to dst_v.
// NOTE(review): the 'bhi' loop exits only when the subtraction reaches
// exactly 0, so pix must be a positive multiple of 16 — confirm callers.
static void SplitUV_NEON(const uint8* src_uv,
                         uint8* dst_u, uint8* dst_v, int pix) {
  __asm__ volatile
  (
    "1:\n"
    "vld2.u8    {q0,q1}, [%0]!    \n"  // load 16 pairs of UV
    "vst1.u8    {q0}, [%1]!       \n"  // store U
    "vst1.u8    {q1}, [%2]!       \n"  // Store V
    "subs       %3, %3, #16       \n"  // 16 processed per loop
    "bhi        1b                \n"
    :                                  // Output registers
    : "r"(src_uv), "r"(dst_u), "r"(dst_v), "r"(pix)  // Input registers
    : "q0", "q1"                       // Clobber List
  );
}
#elif defined(WIN32) && !defined(COVERAGE_ENABLED)
#define HAS_SPLITUV_SSE2
// De-interleaves |pix| UV pairs with SSE2: masks out the even (U) bytes and
// shifts down the odd (V) bytes, 32 source bytes per iteration.
// Requires 16-byte aligned pointers and pix to be a positive multiple of 16
// (the 'ja' loop only stops on an exact zero count).
static void SplitUV_SSE2(const uint8* src_uv,
                         uint8* dst_u, uint8* dst_v, int pix) {
  __asm {
    mov        esi, src_uv
    mov        edi, dst_u
    mov        edx, dst_v
    mov        ecx, pix
    mov        eax, 0x00ff00ff  // mask for isolating low bytes
    movd       xmm7, eax
    pshufd     xmm7, xmm7, 0   // broadcast mask to all 4 dwords

  wloop:
    movdqa     xmm0, [esi]
    movdqa     xmm1, [esi + 16]
    lea        esi, [esi + 32]
    movdqa     xmm2, xmm0
    movdqa     xmm3, xmm1
    pand       xmm0, xmm7      // even bytes
    pand       xmm1, xmm7
    packuswb   xmm0, xmm1
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    psrlw      xmm2, 8         // odd bytes
    psrlw      xmm3, 8
    packuswb   xmm2, xmm3
    movdqa     [edx], xmm2
    lea        edx, [edx + 16]
    sub        ecx, 16
    ja         wloop
  }
}
#elif defined(__i386__) && !defined(COVERAGE_ENABLED) && \
!TARGET_IPHONE_SIMULATOR
// GCC version is same as Visual C
#define HAS_SPLITUV_SSE2
// GCC build of the same SSE2 UV splitter, written as a file-scope asm block
// with cdecl stack-argument access (0x8/0xc/0x10/0x14(%ebp)). Same
// requirements as the MSVC version: aligned pointers, pix a positive
// multiple of 16. OSX Mach-O symbols carry a leading underscore, hence the
// two label spellings.
extern "C" void SplitUV_SSE2(const uint8* src_uv,
                             uint8* dst_u, uint8* dst_v, int pix);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _SplitUV_SSE2\n"
  "_SplitUV_SSE2:\n"
#else
  ".global SplitUV_SSE2\n"
  "SplitUV_SSE2:\n"
#endif
  // Prologue: save callee-saved esi/edi and set up the frame.
  "push   %ebp\n"
  "mov    %esp,%ebp\n"
  "push   %esi\n"
  "push   %edi\n"
  "mov    0x8(%ebp),%esi\n"
  "mov    0xc(%ebp),%edi\n"
  "mov    0x10(%ebp),%edx\n"
  "mov    0x14(%ebp),%ecx\n"
  "mov    $0xff00ff,%eax\n"
  "movd   %eax,%xmm7\n"
  "pshufd $0x0,%xmm7,%xmm7\n"
"1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm1\n"
  "lea    0x20(%esi),%esi\n"
  "movdqa %xmm0,%xmm2\n"
  "movdqa %xmm1,%xmm3\n"
  "pand   %xmm7,%xmm0\n"
  "pand   %xmm7,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "movdqa %xmm0,(%edi)\n"
  "lea    0x10(%edi),%edi\n"
  "psrlw  $0x8,%xmm2\n"
  "psrlw  $0x8,%xmm3\n"
  "packuswb %xmm3,%xmm2\n"
  "movdqa %xmm2,(%edx)\n"
  "lea    0x10(%edx),%edx\n"
  "sub    $0x10,%ecx\n"
  "ja     1b\n"
  "pop    %edi\n"
  "pop    %esi\n"
  "pop    %ebp\n"
  "ret\n"
);
#endif
// Portable UV splitter: copies the even bytes of |src_uv| to |dst_u| and the
// odd bytes to |dst_v|, one pair per pixel.
static void SplitUV_C(const uint8* src_uv,
                      uint8* dst_u, uint8* dst_v, int pix) {
  for (int i = 0; i < pix; ++i) {
    dst_u[i] = src_uv[i * 2];
    dst_v[i] = src_uv[i * 2 + 1];
  }
}
// Copies a plane of |height| rows, |width| bytes each, honoring the source
// and destination strides independently.
static void I420CopyPlane(const uint8* src_y, int src_pitch_y,
                          uint8* dst_y, int dst_pitch_y,
                          int width, int height) {
  for (int row = 0; row < height; ++row) {
    memcpy(dst_y + row * dst_pitch_y, src_y + row * src_pitch_y, width);
  }
}
// Copies a plane whose source stride alternates row-by-row between
// |src_pitch_0| and |src_pitch_1| (used for M420, where Y rows are
// interleaved with chroma rows). The destination uses a single stride.
static void I420CopyPlane2(const uint8* src, int src_pitch_0, int src_pitch_1,
                           uint8* dst, int dst_pitch,
                           int width, int height) {
  for (int y = 0; y < height; y += 2) {
    memcpy(dst, src, width);
    src += src_pitch_0;
    dst += dst_pitch;
    // Guard the second row so an odd |height| doesn't read or write one row
    // past the plane.
    if (y + 1 < height) {
      memcpy(dst, src, width);
      src += src_pitch_1;
      dst += dst_pitch;
    }
  }
}
// Support converting from FOURCC_M420
// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for
// easy conversion to I420.
// M420 format description:
// M420 is row biplanar 420: 2 rows of Y and 1 row of VU.
// Chroma is half width / half height. (420)
// pitch_m420 is row planar. Normally this will be the width in pixels.
//   The UV plane is half width, but 2 values, so pitch_m420 applies to this
//   as well as the two Y planes.
// TODO(fbarchard): Do NV21/NV12 formats with this function
static void X420ToI420(uint8* dst_y, int dst_pitch_y,
                       uint8* dst_u, int dst_pitch_u,
                       uint8* dst_v, int dst_pitch_v,
                       const uint8* src_y,
                       int src_pitch_y0, int src_pitch_y1,
                       const uint8* src_uv, int src_pitch_uv,
                       int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    int halfheight = (height + 1) >> 1;
    dst_y = dst_y + (height - 1) * dst_pitch_y;
    // The chroma planes are only halfheight rows tall; offsetting them by
    // (height - 1) rows would point past the end of the plane.
    dst_u = dst_u + (halfheight - 1) * dst_pitch_u;
    dst_v = dst_v + (halfheight - 1) * dst_pitch_v;
    dst_pitch_y = -dst_pitch_y;
    dst_pitch_u = -dst_pitch_u;
    dst_pitch_v = -dst_pitch_v;
  }
  int halfwidth = (width + 1) >> 1;
  // Pick the fastest available UV splitter; SIMD versions require 16-byte
  // alignment and a multiple-of-16 half width.
  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
#if defined(HAS_SPLITUV_NEON)
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasNEON) &&
      (halfwidth % 16 == 0) &&
      IS_ALIGNED(src_uv, 16) && (src_pitch_uv % 16 == 0) &&
      IS_ALIGNED(dst_u, 16) && (dst_pitch_u % 16 == 0) &&
      IS_ALIGNED(dst_v, 16) && (dst_pitch_v % 16 == 0)) {
    SplitUV = SplitUV_NEON;
  } else
#elif defined(HAS_SPLITUV_SSE2)
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
      (halfwidth % 16 == 0) &&
      IS_ALIGNED(src_uv, 16) && (src_pitch_uv % 16 == 0) &&
      IS_ALIGNED(dst_u, 16) && (dst_pitch_u % 16 == 0) &&
      IS_ALIGNED(dst_v, 16) && (dst_pitch_v % 16 == 0)) {
    SplitUV = SplitUV_SSE2;
  } else
#endif
  {
    SplitUV = SplitUV_C;
  }
  // Copy the Y plane (its stride may alternate, e.g. for M420).
  I420CopyPlane2(src_y, src_pitch_y0, src_pitch_y1, dst_y, dst_pitch_y,
                 width, height);
  // De-interleave the UV plane one half-height row at a time.
  int halfheight = (height + 1) >> 1;
  for (int y = 0; y < halfheight; ++y) {
    SplitUV(src_uv, dst_u, dst_v, halfwidth);
    dst_u += dst_pitch_u;
    dst_v += dst_pitch_v;
    src_uv += src_pitch_uv;
  }
}
// TODO(fbarchard): For biplanar formats (ie NV21), the Y plane is the same
// as I420, and only the chroma plane varies. Copy the Y plane by reference,
// and just convert the UV. This method can be used for NV21, NV12, I420,
// I422, M422. 8 of the 12 bits is Y, so this would copy 3 times less data,
// which is approximately how much faster it would be.
// Helper function to copy yuv data without scaling. Used
// by our jpeg conversion callbacks to incrementally fill a yuv image.
// A negative |height| inverts the image during the copy.
void PlanarFunctions::I420Copy(const uint8* src_y, int src_pitch_y,
                               const uint8* src_u, int src_pitch_u,
                               const uint8* src_v, int src_pitch_v,
                               uint8* dst_y, int dst_pitch_y,
                               uint8* dst_u, int dst_pitch_u,
                               uint8* dst_v, int dst_pitch_v,
                               int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    int halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_pitch_y;
    // I420 chroma planes are half height; offsetting them by (height - 1)
    // rows would read past the end of the planes.
    src_u = src_u + (halfheight - 1) * src_pitch_u;
    src_v = src_v + (halfheight - 1) * src_pitch_v;
    src_pitch_y = -src_pitch_y;
    src_pitch_u = -src_pitch_u;
    src_pitch_v = -src_pitch_v;
  }
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  I420CopyPlane(src_y, src_pitch_y, dst_y, dst_pitch_y, width, height);
  I420CopyPlane(src_u, src_pitch_u, dst_u, dst_pitch_u, halfwidth, halfheight);
  I420CopyPlane(src_v, src_pitch_v, dst_v, dst_pitch_v, halfwidth, halfheight);
}
// Helper function to copy yuv data without scaling. Used
// by our jpeg conversion callbacks to incrementally fill a yuv image.
// Converts I422 (full-height, half-width chroma) to I420 by copying Y and
// vertically averaging each pair of chroma rows. A negative |height|
// inverts the image. Note: I422 chroma is full height, so the (height - 1)
// offsets below are correct for all three source planes.
void PlanarFunctions::I422ToI420(const uint8* src_y, int src_pitch_y,
                                 const uint8* src_u, int src_pitch_u,
                                 const uint8* src_v, int src_pitch_v,
                                 uint8* dst_y, int dst_pitch_y,
                                 uint8* dst_u, int dst_pitch_u,
                                 uint8* dst_v, int dst_pitch_v,
                                 int width, int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_pitch_y;
    src_u = src_u + (height - 1) * src_pitch_u;
    src_v = src_v + (height - 1) * src_pitch_v;
    src_pitch_y = -src_pitch_y;
    src_pitch_u = -src_pitch_u;
    src_pitch_v = -src_pitch_v;
  }
  // Copy Y plane
  I420CopyPlane(src_y, src_pitch_y, dst_y, dst_pitch_y, width, height);
  // SubSample UV planes: average each pair of source rows into one
  // destination row, rounding ((a + b + 1) >> 1).
  int x, y;
  int halfwidth = (width + 1) >> 1;
  for (y = 0; y < height; y += 2) {
    const uint8* u0 = src_u;
    const uint8* u1 = src_u + src_pitch_u;
    // For an odd height, the last pair has no second row; reuse the first.
    if ((y + 1) >= height) {
      u1 = u0;
    }
    for (x = 0; x < halfwidth; ++x) {
      dst_u[x] = (u0[x] + u1[x] + 1) >> 1;
    }
    src_u += src_pitch_u * 2;
    dst_u += dst_pitch_u;
  }
  for (y = 0; y < height; y += 2) {
    const uint8* v0 = src_v;
    const uint8* v1 = src_v + src_pitch_v;
    if ((y + 1) >= height) {
      v1 = v0;
    }
    for (x = 0; x < halfwidth; ++x) {
      dst_v[x] = (v0[x] + v1[x] + 1) >> 1;
    }
    src_v += src_pitch_v * 2;
    dst_v += dst_pitch_v;
  }
}
// Convert M420 to I420.
// M420 interleaves two Y rows with one VU row, all with pitch_m420:
// Y rows alternate strides pitch_m420 and pitch_m420 * 2 (skipping the VU
// row), the first VU row starts two rows in, and consecutive VU rows are
// pitch_m420 * 3 apart (one full Y/Y/VU group).
void PlanarFunctions::M420ToI420(uint8* dst_y, int dst_pitch_y,
                                 uint8* dst_u, int dst_pitch_u,
                                 uint8* dst_v, int dst_pitch_v,
                                 const uint8* m420, int pitch_m420,
                                 int width, int height) {
  X420ToI420(dst_y, dst_pitch_y, dst_u, dst_pitch_u, dst_v, dst_pitch_v,
             m420, pitch_m420, pitch_m420 * 2,
             m420 + pitch_m420 * 2, pitch_m420 * 3,
             width, height);
}
// Convert NV12 to I420.
// NV12 has a planar Y surface and a separate interleaved UV surface, both
// with the same stride, so every pitch argument is src_pitch.
void PlanarFunctions::NV12ToI420(uint8* dst_y, int dst_pitch_y,
                                 uint8* dst_u, int dst_pitch_u,
                                 uint8* dst_v, int dst_pitch_v,
                                 const uint8* src_y,
                                 const uint8* src_uv,
                                 int src_pitch,
                                 int width, int height) {
  X420ToI420(dst_y, dst_pitch_y, dst_u, dst_pitch_u, dst_v, dst_pitch_v,
             src_y, src_pitch, src_pitch,
             src_uv, src_pitch,
             width, height);
}
} // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef LIBYUV_SOURCE_ROW_H_
#define LIBYUV_SOURCE_ROW_H_
#include "basic_types.h"
extern "C" {
// Can only do 1x.
// This is the second fastest of the scalers.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
#if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
#ifdef OSX
extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
#else
extern SIMD_ALIGNED(const int16 _kCoefficientsRgbY[768][4]);
#endif
// Method to force C version.
//#define USE_MMX 0
//#define USE_SSE2 0
#if !defined(USE_MMX)
// Windows, Mac and Linux use MMX
#if defined(__i386__) || defined(_MSC_VER)
#define USE_MMX 1
#else
#define USE_MMX 0
#endif
#endif
#if !defined(USE_SSE2)
#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2
#define USE_SSE2 1
#else
#define USE_SSE2 0
#endif
#endif
// x64 uses MMX2 (SSE) so emms is not required.
// Warning C4799: function has no EMMS instruction.
// EMMS() is slow and should be called by the calling function once per image.
#if USE_MMX && !defined(ARCH_CPU_X86_64)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#pragma warning(disable: 4799)
#else
#define EMMS() asm("emms")
#endif
#else
#define EMMS()
#endif
} // extern "C"
#endif // LIBYUV_SOURCE_ROW_H_
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "row.h"
extern "C" {
#if defined(__x86_64__)
// 64 bit linux gcc version
// Converts two pixels per iteration: one U and one V sample are looked up in
// the coefficient table (U entries at +2048 bytes, V at +4096, Y at the
// base), summed with saturation, descaled by 6 bits and packed to ARGB.
// NOTE(review): the 'ja' loop assumes |width| is a positive even number —
// confirm callers.
void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                              const uint8* u_buf,  // rsi
                              const uint8* v_buf,  // rdx
                              uint8* rgb_buf,      // rcx
                              int width) {         // r8
  asm(
  "1:"
  "movzb  (%1),%%r10\n"
  "lea    1(%1),%1\n"
  "movzb  (%2),%%r11\n"
  "lea    1(%2),%2\n"
  "movq   2048(%5,%%r10,8),%%xmm0\n"
  "movzb  (%0),%%r10\n"
  "movq   4096(%5,%%r11,8),%%xmm1\n"
  "movzb  0x1(%0),%%r11\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq   (%5,%%r10,8),%%xmm2\n"
  "lea    2(%0),%0\n"
  "movq   (%5,%%r11,8),%%xmm3\n"
  "paddsw %%xmm0,%%xmm2\n"
  "paddsw %%xmm0,%%xmm3\n"
  "shufps $0x44,%%xmm3,%%xmm2\n"
  "psraw  $0x6,%%xmm2\n"
  "packuswb %%xmm2,%%xmm2\n"
  "movq   %%xmm2,0x0(%3)\n"
  "lea    8(%3),%3\n"
  "sub    $0x2,%4\n"
  "ja     1b\n"
  :
  : "r"(y_buf),  // %0
    "r"(u_buf),  // %1
    "r"(v_buf),  // %2
    "r"(rgb_buf),  // %3
    "r"(width),  // %4
    "r" (_kCoefficientsRgbY)  // %5
  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
#elif defined(__i386__)
// 32 bit gcc version
// 32-bit MMX version, declared in C and defined as a file-scope asm block.
// Same two-pixels-per-iteration scheme as the x64 version; uses movntq
// (non-temporal store), so callers are expected to call EMMS() once per
// image (see row.h). The '_kCoefficientsRgbY' spelling resolves to the
// underscore-prefixed Mach-O symbol on OSX and to the literally-underscored
// ELF variable elsewhere.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width);
  asm(
  ".text\n"
#if defined(OSX) || defined(IOS)
  ".globl _FastConvertYUVToRGB32Row\n"
"_FastConvertYUVToRGB32Row:\n"
#else
  ".global FastConvertYUVToRGB32Row\n"
"FastConvertYUVToRGB32Row:\n"
#endif
  // pusha saves all registers; arguments are then at 0x24+(%esp).
  "pusha\n"
  "mov    0x24(%esp),%edx\n"
  "mov    0x28(%esp),%edi\n"
  "mov    0x2c(%esp),%esi\n"
  "mov    0x30(%esp),%ebp\n"
  "mov    0x34(%esp),%ecx\n"
"1:"
  "movzbl (%edi),%eax\n"
  "lea    1(%edi),%edi\n"
  "movzbl (%esi),%ebx\n"
  "lea    1(%esi),%esi\n"
  "movq   _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
  "movzbl (%edx),%eax\n"
  "paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
  "movzbl 0x1(%edx),%ebx\n"
  "movq   _kCoefficientsRgbY(,%eax,8),%mm1\n"
  "lea    2(%edx),%edx\n"
  "movq   _kCoefficientsRgbY(,%ebx,8),%mm2\n"
  "paddsw %mm0,%mm1\n"
  "paddsw %mm0,%mm2\n"
  "psraw  $0x6,%mm1\n"
  "psraw  $0x6,%mm2\n"
  "packuswb %mm2,%mm1\n"
  "movntq %mm1,0x0(%ebp)\n"
  "lea    8(%ebp),%ebp\n"
  "sub    $0x2,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
#else
// C reference code that mimic the YUV assembly.
#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
(((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
// Converts one YUV pixel to ARGB using the shared coefficient table,
// mirroring the MMX path exactly: table rows 256..511 hold U terms,
// 512..767 hold V terms, 0..255 hold Y terms; components are summed with
// 16-bit saturation (paddsw), descaled by 6 fractional bits, then clamped
// to a byte (packuswb) and packed as B | G<<8 | R<<16 | A<<24.
static inline void YuvPixel(uint8 y,
                            uint8 u,
                            uint8 v,
                            uint8* rgb_buf) {
  int b = _kCoefficientsRgbY[256+u][0];
  int g = _kCoefficientsRgbY[256+u][1];
  int r = _kCoefficientsRgbY[256+u][2];
  int a = _kCoefficientsRgbY[256+u][3];
  b = paddsw(b, _kCoefficientsRgbY[512+v][0]);
  g = paddsw(g, _kCoefficientsRgbY[512+v][1]);
  r = paddsw(r, _kCoefficientsRgbY[512+v][2]);
  a = paddsw(a, _kCoefficientsRgbY[512+v][3]);
  b = paddsw(b, _kCoefficientsRgbY[y][0]);
  g = paddsw(g, _kCoefficientsRgbY[y][1]);
  r = paddsw(r, _kCoefficientsRgbY[y][2]);
  a = paddsw(a, _kCoefficientsRgbY[y][3]);
  b >>= 6;
  g >>= 6;
  r >>= 6;
  a >>= 6;
  *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
                                        (packuswb(g) << 8) |
                                        (packuswb(r) << 16) |
                                        (packuswb(a) << 24);
}
// C reference row converter: processes pixels in pairs, since each U/V
// sample is shared by two Y samples (4:2:2 layout). Handles an odd trailing
// pixel, unlike the assembly versions.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width) {
  for (int i = 0; i < width; i += 2) {
    const uint8 u = u_buf[i >> 1];
    const uint8 v = v_buf[i >> 1];
    YuvPixel(y_buf[i], u, v, rgb_buf);
    if (i + 1 < width) {
      YuvPixel(y_buf[i + 1], u, v, rgb_buf + 4);
    }
    rgb_buf += 8;  // Advance 2 pixels.
  }
}
#endif
} // extern "C"
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "row.h"
extern "C" {
// Fixed-point YUV->RGB coefficients with 6 fractional bits (scale 64),
// laid out as {B, G, R, A} rows so the table can be summed per channel.
// The multipliers (1.164, 2.018, -0.391, -0.813, 1.596) match ITU-R BT.601
// video-range conversion. NOTE(review): the "+ 0.5" rounds toward +inf for
// the negative U/G and V/G terms rather than rounding to nearest — confirm
// this matches the reference the table was generated from.
#define RGBY(i) { \
  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
  0 \
}
// The U row's alpha term (256 * 64 - 1) makes the summed, descaled alpha
// channel come out as 0xFF.
#define RGBU(i) { \
  static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
  static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
  0, \
  static_cast<int16>(256 * 64 - 1) \
}
#define RGBV(i) { \
  0, \
  static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
  static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
  0 \
}
#ifdef OSX
SIMD_ALIGNED(const int16 kCoefficientsRgbY[256 * 3][4]) = {
#else
SIMD_ALIGNED(const int16 _kCoefficientsRgbY[256 * 3][4]) = {
#endif
// Luminance table.
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
// Chroma U table.
RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
// Chroma V table.
RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
};
#undef RGBY
#undef RGBU
#undef RGBV
} // extern "C"
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "scale.h"
#include <string.h>
#include "common.h"
#include "cpu_id.h"
// Note: A Neon reference manual
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0204j/CJAJIIGG.html
// Note: Some SSE2 reference manuals
// cpuvol1.pdf agner_instruction_tables.pdf 253666.pdf 253667.pdf
// TODO(fbarchard): Remove once performance is known
//#define TEST_RSTSC
#if defined(TEST_RSTSC)
#include <iomanip>
#include <iostream>
#ifdef _MSC_VER
#include <emmintrin.h>
#endif
#if defined(__GNUC__) && defined(__i386__)
// Reads the x86 time-stamp counter on 32-bit GCC builds, mirroring the
// MSVC __rdtsc intrinsic. Only compiled when TEST_RSTSC timing is enabled.
static inline uint64 __rdtsc(void) {
  uint32_t a, d;  // rdtsc returns the 64-bit counter split across edx:eax
  __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));
  return ((uint64)d << 32) + a;
}
#endif
#endif
namespace libyuv {
// Set the following flag to true to revert to only
// using the reference implementation ScalePlaneBox(), and
// NOT the optimized versions. Useful for debugging and
// when comparing the quality of the resulting YUV planes
// as produced by the optimized and non-optimized versions.
// NOTE(review): static member definition; presumably flipped by a setter on
// YuvScaler declared elsewhere — confirm against the class declaration.
bool YuvScaler::use_reference_impl_ = false;
/**
* NEON downscalers with interpolation.
*
* Provided by Fritz Koenig
*
*/
#if defined(__ARM_NEON__) && !defined(COVERAGE_ENABLED)
#define HAS_SCALEROWDOWN2_NEON
// Point-samples a row to half width: reads 32 interleaved pixels per
// iteration, keeps the 16 even ones. owidth must be a multiple of 16.
// istride is unused (no vertical filtering in this path).
void ScaleRowDown2_NEON(const uint8* iptr, int32 /* istride */,
                        uint8* dst, int32 owidth) {
  __asm__ volatile
  (
    "1:\n"
    "vld2.u8    {q0,q1}, [%0]!    \n"  // load even pixels into q0, odd into q1
    "vst1.u8    {q0}, [%1]!       \n"  // store even pixels
    "subs       %2, %2, #16       \n"  // 16 processed per loop
    "bhi        1b                \n"
    // The asm post-increments both pointers and decrements the counter, so
    // these operands must be read-write ("+r"); declaring them input-only is
    // undefined behavior in GCC extended asm. "memory" tells the compiler
    // that dst is written; "cc" covers the flags set by subs.
    : "+r"(iptr), "+r"(dst), "+r"(owidth)  // In/out registers
    :                                      // Input-only registers
    : "q0", "q1", "cc", "memory"           // Clobber list
  );
}
// Box-filters a 32x2 block down to 16x1: averages each 2x2 quad with
// rounding ((a+b+c+d+2)>>2). owidth must be a multiple of 16.
void ScaleRowDown2Int_NEON(const uint8* iptr, int32 istride,
                           uint8* dst, int32 owidth) {
  __asm__ volatile
  (
    "mov        r4, #2            \n"  // rounding constant
    "add        %1, %0            \n"  // %1 = pointer to second input row
    "vdup.16    q4, r4            \n"
    "1:\n"
    "vld1.u8    {q0,q1}, [%0]!    \n"  // load row 1 and post increment
    "vld1.u8    {q2,q3}, [%1]!    \n"  // load row 2 and post increment
    "vpaddl.u8  q0, q0            \n"  // row 1: add adjacent byte pairs
    "vpaddl.u8  q1, q1            \n"
    "vpadal.u8  q0, q2            \n"  // row 2: add adjacent, accumulate into row 1
    "vpadal.u8  q1, q3            \n"
    "vadd.u16   q0, q4            \n"  // rounding
    "vadd.u16   q1, q4            \n"
    "vshrn.u16  d0, q0, #2        \n"  // downshift and pack
    "vshrn.u16  d1, q1, #2        \n"
    "vst1.u8    {q0}, [%2]!       \n"
    "subs       %3, %3, #16       \n"  // 16 processed per loop
    "bhi        1b                \n"
    // All four operands are modified by the asm (pointer post-increment, the
    // "add %1, %0" turning istride into a pointer, and the loop counter), so
    // they must be read-write ("+r") rather than input-only, which would be
    // undefined behavior in GCC extended asm. "memory"/"cc" cover the store
    // through dst and the flags set by subs.
    : "+r"(iptr), "+r"(istride), "+r"(dst), "+r"(owidth)  // In/out registers
    :                                                     // Input-only registers
    : "r4", "q0", "q1", "q2", "q3", "q4", "cc", "memory"  // Clobber list
  );
}
/**
* SSE2 downscalers with interpolation.
*
* Provided by Frank Barchard (fbarchard@google.com)
*
*/
// Constants for SSE2 code
#elif (defined(WIN32) || defined(__i386__)) && !defined(COVERAGE_ENABLED) && \
!defined(__PIC__) && !TARGET_IPHONE_SIMULATOR
#if defined(_MSC_VER)
#define TALIGN16(t, var) __declspec(align(16)) t _ ## var
#elif defined(OSX)
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
#else
#define TALIGN16(t, var) t _ ## var __attribute__((aligned(16)))
#endif
// Shuffle/multiply-add constants for the SSSE3 3/4 and 3/8 downscalers
// below. A shuffle index of 128 (bit 7 set) makes pshufb write zero.
// Offsets for source bytes 0 to 9
extern "C" TALIGN16(const uint8, shuf0[16]) =
  { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 };
// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
extern "C" TALIGN16(const uint8, shuf1[16]) =
  { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 };
// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 15.
extern "C" TALIGN16(const uint8, shuf2[16]) =
  { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 };
// Offsets for source bytes 0 to 10
extern "C" TALIGN16(const uint8, shuf01[16]) =
  { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 };
// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
extern "C" TALIGN16(const uint8, shuf11[16]) =
  { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 };
// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 15.
extern "C" TALIGN16(const uint8, shuf21[16]) =
  { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 };
// Coefficients for source bytes 0 to 10
extern "C" TALIGN16(const uint8, madd01[16]) =
  { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 };
// Coefficients for source bytes 10 to 21
extern "C" TALIGN16(const uint8, madd11[16]) =
  { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 };
// Coefficients for source bytes 21 to 31
extern "C" TALIGN16(const uint8, madd21[16]) =
  { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 };
// Rounding constant added before the >>2 in the 3/4 filtering paths.
extern "C" TALIGN16(const int16, round34[8]) =
  { 2, 2, 2, 2, 2, 2, 2, 2 };
// Pick bytes 0,3,6,8,11,14 of a 16-byte block into output bytes 0..5.
extern "C" TALIGN16(const uint8, shuf38a[16]) =
  { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
// Same selection, placed into output bytes 6..11.
extern "C" TALIGN16(const uint8, shuf38b[16]) =
  { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 };
// Arrange words 0,3,6 into 0,1,2
extern "C" TALIGN16(const uint8, shufac0[16]) =
  { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 };
// Arrange words 0,3,6 into 3,4,5
extern "C" TALIGN16(const uint8, shufac3[16]) =
  { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 };
// Scaling values for boxes of 3x3 and 2x3 (fixed-point reciprocals used
// with pmulhuw, i.e. divide by 9,9,6, 9,9,6).
extern "C" TALIGN16(const uint16, scaleac3[8]) =
  { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 };
// Arrange first value for pixels 0,1,2,3,4,5
extern "C" TALIGN16(const uint8, shufab0[16]) =
  { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 };
// Arrange second value for pixels 0,1,2,3,4,5
extern "C" TALIGN16(const uint8, shufab1[16]) =
  { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 };
// Arrange third value for pixels 0,1,2,3,4,5
extern "C" TALIGN16(const uint8, shufab2[16]) =
  { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
// Scaling values for boxes of 3x2 and 2x2 (divide by 3,3,2, 3,3,2).
extern "C" TALIGN16(const uint16, scaleab2[8]) =
  { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
#endif
#if defined(WIN32) && !defined(COVERAGE_ENABLED)
#define HAS_SCALEROWDOWN2_SSE2
// Reads 32 pixels, throws half away and writes 16 pixels (point sample:
// masks to the even bytes, then packs). owidth must be a multiple of 16.
// Alignment requirement: iptr 16 byte aligned, optr 16 byte aligned.
// naked: no compiler prologue/epilogue; arguments are read off esp directly.
__declspec(naked)
static void ScaleRowDown2_SSE2(const uint8* iptr, int32 istride,
                               uint8* optr, int32 owidth) {
  __asm {
    mov        eax, [esp + 4]        // iptr
                                     // istride ignored
    mov        edx, [esp + 12]       // optr
    mov        ecx, [esp + 16]       // owidth
    pcmpeqb    xmm7, xmm7            // generate mask 0x00ff00ff
    psrlw      xmm7, 8

  wloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    pand       xmm0, xmm7            // keep even bytes of each word
    pand       xmm1, xmm7
    packuswb   xmm0, xmm1            // 32 words -> 16 bytes
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    ja         wloop

    ret
  }
}
// Blends 32x2 rectangle to 16x1: pavgb averages the two rows, then each
// pair of columns is averaged (pavgw of high/low bytes of each word).
// Alignment requirement: iptr 16 byte aligned, optr 16 byte aligned.
__declspec(naked)
static void ScaleRowDown2Int_SSE2(const uint8* iptr, int32 istride,
                                  uint8* optr, int32 owidth) {
  __asm {
    push       esi
    mov        eax, [esp + 4 + 4]    // iptr
    mov        esi, [esp + 4 + 8]    // istride
    mov        edx, [esp + 4 + 12]   // optr
    mov        ecx, [esp + 4 + 16]   // owidth
    pcmpeqb    xmm7, xmm7            // generate mask 0x00ff00ff
    psrlw      xmm7, 8

  wloop:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    movdqa     xmm2, [eax + esi]     // second row = iptr + istride
    movdqa     xmm3, [eax + esi + 16]
    lea        eax, [eax + 32]
    pavgb      xmm0, xmm2            // average rows
    pavgb      xmm1, xmm3
    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
    psrlw      xmm0, 8               // odd bytes down into the low half
    movdqa     xmm3, xmm1
    psrlw      xmm1, 8
    pand       xmm2, xmm7            // even bytes
    pand       xmm3, xmm7
    pavgw      xmm0, xmm2
    pavgw      xmm1, xmm3
    packuswb   xmm0, xmm1
    movdqa     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    ja         wloop

    pop        esi
    ret
  }
}
#define HAS_SCALEROWDOWN4_SSE2
// Point samples 32 pixels to 8 pixels: keeps byte 0 of every dword.
// Alignment requirement: iptr 16 byte aligned, optr 8 byte aligned.
__declspec(naked)
static void ScaleRowDown4_SSE2(const uint8* iptr, int32 istride,
                               uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
                                     // istride ignored
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    pcmpeqb    xmm7, xmm7            // generate mask 0x000000ff
    psrld      xmm7, 24

  wloop:
    movdqa     xmm0, [esi]
    movdqa     xmm1, [esi + 16]
    lea        esi, [esi + 32]
    pand       xmm0, xmm7
    pand       xmm1, xmm7
    packuswb   xmm0, xmm1            // dwords -> words
    packuswb   xmm0, xmm0            // words -> bytes
    movq       qword ptr [edi], xmm0
    lea        edi, [edi + 8]
    sub        ecx, 8
    ja         wloop

    popad
    ret
  }
}
// Blends 32x4 rectangle to 8x1: averages the 4 rows pairwise, then
// averages columns twice (32 -> 16 -> 8 pixels).
// Alignment requirement: iptr 16 byte aligned, optr 8 byte aligned.
__declspec(naked)
static void ScaleRowDown4Int_SSE2(const uint8* iptr, int32 istride,
                                  uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        ebx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    pcmpeqb    xmm7, xmm7            // generate mask 0x00ff00ff
    psrlw      xmm7, 8
    lea        edx, [ebx + ebx * 2]  // istride * 3

  wloop:
    movdqa     xmm0, [esi]           // rows 0 and 1
    movdqa     xmm1, [esi + 16]
    movdqa     xmm2, [esi + ebx]
    movdqa     xmm3, [esi + ebx + 16]
    pavgb      xmm0, xmm2            // average rows
    pavgb      xmm1, xmm3
    movdqa     xmm2, [esi + ebx * 2] // rows 2 and 3
    movdqa     xmm3, [esi + ebx * 2 + 16]
    movdqa     xmm4, [esi + edx]
    movdqa     xmm5, [esi + edx + 16]
    lea        esi, [esi + 32]
    pavgb      xmm2, xmm4
    pavgb      xmm3, xmm5
    pavgb      xmm0, xmm2            // combine row pairs
    pavgb      xmm1, xmm3
    movdqa     xmm2, xmm0            // average columns (32 to 16 pixels)
    psrlw      xmm0, 8
    movdqa     xmm3, xmm1
    psrlw      xmm1, 8
    pand       xmm2, xmm7
    pand       xmm3, xmm7
    pavgw      xmm0, xmm2
    pavgw      xmm1, xmm3
    packuswb   xmm0, xmm1
    movdqa     xmm2, xmm0            // average columns (16 to 8 pixels)
    psrlw      xmm0, 8
    pand       xmm2, xmm7
    pavgw      xmm0, xmm2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi], xmm0
    lea        edi, [edi + 8]
    sub        ecx, 8
    ja         wloop

    popad
    ret
  }
}
#define HAS_SCALEROWDOWN8_SSE2
// Point samples 32 pixels to 4 pixels: keeps byte 0 of every qword.
// Alignment requirement: iptr 16 byte aligned, optr 4 byte aligned.
__declspec(naked)
static void ScaleRowDown8_SSE2(const uint8* iptr, int32 istride,
                               uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
                                     // istride ignored
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    pcmpeqb    xmm7, xmm7            // generate mask isolating 1 in 8 bytes
    psrlq      xmm7, 56

  wloop:
    movdqa     xmm0, [esi]
    movdqa     xmm1, [esi + 16]
    lea        esi, [esi + 32]
    pand       xmm0, xmm7
    pand       xmm1, xmm7
    packuswb   xmm0, xmm1            // 32->16
    packuswb   xmm0, xmm0            // 16->8
    packuswb   xmm0, xmm0            // 8->4
    movd       dword ptr [edi], xmm0
    lea        edi, [edi + 4]
    sub        ecx, 4
    ja         wloop

    popad
    ret
  }
}
// Blends 32x8 rectangle to 4x1: averages 8 rows down to 1 with a pavgb
// tree, then psadbw sums each 8-byte group so each output pixel is the
// mean of an 8x8 box (sum >> 3 via psrlw after the horizontal add).
// Alignment requirement: iptr 16 byte aligned, optr 4 byte aligned.
__declspec(naked)
static void ScaleRowDown8Int_SSE2(const uint8* iptr, int32 istride,
                                  uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        ebx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    lea        edx, [ebx + ebx * 2]  // istride * 3
    pxor       xmm7, xmm7            // zero for psadbw

  wloop:
    movdqa     xmm0, [esi]           // average 8 rows to 1
    movdqa     xmm1, [esi + 16]
    movdqa     xmm2, [esi + ebx]
    movdqa     xmm3, [esi + ebx + 16]
    pavgb      xmm0, xmm2
    pavgb      xmm1, xmm3
    movdqa     xmm2, [esi + ebx * 2]
    movdqa     xmm3, [esi + ebx * 2 + 16]
    movdqa     xmm4, [esi + edx]
    movdqa     xmm5, [esi + edx + 16]
    lea        ebp, [esi + ebx * 4]  // ebp -> rows 4..7 (restored by popad)
    lea        esi, [esi + 32]
    pavgb      xmm2, xmm4
    pavgb      xmm3, xmm5
    pavgb      xmm0, xmm2
    pavgb      xmm1, xmm3
    movdqa     xmm2, [ebp]
    movdqa     xmm3, [ebp + 16]
    movdqa     xmm4, [ebp + ebx]
    movdqa     xmm5, [ebp + ebx + 16]
    pavgb      xmm2, xmm4
    pavgb      xmm3, xmm5
    movdqa     xmm4, [ebp + ebx * 2]
    movdqa     xmm5, [ebp + ebx * 2 + 16]
    movdqa     xmm6, [ebp + edx]
    pavgb      xmm4, xmm6
    movdqa     xmm6, [ebp + edx + 16]
    pavgb      xmm5, xmm6
    pavgb      xmm2, xmm4
    pavgb      xmm3, xmm5
    pavgb      xmm0, xmm2
    pavgb      xmm1, xmm3
    psadbw     xmm0, xmm7            // average 32 pixels to 4
    psadbw     xmm1, xmm7
    pshufd     xmm0, xmm0, 0xd8      // x1x0 -> xx01
    pshufd     xmm1, xmm1, 0x8d      // x3x2 -> 32xx
    por        xmm0, xmm1            // -> 3201
    psrlw      xmm0, 3               // sum of 8 bytes -> average
    packuswb   xmm0, xmm0
    packuswb   xmm0, xmm0
    movd       dword ptr [edi], xmm0
    lea        edi, [edi + 4]
    sub        ecx, 4
    ja         wloop

    popad
    ret
  }
}
#define HAS_SCALEROWDOWN34_SSSE3
// Point samples 32 pixels to 24 pixels (3/4 scale, no filtering).
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
// Then shuffled to do the scaling.
// Note that movdqa+palign may be better than movdqu.
// Alignment requirement: iptr 16 byte aligned, optr 8 byte aligned.
__declspec(naked)
static void ScaleRowDown34_SSSE3(const uint8* iptr, int32 istride,
                                 uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
                                     // istride ignored
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    movdqa     xmm3, _shuf0          // shuffle tables defined above
    movdqa     xmm4, _shuf1
    movdqa     xmm5, _shuf2

  wloop:
    movdqa     xmm0, [esi]
    movdqa     xmm2, [esi + 16]
    lea        esi, [esi + 32]
    movdqa     xmm1, xmm2
    palignr    xmm1, xmm0, 8         // xmm1 = source bytes 8..23
    pshufb     xmm0, xmm3
    pshufb     xmm1, xmm4
    pshufb     xmm2, xmm5
    movq       qword ptr [edi], xmm0
    movq       qword ptr [edi + 8], xmm1
    movq       qword ptr [edi + 16], xmm2
    lea        edi, [edi + 24]
    sub        ecx, 24
    ja         wloop

    popad
    ret
  }
}
// Blends 32x2 rectangle to 24x1 (3/4 scale with vertical blend of the two
// rows at equal 1:1 weight, then horizontal 3:1/2:2/1:3 filtering via
// pshufb + pmaddubsw, rounded and shifted down by 2).
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
// Then shuffled to do the scaling.
// Register usage:
// xmm0 irow 0
// xmm1 irow 1
// xmm2 shuf 0
// xmm3 shuf 1
// xmm4 shuf 2
// xmm5 madd 0
// xmm6 madd 1
// xmm7 round34
// Note that movdqa+palign may be better than movdqu.
// Alignment requirement: iptr 16 byte aligned, optr 8 byte aligned.
__declspec(naked)
static void ScaleRowDown34_1_Int_SSSE3(const uint8* iptr, int32 istride,
                                       uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        ebx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    movdqa     xmm2, _shuf01
    movdqa     xmm3, _shuf11
    movdqa     xmm4, _shuf21
    movdqa     xmm5, _madd01
    movdqa     xmm6, _madd11
    movdqa     xmm7, _round34

  wloop:
    movdqa     xmm0, [esi]           // pixels 0..7
    movdqa     xmm1, [esi+ebx]
    pavgb      xmm0, xmm1            // 1:1 vertical blend
    pshufb     xmm0, xmm2
    pmaddubsw  xmm0, xmm5
    paddsw     xmm0, xmm7            // + rounding
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi], xmm0
    movdqu     xmm0, [esi+8]         // pixels 8..15
    movdqu     xmm1, [esi+ebx+8]
    pavgb      xmm0, xmm1
    pshufb     xmm0, xmm3
    pmaddubsw  xmm0, xmm6
    paddsw     xmm0, xmm7
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi+8], xmm0
    movdqa     xmm0, [esi+16]        // pixels 16..23
    movdqa     xmm1, [esi+ebx+16]
    lea        esi, [esi+32]
    pavgb      xmm0, xmm1
    pshufb     xmm0, xmm4
    movdqa     xmm1, _madd21         // no free register; reload each pass
    pmaddubsw  xmm0, xmm1
    paddsw     xmm0, xmm7
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi+16], xmm0
    lea        edi, [edi+24]
    sub        ecx, 24
    ja         wloop

    popad
    ret
  }
}
// Blends 32x2 rectangle to 24x1 like ScaleRowDown34_1_Int_SSSE3, but the
// double pavgb gives the rows a 3:1 weighting (row0*3 + row1) / 4 instead
// of the 1:1 blend, for output rows nearer the first source row.
// Note that movdqa+palign may be better than movdqu.
// Alignment requirement: iptr 16 byte aligned, optr 8 byte aligned.
__declspec(naked)
static void ScaleRowDown34_0_Int_SSSE3(const uint8* iptr, int32 istride,
                                       uint8* orow, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        ebx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    movdqa     xmm2, _shuf01
    movdqa     xmm3, _shuf11
    movdqa     xmm4, _shuf21
    movdqa     xmm5, _madd01
    movdqa     xmm6, _madd11
    movdqa     xmm7, _round34

  wloop:
    movdqa     xmm0, [esi]           // pixels 0..7
    movdqa     xmm1, [esi+ebx]
    pavgb      xmm1, xmm0            // xmm1 = (r0 + r1) / 2
    pavgb      xmm0, xmm1            // xmm0 = (3*r0 + r1) / 4
    pshufb     xmm0, xmm2
    pmaddubsw  xmm0, xmm5
    paddsw     xmm0, xmm7
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi], xmm0
    movdqu     xmm0, [esi+8]         // pixels 8..15
    movdqu     xmm1, [esi+ebx+8]
    pavgb      xmm1, xmm0
    pavgb      xmm0, xmm1
    pshufb     xmm0, xmm3
    pmaddubsw  xmm0, xmm6
    paddsw     xmm0, xmm7
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi+8], xmm0
    movdqa     xmm0, [esi+16]        // pixels 16..23
    movdqa     xmm1, [esi+ebx+16]
    lea        esi, [esi+32]
    pavgb      xmm1, xmm0
    pavgb      xmm0, xmm1
    pshufb     xmm0, xmm4
    movdqa     xmm1, _madd21
    pmaddubsw  xmm0, xmm1
    paddsw     xmm0, xmm7
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edi+16], xmm0
    lea        edi, [edi+24]
    sub        ecx, 24
    ja         wloop

    popad
    ret
  }
}
#define HAS_SCALEROWDOWN38_SSSE3
// 3/8 point sampler
// Scale 32 pixels to 12: picks every ~8/3rd byte via two pshufb tables
// whose results are OR-combined (paddusb on disjoint lanes).
// istride is loaded but unused in this point-sampling path.
__declspec(naked)
static void ScaleRowDown38_SSSE3(const uint8* iptr, int32 istride,
                                 uint8* optr, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        edx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // optr
    mov        ecx, [esp + 32 + 16]  // owidth
    movdqa     xmm5, _shuf38a
    movdqa     xmm6, _shuf38b
    pxor       xmm7, xmm7

  xloop:
    movdqa     xmm0, [esi]           // 16 pixels -> 0,1,2,3,4,5
    movdqa     xmm1, [esi + 16]      // 16 pixels -> 6,7,8,9,10,11
    lea        esi, [esi + 32]
    pshufb     xmm0, xmm5
    pshufb     xmm1, xmm6
    paddusb    xmm0, xmm1            // lanes are disjoint: acts as OR
    movq       qword ptr [edi], xmm0 // write 12 pixels
    movhlps    xmm1, xmm0
    movd       [edi + 8], xmm1
    lea        edi, [edi + 12]
    sub        ecx, 12
    ja         xloop

    popad
    ret
  }
}
// Scale 16x3 pixels to 6x1 with interpolation: sums 3 rows as words,
// box-sums groups of 3 (or 2 for the last) columns, then multiplies by
// fixed-point reciprocals (_scaleac3) to divide by the box area.
__declspec(naked)
static void ScaleRowDown38_3_Int_SSSE3(const uint8* iptr, int32 istride,
                                       uint8* optr, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        edx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // optr
    mov        ecx, [esp + 32 + 16]  // owidth
    movdqa     xmm4, _shufac0
    movdqa     xmm5, _shufac3
    movdqa     xmm6, _scaleac3
    pxor       xmm7, xmm7            // zero for byte -> word unpack

  xloop:
    movdqa     xmm0, [esi]           // sum up 3 rows into xmm0/1
    movdqa     xmm2, [esi + edx]
    movhlps    xmm1, xmm0
    movhlps    xmm3, xmm2
    punpcklbw  xmm0, xmm7
    punpcklbw  xmm1, xmm7
    punpcklbw  xmm2, xmm7
    punpcklbw  xmm3, xmm7
    paddusw    xmm0, xmm2
    paddusw    xmm1, xmm3
    movdqa     xmm2, [esi + edx * 2]
    lea        esi, [esi + 16]
    movhlps    xmm3, xmm2
    punpcklbw  xmm2, xmm7
    punpcklbw  xmm3, xmm7
    paddusw    xmm0, xmm2
    paddusw    xmm1, xmm3
    movdqa     xmm2, xmm0            // 8 pixels -> 0,1,2 of xmm2
    psrldq     xmm0, 2               // shift-and-add sums adjacent words
    paddusw    xmm2, xmm0
    psrldq     xmm0, 2
    paddusw    xmm2, xmm0
    pshufb     xmm2, xmm4
    movdqa     xmm3, xmm1            // 8 pixels -> 3,4,5 of xmm2
    psrldq     xmm1, 2
    paddusw    xmm3, xmm1
    psrldq     xmm1, 2
    paddusw    xmm3, xmm1
    pshufb     xmm3, xmm5
    paddusw    xmm2, xmm3
    pmulhuw    xmm2, xmm6            // divide by 9,9,6, 9,9,6
    packuswb   xmm2, xmm2
    movd       [edi], xmm2           // write 6 pixels
    pextrw     eax, xmm2, 2
    mov        [edi + 4], ax
    lea        edi, [edi + 6]
    sub        ecx, 6
    ja         xloop

    popad
    ret
  }
}
// Scale 16x2 pixels to 6x1 with interpolation: pavgb blends the 2 rows,
// three shuffles gather the 3 (or 2) contributing columns per output
// pixel, and pmulhuw by _scaleab2 divides by the column count.
__declspec(naked)
static void ScaleRowDown38_2_Int_SSSE3(const uint8* iptr, int32 istride,
                                       uint8* optr, int32 owidth) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        edx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // optr
    mov        ecx, [esp + 32 + 16]  // owidth
    movdqa     xmm4, _shufab0
    movdqa     xmm5, _shufab1
    movdqa     xmm6, _shufab2
    movdqa     xmm7, _scaleab2

  xloop:
    movdqa     xmm2, [esi]           // average 2 rows into xmm2
    pavgb      xmm2, [esi + edx]
    lea        esi, [esi + 16]
    movdqa     xmm0, xmm2            // 16 pixels -> 0,1,2,3,4,5 of xmm0
    pshufb     xmm0, xmm4
    movdqa     xmm1, xmm2
    pshufb     xmm1, xmm5
    paddusw    xmm0, xmm1
    pshufb     xmm2, xmm6
    paddusw    xmm0, xmm2
    pmulhuw    xmm0, xmm7            // divide by 3,3,2, 3,3,2
    packuswb   xmm0, xmm0
    movd       [edi], xmm0           // write 6 pixels
    pextrw     eax, xmm0, 2
    mov        [edi + 4], ax
    lea        edi, [edi + 6]
    sub        ecx, 6
    ja         xloop

    popad
    ret
  }
}
#define HAS_SCALEADDROWS_SSE2
// Reads 16xN bytes and produces 16 word sums at a time: for each 16-pixel
// column strip, sums iheight rows into unsaturating-capped (paddusw)
// 16-bit accumulators, written as 32 bytes of shorts to orow.
__declspec(naked)
static void ScaleAddRows_SSE2(const uint8* iptr, int32 istride,
                              uint16* orow, int32 iwidth, int32 iheight) {
  __asm {
    pushad
    mov        esi, [esp + 32 + 4]   // iptr
    mov        edx, [esp + 32 + 8]   // istride
    mov        edi, [esp + 32 + 12]  // orow
    mov        ecx, [esp + 32 + 16]  // owidth
    mov        ebx, [esp + 32 + 20]  // height
    pxor       xmm7, xmm7
    dec        ebx                   // first row handled outside yloop

  xloop:
    // first row
    movdqa     xmm2, [esi]
    lea        eax, [esi + edx]      // eax walks down the column
    movhlps    xmm3, xmm2
    mov        ebp, ebx              // rows remaining (ebp restored by popad)
    punpcklbw  xmm2, xmm7
    punpcklbw  xmm3, xmm7

    // sum remaining rows
  yloop:
    movdqa     xmm0, [eax]           // read 16 pixels
    lea        eax, [eax + edx]      // advance to next row
    movhlps    xmm1, xmm0
    punpcklbw  xmm0, xmm7
    punpcklbw  xmm1, xmm7
    paddusw    xmm2, xmm0            // sum 16 words
    paddusw    xmm3, xmm1
    sub        ebp, 1
    ja         yloop

    movdqa     [edi], xmm2
    movdqa     [edi + 16], xmm3
    lea        edi, [edi + 32]
    lea        esi, [esi + 16]
    sub        ecx, 16
    ja         xloop

    popad
    ret
  }
}
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
#define HAS_SCALEFILTERROWS_SSE2
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version.
// Blends two source rows with weights (256 - f) : f where
// f = source_y_fraction in 0..255. Fast paths: f == 0 copies row 0,
// f == 128 uses pavgb. After the loop the last pixel is duplicated one
// byte past owidth (callers rely on this for the following column filter).
__declspec(naked)
static void ScaleFilterRows_SSE2(uint8* optr, const uint8* iptr0, int32 istride,
                                 int owidth, int source_y_fraction) {
  __asm {
    push       esi
    push       edi
    mov        edi, [esp + 8 + 4]    // optr
    mov        esi, [esp + 8 + 8]    // iptr0
    mov        edx, [esp + 8 + 12]   // istride
    mov        ecx, [esp + 8 + 16]   // owidth
    mov        eax, [esp + 8 + 20]   // source_y_fraction (0..255)
    cmp        eax, 0
    je         xloop1                // fraction 0: pure copy of row 0
    cmp        eax, 128
    je         xloop2                // fraction 1/2: single pavgb
    movd       xmm6, eax             // xmm6 = y fraction
    punpcklwd  xmm6, xmm6
    pshufd     xmm6, xmm6, 0
    neg        eax                   // xmm5 = 256 - y fraction
    add        eax, 256
    movd       xmm5, eax
    punpcklwd  xmm5, xmm5
    pshufd     xmm5, xmm5, 0
    pxor       xmm7, xmm7

  xloop:
    movdqa     xmm0, [esi]
    movdqa     xmm2, [esi + edx]
    lea        esi, [esi + 16]
    movdqa     xmm1, xmm0
    movdqa     xmm3, xmm2
    punpcklbw  xmm0, xmm7            // widen bytes to words
    punpcklbw  xmm2, xmm7
    punpckhbw  xmm1, xmm7
    punpckhbw  xmm3, xmm7
    pmullw     xmm0, xmm5            // scale row 0
    pmullw     xmm1, xmm5
    pmullw     xmm2, xmm6            // scale row 1
    pmullw     xmm3, xmm6
    paddusw    xmm0, xmm2            // sum rows
    paddusw    xmm1, xmm3
    psrlw      xmm0, 8
    psrlw      xmm1, 8
    packuswb   xmm0, xmm1
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    sub        ecx, 16
    ja         xloop

    mov        al, [edi - 1]         // duplicate last pixel for filtering
    mov        [edi], al
    pop        edi
    pop        esi
    ret

  xloop1:
    movdqa     xmm0, [esi]
    lea        esi, [esi + 16]
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    sub        ecx, 16
    ja         xloop1

    mov        al, [edi - 1]
    mov        [edi], al
    pop        edi
    pop        esi
    ret

  xloop2:
    movdqa     xmm0, [esi]
    movdqa     xmm2, [esi + edx]
    lea        esi, [esi + 16]
    pavgb      xmm0, xmm2
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    sub        ecx, 16
    ja         xloop2

    mov        al, [edi - 1]
    mov        [edi], al
    pop        edi
    pop        esi
    ret
  }
}
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
#define HAS_SCALEFILTERROWS_SSSE3
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version.
// Packs the 7-bit weights (128 - f/2 : f/2) into byte pairs so pmaddubsw
// blends both rows in one instruction. Same fast paths and last-pixel
// duplication as the SSE2 version.
__declspec(naked)
static void ScaleFilterRows_SSSE3(uint8* optr, const uint8* iptr0, int32 istride,
                                  int owidth, int source_y_fraction) {
  __asm {
    push       esi
    push       edi
    mov        edi, [esp + 8 + 4]    // optr
    mov        esi, [esp + 8 + 8]    // iptr0
    mov        edx, [esp + 8 + 12]   // istride
    mov        ecx, [esp + 8 + 16]   // owidth
    mov        eax, [esp + 8 + 20]   // source_y_fraction (0..255)
    cmp        eax, 0
    je         xloop1                // fraction 0: pure copy of row 0
    cmp        eax, 128
    je         xloop2                // fraction 1/2: single pavgb
    shr        eax, 1                // 7-bit fraction
    mov        ah,al                 // ah = f/2 (row 1 weight)
    neg        al
    add        al, 128               // al = 128 - f/2 (row 0 weight)
    movd       xmm7, eax
    punpcklwd  xmm7, xmm7
    pshufd     xmm7, xmm7, 0

  xloop:
    movdqa     xmm0, [esi]
    movdqa     xmm2, [esi + edx]
    lea        esi, [esi + 16]
    movdqa     xmm1, xmm0
    punpcklbw  xmm0, xmm2            // interleave the two rows byte-wise
    punpckhbw  xmm1, xmm2
    pmaddubsw  xmm0, xmm7            // weighted sum of the row pair
    pmaddubsw  xmm1, xmm7
    psrlw      xmm0, 7
    psrlw      xmm1, 7
    packuswb   xmm0, xmm1
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    sub        ecx, 16
    ja         xloop

    mov        al, [edi - 1]         // duplicate last pixel for filtering
    mov        [edi], al
    pop        edi
    pop        esi
    ret

  xloop1:
    movdqa     xmm0, [esi]
    lea        esi, [esi + 16]
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    sub        ecx, 16
    ja         xloop1

    mov        al, [edi - 1]
    mov        [edi], al
    pop        edi
    pop        esi
    ret

  xloop2:
    movdqa     xmm0, [esi]
    movdqa     xmm2, [esi + edx]
    lea        esi, [esi + 16]
    pavgb      xmm0, xmm2
    movdqa     [edi], xmm0
    lea        edi, [edi + 16]
    sub        ecx, 16
    ja         xloop2

    mov        al, [edi - 1]
    mov        [edi], al
    pop        edi
    pop        esi
    ret
  }
}
// Horizontal-only 3/4 column filter: 32 source pixels -> 24 output pixels
// using the same shuf/madd tables as the row-pair versions, but with no
// vertical blend (single row in, single row out).
// Note that movdqa+palign may be better than movdqu.
// Alignment requirement: iptr 16 byte aligned, optr 8 byte aligned.
__declspec(naked)
static void ScaleFilterCols34_SSSE3(uint8* optr, const uint8* iptr,
                                    int owidth) {
  __asm {
    mov        edx, [esp + 4]        // optr
    mov        eax, [esp + 8]        // iptr
    mov        ecx, [esp + 12]       // owidth
    movdqa     xmm1, _round34
    movdqa     xmm2, _shuf01
    movdqa     xmm3, _shuf11
    movdqa     xmm4, _shuf21
    movdqa     xmm5, _madd01
    movdqa     xmm6, _madd11
    movdqa     xmm7, _madd21

  wloop:
    movdqa     xmm0, [eax]           // pixels 0..7
    pshufb     xmm0, xmm2
    pmaddubsw  xmm0, xmm5
    paddsw     xmm0, xmm1
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edx], xmm0
    movdqu     xmm0, [eax+8]         // pixels 8..15
    pshufb     xmm0, xmm3
    pmaddubsw  xmm0, xmm6
    paddsw     xmm0, xmm1
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edx+8], xmm0
    movdqa     xmm0, [eax+16]        // pixels 16..23
    lea        eax, [eax+32]
    pshufb     xmm0, xmm4
    pmaddubsw  xmm0, xmm7
    paddsw     xmm0, xmm1
    psrlw      xmm0, 2
    packuswb   xmm0, xmm0
    movq       qword ptr [edx+16], xmm0
    lea        edx, [edx+24]
    sub        ecx, 24
    ja         wloop

    ret
  }
}
#elif defined(__i386__) && !defined(COVERAGE_ENABLED) && \
!TARGET_IPHONE_SIMULATOR
// GCC versions of row functions are verbatim conversions from Visual C.
// Generated using gcc disassembly on Visual C object file:
// objdump -D yuvscaler.obj >yuvscaler.txt
#define HAS_SCALEROWDOWN2_SSE2
// GCC/AT&T conversion of the MSVC ScaleRowDown2_SSE2 above: point samples
// 32 pixels to 16 by masking the even bytes. istride is unused.
extern "C" void ScaleRowDown2_SSE2(const uint8* iptr, int32 istride,
                                   uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown2_SSE2\n"
"_ScaleRowDown2_SSE2:\n"
#else
  ".global ScaleRowDown2_SSE2\n"
"ScaleRowDown2_SSE2:\n"
#endif
  "mov    0x4(%esp),%eax\n"          // iptr
  "mov    0xc(%esp),%edx\n"          // orow
  "mov    0x10(%esp),%ecx\n"         // owidth
  "pcmpeqb %xmm7,%xmm7\n"            // mask 0x00ff00ff
  "psrlw  $0x8,%xmm7\n"
"1:"
  "movdqa (%eax),%xmm0\n"
  "movdqa 0x10(%eax),%xmm1\n"
  "lea    0x20(%eax),%eax\n"
  "pand   %xmm7,%xmm0\n"
  "pand   %xmm7,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "movdqa %xmm0,(%edx)\n"
  "lea    0x10(%edx),%edx\n"
  "sub    $0x10,%ecx\n"
  "ja     1b\n"
  "ret\n"
);
// GCC/AT&T conversion of the MSVC ScaleRowDown2Int_SSE2 above: box-filters
// a 32x2 rectangle to 16x1 (row pavgb, then column pavgw).
extern "C" void ScaleRowDown2Int_SSE2(const uint8* iptr, int32 istride,
                                      uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown2Int_SSE2\n"
"_ScaleRowDown2Int_SSE2:\n"
#else
  ".global ScaleRowDown2Int_SSE2\n"
"ScaleRowDown2Int_SSE2:\n"
#endif
  "push   %esi\n"
  "mov    0x8(%esp),%eax\n"          // iptr
  "mov    0xc(%esp),%esi\n"          // istride
  "mov    0x10(%esp),%edx\n"         // orow
  "mov    0x14(%esp),%ecx\n"         // owidth
  "pcmpeqb %xmm7,%xmm7\n"            // mask 0x00ff00ff
  "psrlw  $0x8,%xmm7\n"
"1:"
  "movdqa (%eax),%xmm0\n"
  "movdqa 0x10(%eax),%xmm1\n"
  "movdqa (%eax,%esi,1),%xmm2\n"     // second row
  "movdqa 0x10(%eax,%esi,1),%xmm3\n"
  "lea    0x20(%eax),%eax\n"
  "pavgb  %xmm2,%xmm0\n"             // average rows
  "pavgb  %xmm3,%xmm1\n"
  "movdqa %xmm0,%xmm2\n"             // average columns
  "psrlw  $0x8,%xmm0\n"
  "movdqa %xmm1,%xmm3\n"
  "psrlw  $0x8,%xmm1\n"
  "pand   %xmm7,%xmm2\n"
  "pand   %xmm7,%xmm3\n"
  "pavgw  %xmm2,%xmm0\n"
  "pavgw  %xmm3,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "movdqa %xmm0,(%edx)\n"
  "lea    0x10(%edx),%edx\n"
  "sub    $0x10,%ecx\n"
  "ja     1b\n"
  "pop    %esi\n"
  "ret\n"
);
#define HAS_SCALEROWDOWN4_SSE2
// GCC/AT&T conversion of the MSVC ScaleRowDown4_SSE2 above: point samples
// 32 pixels to 8 (keeps byte 0 of every dword). istride is unused.
extern "C" void ScaleRowDown4_SSE2(const uint8* iptr, int32 istride,
                                   uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown4_SSE2\n"
"_ScaleRowDown4_SSE2:\n"
#else
  ".global ScaleRowDown4_SSE2\n"
"ScaleRowDown4_SSE2:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "pcmpeqb %xmm7,%xmm7\n"            // mask 0x000000ff
  "psrld  $0x18,%xmm7\n"
"1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm1\n"
  "lea    0x20(%esi),%esi\n"
  "pand   %xmm7,%xmm0\n"
  "pand   %xmm7,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,(%edi)\n"
  "lea    0x8(%edi),%edi\n"
  "sub    $0x8,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
// GCC/AT&T conversion of the MSVC ScaleRowDown4Int_SSE2 above: blends a
// 32x4 rectangle to 8x1 (row pavgb tree, then two column-average passes).
extern "C" void ScaleRowDown4Int_SSE2(const uint8* iptr, int32 istride,
                                      uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown4Int_SSE2\n"
"_ScaleRowDown4Int_SSE2:\n"
#else
  ".global ScaleRowDown4Int_SSE2\n"
"ScaleRowDown4Int_SSE2:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x28(%esp),%ebx\n"         // istride
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "pcmpeqb %xmm7,%xmm7\n"            // mask 0x00ff00ff
  "psrlw  $0x8,%xmm7\n"
  "lea    (%ebx,%ebx,2),%edx\n"      // istride * 3
"1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm1\n"
  "movdqa (%esi,%ebx,1),%xmm2\n"
  "movdqa 0x10(%esi,%ebx,1),%xmm3\n"
  "pavgb  %xmm2,%xmm0\n"             // average rows
  "pavgb  %xmm3,%xmm1\n"
  "movdqa (%esi,%ebx,2),%xmm2\n"
  "movdqa 0x10(%esi,%ebx,2),%xmm3\n"
  "movdqa (%esi,%edx,1),%xmm4\n"
  "movdqa 0x10(%esi,%edx,1),%xmm5\n"
  "lea    0x20(%esi),%esi\n"
  "pavgb  %xmm4,%xmm2\n"
  "pavgb  %xmm2,%xmm0\n"
  "pavgb  %xmm5,%xmm3\n"
  "pavgb  %xmm3,%xmm1\n"
  "movdqa %xmm0,%xmm2\n"             // average columns (32 to 16)
  "psrlw  $0x8,%xmm0\n"
  "movdqa %xmm1,%xmm3\n"
  "psrlw  $0x8,%xmm1\n"
  "pand   %xmm7,%xmm2\n"
  "pand   %xmm7,%xmm3\n"
  "pavgw  %xmm2,%xmm0\n"
  "pavgw  %xmm3,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "movdqa %xmm0,%xmm2\n"             // average columns (16 to 8)
  "psrlw  $0x8,%xmm0\n"
  "pand   %xmm7,%xmm2\n"
  "pavgw  %xmm2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,(%edi)\n"
  "lea    0x8(%edi),%edi\n"
  "sub    $0x8,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
#define HAS_SCALEROWDOWN8_SSE2
// GCC/AT&T conversion of the MSVC ScaleRowDown8_SSE2 above: point samples
// 32 pixels to 4 (keeps byte 0 of every qword). istride is unused.
extern "C" void ScaleRowDown8_SSE2(const uint8* iptr, int32 istride,
                                   uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown8_SSE2\n"
"_ScaleRowDown8_SSE2:\n"
#else
  ".global ScaleRowDown8_SSE2\n"
"ScaleRowDown8_SSE2:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "pcmpeqb %xmm7,%xmm7\n"            // mask isolating 1 in 8 bytes
  "psrlq  $0x38,%xmm7\n"
"1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm1\n"
  "lea    0x20(%esi),%esi\n"
  "pand   %xmm7,%xmm0\n"
  "pand   %xmm7,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"           // 32->16
  "packuswb %xmm0,%xmm0\n"           // 16->8
  "packuswb %xmm0,%xmm0\n"           // 8->4
  "movd   %xmm0,(%edi)\n"
  "lea    0x4(%edi),%edi\n"
  "sub    $0x4,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
// GCC/AT&T conversion of the MSVC ScaleRowDown8Int_SSE2 above: blends a
// 32x8 rectangle to 4x1 (row pavgb tree, then psadbw horizontal sums).
extern "C" void ScaleRowDown8Int_SSE2(const uint8* iptr, int32 istride,
                                      uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown8Int_SSE2\n"
"_ScaleRowDown8Int_SSE2:\n"
#else
  ".global ScaleRowDown8Int_SSE2\n"
"ScaleRowDown8Int_SSE2:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x28(%esp),%ebx\n"         // istride
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "lea    (%ebx,%ebx,2),%edx\n"      // istride * 3
  "pxor   %xmm7,%xmm7\n"             // zero for psadbw
"1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm1\n"
  "movdqa (%esi,%ebx,1),%xmm2\n"
  "movdqa 0x10(%esi,%ebx,1),%xmm3\n"
  "pavgb  %xmm2,%xmm0\n"
  "pavgb  %xmm3,%xmm1\n"
  "movdqa (%esi,%ebx,2),%xmm2\n"
  "movdqa 0x10(%esi,%ebx,2),%xmm3\n"
  "movdqa (%esi,%edx,1),%xmm4\n"
  "movdqa 0x10(%esi,%edx,1),%xmm5\n"
  "lea    (%esi,%ebx,4),%ebp\n"      // ebp -> rows 4..7 (restored by popa)
  "lea    0x20(%esi),%esi\n"
  "pavgb  %xmm4,%xmm2\n"
  "pavgb  %xmm5,%xmm3\n"
  "pavgb  %xmm2,%xmm0\n"
  "pavgb  %xmm3,%xmm1\n"
  "movdqa 0x0(%ebp),%xmm2\n"
  "movdqa 0x10(%ebp),%xmm3\n"
  "movdqa 0x0(%ebp,%ebx,1),%xmm4\n"
  "movdqa 0x10(%ebp,%ebx,1),%xmm5\n"
  "pavgb  %xmm4,%xmm2\n"
  "pavgb  %xmm5,%xmm3\n"
  "movdqa 0x0(%ebp,%ebx,2),%xmm4\n"
  "movdqa 0x10(%ebp,%ebx,2),%xmm5\n"
  "movdqa 0x0(%ebp,%edx,1),%xmm6\n"
  "pavgb  %xmm6,%xmm4\n"
  "movdqa 0x10(%ebp,%edx,1),%xmm6\n"
  "pavgb  %xmm6,%xmm5\n"
  "pavgb  %xmm4,%xmm2\n"
  "pavgb  %xmm5,%xmm3\n"
  "pavgb  %xmm2,%xmm0\n"
  "pavgb  %xmm3,%xmm1\n"
  "psadbw %xmm7,%xmm0\n"             // average 32 pixels to 4
  "psadbw %xmm7,%xmm1\n"
  "pshufd $0xd8,%xmm0,%xmm0\n"       // x1x0 -> xx01
  "pshufd $0x8d,%xmm1,%xmm1\n"       // x3x2 -> 32xx
  "por    %xmm1,%xmm0\n"             // -> 3201
  "psrlw  $0x3,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movd   %xmm0,(%edi)\n"
  "lea    0x4(%edi),%edi\n"
  "sub    $0x4,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
// fpic is used for magiccam plugin
#if !defined(__PIC__)
#define HAS_SCALEROWDOWN34_SSSE3
// GCC/AT&T conversion of the MSVC ScaleRowDown34_SSSE3 above: point
// samples 32 pixels to 24 using the _shuf0/1/2 tables. istride is unused.
// Only built without -fPIC: the absolute _shuf references are not
// position independent.
extern "C" void ScaleRowDown34_SSSE3(const uint8* iptr, int32 istride,
                                     uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown34_SSSE3\n"
"_ScaleRowDown34_SSSE3:\n"
#else
  ".global ScaleRowDown34_SSSE3\n"
"ScaleRowDown34_SSSE3:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "movdqa _shuf0,%xmm3\n"
  "movdqa _shuf1,%xmm4\n"
  "movdqa _shuf2,%xmm5\n"
"1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm2\n"
  "lea    0x20(%esi),%esi\n"
  "movdqa %xmm2,%xmm1\n"
  "palignr $0x8,%xmm0,%xmm1\n"       // xmm1 = source bytes 8..23
  "pshufb %xmm3,%xmm0\n"
  "pshufb %xmm4,%xmm1\n"
  "pshufb %xmm5,%xmm2\n"
  "movq   %xmm0,(%edi)\n"
  "movq   %xmm1,0x8(%edi)\n"
  "movq   %xmm2,0x10(%edi)\n"
  "lea    0x18(%edi),%edi\n"
  "sub    $0x18,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
// GCC/AT&T conversion of the MSVC ScaleRowDown34_1_Int_SSSE3 above:
// blends 32x2 to 24x1 with a 1:1 vertical pavgb and 3/4 horizontal filter.
extern "C" void ScaleRowDown34_1_Int_SSSE3(const uint8* iptr, int32 istride,
                                           uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown34_1_Int_SSSE3\n"
"_ScaleRowDown34_1_Int_SSSE3:\n"
#else
  ".global ScaleRowDown34_1_Int_SSSE3\n"
"ScaleRowDown34_1_Int_SSSE3:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x28(%esp),%ebp\n"         // istride (ebp restored by popa)
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "movdqa _shuf01,%xmm2\n"
  "movdqa _shuf11,%xmm3\n"
  "movdqa _shuf21,%xmm4\n"
  "movdqa _madd01,%xmm5\n"
  "movdqa _madd11,%xmm6\n"
  "movdqa _round34,%xmm7\n"
"1:"
  "movdqa (%esi),%xmm0\n"            // pixels 0..7
  "movdqa (%esi,%ebp),%xmm1\n"
  "pavgb  %xmm1,%xmm0\n"             // 1:1 vertical blend
  "pshufb %xmm2,%xmm0\n"
  "pmaddubsw %xmm5,%xmm0\n"
  "paddsw %xmm7,%xmm0\n"
  "psrlw  $0x2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,(%edi)\n"
  "movdqu 0x8(%esi),%xmm0\n"         // pixels 8..15
  "movdqu 0x8(%esi,%ebp),%xmm1\n"
  "pavgb  %xmm1,%xmm0\n"
  "pshufb %xmm3,%xmm0\n"
  "pmaddubsw %xmm6,%xmm0\n"
  "paddsw %xmm7,%xmm0\n"
  "psrlw  $0x2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,0x8(%edi)\n"
  "movdqa 0x10(%esi),%xmm0\n"        // pixels 16..23
  "movdqa 0x10(%esi,%ebp),%xmm1\n"
  "lea    0x20(%esi),%esi\n"
  "pavgb  %xmm1,%xmm0\n"
  "pshufb %xmm4,%xmm0\n"
  "movdqa _madd21,%xmm1\n"           // no free register; reload each pass
  "pmaddubsw %xmm1,%xmm0\n"
  "paddsw %xmm7,%xmm0\n"
  "psrlw  $0x2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,0x10(%edi)\n"
  "lea    0x18(%edi),%edi\n"
  "sub    $0x18,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
// GCC/AT&T conversion of the MSVC ScaleRowDown34_0_Int_SSSE3 above:
// same as the _1_ variant but the double pavgb weights the rows 3:1.
extern "C" void ScaleRowDown34_0_Int_SSSE3(const uint8* iptr, int32 istride,
                                           uint8* orow, int32 owidth);
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown34_0_Int_SSSE3\n"
"_ScaleRowDown34_0_Int_SSSE3:\n"
#else
  ".global ScaleRowDown34_0_Int_SSSE3\n"
"ScaleRowDown34_0_Int_SSSE3:\n"
#endif
  "pusha\n"
  "mov    0x24(%esp),%esi\n"         // iptr
  "mov    0x28(%esp),%ebp\n"         // istride (ebp restored by popa)
  "mov    0x2c(%esp),%edi\n"         // orow
  "mov    0x30(%esp),%ecx\n"         // owidth
  "movdqa _shuf01,%xmm2\n"
  "movdqa _shuf11,%xmm3\n"
  "movdqa _shuf21,%xmm4\n"
  "movdqa _madd01,%xmm5\n"
  "movdqa _madd11,%xmm6\n"
  "movdqa _round34,%xmm7\n"
"1:"
  "movdqa (%esi),%xmm0\n"            // pixels 0..7
  "movdqa (%esi,%ebp,1),%xmm1\n"
  "pavgb  %xmm0,%xmm1\n"             // xmm1 = (r0 + r1) / 2
  "pavgb  %xmm1,%xmm0\n"             // xmm0 = (3*r0 + r1) / 4
  "pshufb %xmm2,%xmm0\n"
  "pmaddubsw %xmm5,%xmm0\n"
  "paddsw %xmm7,%xmm0\n"
  "psrlw  $0x2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,(%edi)\n"
  "movdqu 0x8(%esi),%xmm0\n"         // pixels 8..15
  "movdqu 0x8(%esi,%ebp,1),%xmm1\n"
  "pavgb  %xmm0,%xmm1\n"
  "pavgb  %xmm1,%xmm0\n"
  "pshufb %xmm3,%xmm0\n"
  "pmaddubsw %xmm6,%xmm0\n"
  "paddsw %xmm7,%xmm0\n"
  "psrlw  $0x2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,0x8(%edi)\n"
  "movdqa 0x10(%esi),%xmm0\n"        // pixels 16..23
  "movdqa 0x10(%esi,%ebp,1),%xmm1\n"
  "lea    0x20(%esi),%esi\n"
  "pavgb  %xmm0,%xmm1\n"
  "pavgb  %xmm1,%xmm0\n"
  "pshufb %xmm4,%xmm0\n"
  "movdqa _madd21,%xmm1\n"
  "pmaddubsw %xmm1,%xmm0\n"
  "paddsw %xmm7,%xmm0\n"
  "psrlw  $0x2,%xmm0\n"
  "packuswb %xmm0,%xmm0\n"
  "movq   %xmm0,0x10(%edi)\n"
  "lea    0x18(%edi),%edi\n"
  "sub    $0x18,%ecx\n"
  "ja     1b\n"
  "popa\n"
  "ret\n"
);
#define HAS_SCALEROWDOWN38_SSSE3
extern "C" void ScaleRowDown38_SSSE3(const uint8* iptr, int32 istride,
                                     uint8* optr, int32 owidth);
// 3/8 point-sampling scale (no filtering; istride is loaded but unused).
// Two shuffles gather the sampled pixels from each 16-byte half of 32
// source bytes; paddusb merges the halves into 12 output pixels per loop.
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown38_SSSE3\n"
  "_ScaleRowDown38_SSSE3:\n"
#else
  ".global ScaleRowDown38_SSSE3\n"
  "ScaleRowDown38_SSSE3:\n"
#endif
  "pusha\n"
  "mov 0x24(%esp),%esi\n"          // esi = iptr
  "mov 0x28(%esp),%edx\n"          // edx = istride (unused here)
  "mov 0x2c(%esp),%edi\n"          // edi = optr
  "mov 0x30(%esp),%ecx\n"          // ecx = owidth (counts down by 12)
  "movdqa _shuf38a ,%xmm5\n"       // gather shuffle for the low 16 bytes
  "movdqa _shuf38b ,%xmm6\n"       // gather shuffle for the high 16 bytes
  "pxor %xmm7,%xmm7\n"
  "1:"
  "movdqa (%esi),%xmm0\n"
  "movdqa 0x10(%esi),%xmm1\n"
  "lea 0x20(%esi),%esi\n"          // advance source by 32
  "pshufb %xmm5,%xmm0\n"
  "pshufb %xmm6,%xmm1\n"
  "paddusb %xmm1,%xmm0\n"          // combine the two gathered halves
  "movq %xmm0,(%edi)\n"            // 8 output pixels
  "movhlps %xmm0,%xmm1\n"
  "movd %xmm1,0x8(%edi)\n"         // 4 more output pixels
  "lea 0xc(%edi),%edi\n"           // advance dest by 12
  "sub $0xc,%ecx\n"
  "ja 1b\n"
  "popa\n"
  "ret\n"
);
extern "C" void ScaleRowDown38_3_Int_SSSE3(const uint8* iptr, int32 istride,
                                           uint8* optr, int32 owidth);
// 3/8 scale with a 3-row box filter: widens 3 rows of 16 pixels to 16-bit,
// sums them vertically, folds adjacent columns via shifted adds/shuffles,
// then multiplies by fixed-point reciprocals (_scaleac3) to average.
// 16 source pixels -> 6 output pixels per iteration.
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown38_3_Int_SSSE3\n"
  "_ScaleRowDown38_3_Int_SSSE3:\n"
#else
  ".global ScaleRowDown38_3_Int_SSSE3\n"
  "ScaleRowDown38_3_Int_SSSE3:\n"
#endif
  "pusha\n"
  "mov 0x24(%esp),%esi\n"          // esi = iptr
  "mov 0x28(%esp),%edx\n"          // edx = istride
  "mov 0x2c(%esp),%edi\n"          // edi = optr
  "mov 0x30(%esp),%ecx\n"          // ecx = owidth (counts down by 6)
  "movdqa _shufac0,%xmm4\n"
  "movdqa _shufac3,%xmm5\n"
  "movdqa _scaleac3,%xmm6\n"       // 16-bit reciprocals of the box sizes
  "pxor %xmm7,%xmm7\n"             // zero, for byte -> word unpacks
  "1:"
  "movdqa (%esi),%xmm0\n"          // row 0
  "movdqa (%esi,%edx,1),%xmm2\n"   // row 1
  "movhlps %xmm0,%xmm1\n"
  "movhlps %xmm2,%xmm3\n"
  "punpcklbw %xmm7,%xmm0\n"        // widen both rows to 16-bit
  "punpcklbw %xmm7,%xmm1\n"
  "punpcklbw %xmm7,%xmm2\n"
  "punpcklbw %xmm7,%xmm3\n"
  "paddusw %xmm2,%xmm0\n"          // row0 + row1
  "paddusw %xmm3,%xmm1\n"
  "movdqa (%esi,%edx,2),%xmm2\n"   // row 2
  "lea 0x10(%esi),%esi\n"          // advance source by 16
  "movhlps %xmm2,%xmm3\n"
  "punpcklbw %xmm7,%xmm2\n"
  "punpcklbw %xmm7,%xmm3\n"
  "paddusw %xmm2,%xmm0\n"          // + row2
  "paddusw %xmm3,%xmm1\n"
  "movdqa %xmm0,%xmm2\n"
  "psrldq $0x2,%xmm0\n"            // fold neighboring column sums together
  "paddusw %xmm0,%xmm2\n"
  "psrldq $0x2,%xmm0\n"
  "paddusw %xmm0,%xmm2\n"
  "pshufb %xmm4,%xmm2\n"
  "movdqa %xmm1,%xmm3\n"
  "psrldq $0x2,%xmm1\n"
  "paddusw %xmm1,%xmm3\n"
  "psrldq $0x2,%xmm1\n"
  "paddusw %xmm1,%xmm3\n"
  "pshufb %xmm5,%xmm3\n"
  "paddusw %xmm3,%xmm2\n"
  "pmulhuw %xmm6,%xmm2\n"          // sum * reciprocal = average
  "packuswb %xmm2,%xmm2\n"
  "movd %xmm2,(%edi)\n"            // 4 output pixels
  "pextrw $0x2,%xmm2,%eax\n"
  "mov %ax,0x4(%edi)\n"            // 2 more output pixels
  "lea 0x6(%edi),%edi\n"           // advance dest by 6
  "sub $0x6,%ecx\n"
  "ja 1b\n"
  "popa\n"
  "ret\n"
);
extern "C" void ScaleRowDown38_2_Int_SSSE3(const uint8* iptr, int32 istride,
                                           uint8* optr, int32 owidth);
// 3/8 scale with a 2-row box filter: pavgb averages the two rows, three
// shuffles split the result into the columns of each output box, and the
// column sums are scaled by fixed-point reciprocals (_scaleab2).
// 16 source pixels -> 6 output pixels per iteration.
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleRowDown38_2_Int_SSSE3\n"
  "_ScaleRowDown38_2_Int_SSSE3:\n"
#else
  ".global ScaleRowDown38_2_Int_SSSE3\n"
  "ScaleRowDown38_2_Int_SSSE3:\n"
#endif
  "pusha\n"
  "mov 0x24(%esp),%esi\n"          // esi = iptr
  "mov 0x28(%esp),%edx\n"          // edx = istride
  "mov 0x2c(%esp),%edi\n"          // edi = optr
  "mov 0x30(%esp),%ecx\n"          // ecx = owidth (counts down by 6)
  "movdqa _shufab0,%xmm4\n"
  "movdqa _shufab1,%xmm5\n"
  "movdqa _shufab2,%xmm6\n"
  "movdqa _scaleab2,%xmm7\n"       // 16-bit reciprocals of the box sizes
  "1:"
  "movdqa (%esi),%xmm2\n"
  "pavgb (%esi,%edx,1),%xmm2\n"    // vertical 1:1 average of the two rows
  "lea 0x10(%esi),%esi\n"          // advance source by 16
  "movdqa %xmm2,%xmm0\n"
  "pshufb %xmm4,%xmm0\n"           // first column of each output box
  "movdqa %xmm2,%xmm1\n"
  "pshufb %xmm5,%xmm1\n"           // second column
  "paddusw %xmm1,%xmm0\n"
  "pshufb %xmm6,%xmm2\n"           // third column
  "paddusw %xmm2,%xmm0\n"
  "pmulhuw %xmm7,%xmm0\n"          // sum * reciprocal = average
  "packuswb %xmm0,%xmm0\n"
  "movd %xmm0,(%edi)\n"            // 4 output pixels
  "pextrw $0x2,%xmm0,%eax\n"
  "mov %ax,0x4(%edi)\n"            // 2 more output pixels
  "lea 0x6(%edi),%edi\n"           // advance dest by 6
  "sub $0x6,%ecx\n"
  "ja 1b\n"
  "popa\n"
  "ret\n"
);
#endif // __PIC__
#define HAS_SCALEADDROWS_SSE2
extern "C" void ScaleAddRows_SSE2(const uint8* iptr, int32 istride,
                                  uint16* orow, int32 iwidth, int32 iheight);
// Sums iheight rows vertically into a row of 16-bit totals (feeds the box
// filter's column pass). The outer loop covers 16 pixels at a time; the
// inner loop adds the remaining iheight - 1 rows. Saturating adds clamp
// totals at 65535 instead of wrapping.
// NOTE(review): the inner loop body executes before the count check, so
// iheight == 1 would still add one extra row; callers appear to pass
// boxheight >= 2 on this path — verify.
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleAddRows_SSE2\n"
  "_ScaleAddRows_SSE2:\n"
#else
  ".global ScaleAddRows_SSE2\n"
  "ScaleAddRows_SSE2:\n"
#endif
  "pusha\n"
  "mov 0x24(%esp),%esi\n"          // esi = iptr
  "mov 0x28(%esp),%edx\n"          // edx = istride
  "mov 0x2c(%esp),%edi\n"          // edi = orow
  "mov 0x30(%esp),%ecx\n"          // ecx = iwidth (counts down by 16)
  "mov 0x34(%esp),%ebx\n"          // ebx = iheight
  "pxor %xmm7,%xmm7\n"             // zero, for byte -> word unpacks
  "1:"
  "movdqa (%esi),%xmm2\n"          // first row of this 16-pixel column
  "lea (%esi,%edx,1),%eax\n"       // eax = start of the next row
  "movhlps %xmm2,%xmm3\n"
  "lea -0x1(%ebx),%ebp\n"          // ebp = remaining row count
  "punpcklbw %xmm7,%xmm2\n"        // widen to 16-bit accumulators
  "punpcklbw %xmm7,%xmm3\n"
  "2:"
  "movdqa (%eax),%xmm0\n"
  "lea (%eax,%edx,1),%eax\n"       // step down one row
  "movhlps %xmm0,%xmm1\n"
  "punpcklbw %xmm7,%xmm0\n"
  "punpcklbw %xmm7,%xmm1\n"
  "paddusw %xmm0,%xmm2\n"          // accumulate 16-bit sums
  "paddusw %xmm1,%xmm3\n"
  "sub $0x1,%ebp\n"
  "ja 2b\n"
  "movdqa %xmm2,(%edi)\n"          // store 16 uint16 column sums
  "movdqa %xmm3,0x10(%edi)\n"
  "lea 0x20(%edi),%edi\n"
  "lea 0x10(%esi),%esi\n"
  "sub $0x10,%ecx\n"
  "ja 1b\n"
  "popa\n"
  "ret\n"
);
// Bilinear row filtering combines 16x2 -> 16x1. SSE2 version
#define HAS_SCALEFILTERROWS_SSE2
extern "C" void ScaleFilterRows_SSE2(uint8* optr,
                                     const uint8* iptr0, int32 istride,
                                     int owidth, int source_y_fraction);
// Vertically blends two adjacent rows: out = (row0 * (256 - f) +
// row1 * f) >> 8, 16 pixels per iteration. Special cases: f == 0 copies
// row0 (label 2), f == 128 uses pavgb (label 3). After each loop the last
// output pixel is replicated one byte past the end, because the horizontal
// filter pass (ScaleFilterCols_C) reads one pixel beyond the row.
// Only esi/edi are saved, so the first argument is at 0xc(%esp).
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleFilterRows_SSE2\n"
  "_ScaleFilterRows_SSE2:\n"
#else
  ".global ScaleFilterRows_SSE2\n"
  "ScaleFilterRows_SSE2:\n"
#endif
  "push %esi\n"
  "push %edi\n"
  "mov 0xc(%esp),%edi\n"           // edi = optr
  "mov 0x10(%esp),%esi\n"          // esi = iptr0
  "mov 0x14(%esp),%edx\n"          // edx = istride
  "mov 0x18(%esp),%ecx\n"          // ecx = owidth (counts down by 16)
  "mov 0x1c(%esp),%eax\n"          // eax = source_y_fraction (0..255)
  "cmp $0x0,%eax\n"
  "je 2f\n"                        // f == 0: plain copy of row 0
  "cmp $0x80,%eax\n"
  "je 3f\n"                        // f == 128: 1:1 average
  "movd %eax,%xmm6\n"              // xmm6 = 8 words of f
  "punpcklwd %xmm6,%xmm6\n"
  "pshufd $0x0,%xmm6,%xmm6\n"
  "neg %eax\n"
  "add $0x100,%eax\n"              // eax = 256 - f
  "movd %eax,%xmm5\n"              // xmm5 = 8 words of (256 - f)
  "punpcklwd %xmm5,%xmm5\n"
  "pshufd $0x0,%xmm5,%xmm5\n"
  "pxor %xmm7,%xmm7\n"             // zero, for byte -> word unpacks
  "1:"
  "movdqa (%esi),%xmm0\n"          // 16 pixels of row 0
  "movdqa (%esi,%edx,1),%xmm2\n"   // 16 pixels of row 1
  "lea 0x10(%esi),%esi\n"
  "movdqa %xmm0,%xmm1\n"
  "movdqa %xmm2,%xmm3\n"
  "punpcklbw %xmm7,%xmm0\n"        // widen both rows to 16-bit
  "punpcklbw %xmm7,%xmm2\n"
  "punpckhbw %xmm7,%xmm1\n"
  "punpckhbw %xmm7,%xmm3\n"
  "pmullw %xmm5,%xmm0\n"           // row0 * (256 - f)
  "pmullw %xmm5,%xmm1\n"
  "pmullw %xmm6,%xmm2\n"           // row1 * f
  "pmullw %xmm6,%xmm3\n"
  "paddusw %xmm2,%xmm0\n"
  "paddusw %xmm3,%xmm1\n"
  "psrlw $0x8,%xmm0\n"             // >> 8 back to pixel range
  "psrlw $0x8,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "movdqa %xmm0,(%edi)\n"
  "lea 0x10(%edi),%edi\n"
  "sub $0x10,%ecx\n"
  "ja 1b\n"
  "mov -0x1(%edi),%al\n"           // replicate last pixel past the end
  "mov %al,(%edi)\n"
  "pop %edi\n"
  "pop %esi\n"
  "ret\n"
  "2:"                             // f == 0: copy row 0
  "movdqa (%esi),%xmm0\n"
  "lea 0x10(%esi),%esi\n"
  "movdqa %xmm0,(%edi)\n"
  "lea 0x10(%edi),%edi\n"
  "sub $0x10,%ecx\n"
  "ja 2b\n"
  "mov -0x1(%edi),%al\n"
  "mov %al,(%edi)\n"
  "pop %edi\n"
  "pop %esi\n"
  "ret\n"
  "3:"                             // f == 128: average the two rows
  "movdqa (%esi),%xmm0\n"
  "movdqa (%esi,%edx,1),%xmm2\n"
  "lea 0x10(%esi),%esi\n"
  "pavgb %xmm2,%xmm0\n"
  "movdqa %xmm0,(%edi)\n"
  "lea 0x10(%edi),%edi\n"
  "sub $0x10,%ecx\n"
  "ja 3b\n"
  "mov -0x1(%edi),%al\n"
  "mov %al,(%edi)\n"
  "pop %edi\n"
  "pop %esi\n"
  "ret\n"
);
// Bilinear row filtering combines 16x2 -> 16x1. SSSE3 version
#define HAS_SCALEFILTERROWS_SSSE3
extern "C" void ScaleFilterRows_SSSE3(uint8* optr,
                                      const uint8* iptr0, int32 istride,
                                      int owidth, int source_y_fraction);
// Same contract as ScaleFilterRows_SSE2, but blends with one pmaddubsw per
// 8 pixels: the fraction is halved to 7 bits and packed as the byte pair
// (128 - f/2, f/2), applied to interleaved row0/row1 bytes, then >> 7.
// Special cases f == 0 (copy) and f == 128 (pavgb) as in the SSE2 version;
// the last output pixel is replicated one byte past the end.
asm(
  ".text\n"
#if defined(OSX)
  ".globl _ScaleFilterRows_SSSE3\n"
  "_ScaleFilterRows_SSSE3:\n"
#else
  ".global ScaleFilterRows_SSSE3\n"
  "ScaleFilterRows_SSSE3:\n"
#endif
  "push %esi\n"
  "push %edi\n"
  "mov 0xc(%esp),%edi\n"           // edi = optr
  "mov 0x10(%esp),%esi\n"          // esi = iptr0
  "mov 0x14(%esp),%edx\n"          // edx = istride
  "mov 0x18(%esp),%ecx\n"          // ecx = owidth (counts down by 16)
  "mov 0x1c(%esp),%eax\n"          // eax = source_y_fraction (0..255)
  "cmp $0x0,%eax\n"
  "je 2f\n"                        // f == 0: plain copy of row 0
  "cmp $0x80,%eax\n"
  "je 3f\n"                        // f == 128: 1:1 average
  "shr %eax\n"                     // halve f so the weights fit signed bytes
  "mov %al,%ah\n"                  // ah = f/2 (row 1 weight)
  "neg %al\n"
  "add $0x80,%al\n"                // al = 128 - f/2 (row 0 weight)
  "movd %eax,%xmm7\n"              // broadcast the weight pair
  "punpcklwd %xmm7,%xmm7\n"
  "pshufd $0x0,%xmm7,%xmm7\n"
  "1:"
  "movdqa (%esi),%xmm0\n"          // 16 pixels of row 0
  "movdqa (%esi,%edx,1),%xmm2\n"   // 16 pixels of row 1
  "lea 0x10(%esi),%esi\n"
  "movdqa %xmm0,%xmm1\n"
  "punpcklbw %xmm2,%xmm0\n"        // interleave row0/row1 bytes
  "punpckhbw %xmm2,%xmm1\n"
  "pmaddubsw %xmm7,%xmm0\n"        // weighted blend of each byte pair
  "pmaddubsw %xmm7,%xmm1\n"
  "psrlw $0x7,%xmm0\n"             // >> 7 back to pixel range
  "psrlw $0x7,%xmm1\n"
  "packuswb %xmm1,%xmm0\n"
  "movdqa %xmm0,(%edi)\n"
  "lea 0x10(%edi),%edi\n"
  "sub $0x10,%ecx\n"
  "ja 1b\n"
  "mov -0x1(%edi),%al\n"           // replicate last pixel past the end
  "mov %al,(%edi)\n"
  "pop %edi\n"
  "pop %esi\n"
  "ret\n"
  "2:"                             // f == 0: copy row 0
  "movdqa (%esi),%xmm0\n"
  "lea 0x10(%esi),%esi\n"
  "movdqa %xmm0,(%edi)\n"
  "lea 0x10(%edi),%edi\n"
  "sub $0x10,%ecx\n"
  "ja 2b\n"
  "mov -0x1(%edi),%al\n"
  "mov %al,(%edi)\n"
  "pop %edi\n"
  "pop %esi\n"
  "ret\n"
  "3:"                             // f == 128: average the two rows
  "movdqa (%esi),%xmm0\n"
  "movdqa (%esi,%edx,1),%xmm2\n"
  "lea 0x10(%esi),%esi\n"
  "pavgb %xmm2,%xmm0\n"
  "movdqa %xmm0,(%edi)\n"
  "lea 0x10(%edi),%edi\n"
  "sub $0x10,%ecx\n"
  "ja 3b\n"
  "mov -0x1(%edi),%al\n"
  "mov %al,(%edi)\n"
  "pop %edi\n"
  "pop %esi\n"
  "ret\n"
);
#endif
// CPU agnostic row functions
static void ScaleRowDown2_C(const uint8* iptr, int32,
uint8* dst, int32 owidth) {
for (int x = 0; x < owidth; ++x) {
*dst++ = *iptr;
iptr += 2;
}
}
static void ScaleRowDown2Int_C(const uint8* iptr, int32 istride,
uint8* dst, int32 owidth) {
for (int x = 0; x < owidth; ++x) {
*dst++ = (iptr[0] + iptr[1] +
iptr[istride] + iptr[istride + 1] + 2) >> 2;
iptr += 2;
}
}
// Point-sample every 4th source pixel (no filtering). The unnamed second
// parameter keeps the common row-function signature.
static void ScaleRowDown4_C(const uint8* iptr, int32,
                            uint8* dst, int32 owidth) {
  for (int i = 0; i < owidth; ++i) {
    dst[i] = iptr[i * 4];
  }
}
static void ScaleRowDown4Int_C(const uint8* iptr, int32 istride,
uint8* dst, int32 owidth) {
for (int x = 0; x < owidth; ++x) {
*dst++ = (iptr[0] + iptr[1] + iptr[2] + iptr[3] +
iptr[istride + 0] + iptr[istride + 1] +
iptr[istride + 2] + iptr[istride + 3] +
iptr[istride * 2 + 0] + iptr[istride * 2 + 1] +
iptr[istride * 2 + 2] + iptr[istride * 2 + 3] +
iptr[istride * 3 + 0] + iptr[istride * 3 + 1] +
iptr[istride * 3 + 2] + iptr[istride * 3 + 3] + 8) >> 4;
iptr += 4;
}
}
// 640 output pixels is enough to allow 5120 input pixels with 1/8 scale down.
// Keeping the total buffer under 4096 bytes avoids a stackcheck, saving 4% cpu.
static const int kMaxOutputWidth = 640;
static const int kMaxRow12 = kMaxOutputWidth * 2;
static void ScaleRowDown8_C(const uint8* iptr, int32,
uint8* dst, int32 owidth) {
for (int x = 0; x < owidth; ++x) {
*dst++ = *iptr;
iptr += 8;
}
}
// Note calling code checks width is less than max and if not
// uses ScaleRowDown8_C instead.
// 8x8 box filter built by cascading smaller filters: two 4x4 box passes
// produce a top-half and a bottom-half intermediate row (each owidth * 2
// wide), then a final 2x2 pass over the intermediate yields the output.
static void ScaleRowDown8Int_C(const uint8* iptr, int32 istride,
                               uint8* dst, int32 owidth) {
  ALIGN16(uint8 irow[kMaxRow12 * 2]);  // two intermediate rows, kMaxOutputWidth apart
  ASSERT(owidth <= kMaxOutputWidth);
  ScaleRowDown4Int_C(iptr, istride, irow, owidth * 2);
  ScaleRowDown4Int_C(iptr + istride * 4, istride, irow + kMaxOutputWidth,
                     owidth * 2);
  ScaleRowDown2Int_C(irow, kMaxOutputWidth, dst, owidth);
}
// 3/4 point sampling: from every 4 source pixels keep pixels 0, 1 and 3.
static void ScaleRowDown34_C(const uint8* iptr, int32,
                             uint8* dst, int32 owidth) {
  ASSERT((owidth % 3 == 0) && (owidth > 0));
  for (int i = 0; i < owidth; i += 3, iptr += 4) {
    dst[i + 0] = iptr[0];
    dst[i + 1] = iptr[1];
    dst[i + 2] = iptr[3];
  }
}
// Filter rows 0 and 1 together, 3 : 1
static void ScaleRowDown34_0_Int_C(const uint8* iptr, int32 istride,
uint8* d, int32 owidth) {
ASSERT((owidth % 3 == 0) && (owidth > 0));
uint8* dend = d + owidth;
const uint8* s = iptr;
const uint8* t = iptr + istride;
do {
uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 * 3 + b0 + 2) >> 2;
d[1] = (a1 * 3 + b1 + 2) >> 2;
d[2] = (a2 * 3 + b2 + 2) >> 2;
d += 3;
s += 4;
t += 4;
} while (d < dend);
}
// Filter rows 1 and 2 together, 1 : 1
static void ScaleRowDown34_1_Int_C(const uint8* iptr, int32 istride,
uint8* d, int32 owidth) {
ASSERT((owidth % 3 == 0) && (owidth > 0));
uint8* dend = d + owidth;
const uint8* s = iptr;
const uint8* t = iptr + istride;
do {
uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
d[0] = (a0 + b0 + 1) >> 1;
d[1] = (a1 + b1 + 1) >> 1;
d[2] = (a2 + b2 + 1) >> 1;
d += 3;
s += 4;
t += 4;
} while (d < dend);
}
#if defined(HAS_SCALEFILTERROWS_SSE2)
// Filter row to 3/4: horizontally shrink every 4 source pixels to 3 with
// 3:1 / 1:1 / 1:3 weighting and rounding.
static void ScaleFilterCols34_C(uint8* optr, const uint8* iptr, int owidth) {
  ASSERT((owidth % 3 == 0) && (owidth > 0));
  const uint8* s = iptr;
  for (uint8* const end = optr + owidth; optr < end; optr += 3, s += 4) {
    optr[0] = (s[0] * 3 + s[1] + 2) >> 2;
    optr[1] = (s[1] + s[2] + 1) >> 1;
    optr[2] = (s[2] + s[3] * 3 + 2) >> 2;
  }
}
#endif
// Horizontal bilinear filter. x and dx are 16.16 fixed point; each output
// pixel blends source pixels xi and xi + 1 by the fractional position, so
// the source row must be readable one pixel past the last sampled index.
static void ScaleFilterCols_C(uint8* optr, const uint8* iptr,
                              int owidth, int dx) {
  int x = 0;
  for (int i = 0; i < owidth; ++i, x += dx) {
    const int xi = x >> 16;
    const int frac = x & 0xffff;
    optr[i] = (iptr[xi] * (65536 - frac) + iptr[xi + 1] * frac) >> 16;
  }
}
#ifdef TEST_RSTSC
uint64 timers34[4] = { 0, };
#endif
static const int kMaxInputWidth = 2560;
#if defined(HAS_SCALEFILTERROWS_SSE2)
#define HAS_SCALEROWDOWN34_SSE2
// Filter rows 0 and 1 together, 3 : 1
// SSE2 3/4 path: vertically filter the two rows into a temp row using the
// SSE2 bilinear row filter with fraction 256/4 (1/4 weight on the lower
// row), then run the horizontal 4 -> 3 filter in C. owidth * 4 / 3 source
// pixels are consumed to produce owidth output pixels.
static void ScaleRowDown34_0_Int_SSE2(const uint8* iptr, int32 istride,
                                      uint8* d, int32 owidth) {
  ASSERT((owidth % 3 == 0) && (owidth > 0));
  ALIGN16(uint8 row[kMaxInputWidth]);  // intermediate vertically-filtered row
#ifdef TEST_RSTSC
  uint64 t1 = __rdtsc();
#endif
  ScaleFilterRows_SSE2(row, iptr, istride, owidth * 4 / 3, 256 / 4);
#ifdef TEST_RSTSC
  uint64 t2 = __rdtsc();
#endif
  ScaleFilterCols34_C(d, row, owidth);
#ifdef TEST_RSTSC
  uint64 t3 = __rdtsc();
  timers34[0] += t2 - t1;
  timers34[1] += t3 - t2;
#endif
}
// Filter rows 1 and 2 together, 1 : 1
// SSE2 3/4 path, 1:1 variant: vertically average the two rows with the
// SSE2 bilinear row filter at fraction 256/2 (equal weights), then run the
// horizontal 4 -> 3 filter in C.
static void ScaleRowDown34_1_Int_SSE2(const uint8* iptr, int32 istride,
                                      uint8* d, int32 owidth) {
  ASSERT((owidth % 3 == 0) && (owidth > 0));
  ALIGN16(uint8 row[kMaxInputWidth]);  // intermediate vertically-filtered row
#ifdef TEST_RSTSC
  uint64 t1 = __rdtsc();
#endif
  ScaleFilterRows_SSE2(row, iptr, istride, owidth * 4 / 3, 256 / 2);
#ifdef TEST_RSTSC
  uint64 t2 = __rdtsc();
#endif
  ScaleFilterCols34_C(d, row, owidth);
#ifdef TEST_RSTSC
  uint64 t3 = __rdtsc();
  timers34[2] += t2 - t1;
  timers34[3] += t3 - t2;
#endif
}
#endif
static void ScaleRowDown38_C(const uint8* iptr, int32,
uint8* dst, int32 owidth) {
ASSERT(owidth % 3 == 0);
for (int x = 0; x < owidth; x += 3) {
dst[0] = iptr[0];
dst[1] = iptr[3];
dst[2] = iptr[6];
dst += 3;
iptr += 8;
}
}
// 8x3 -> 3x1
static void ScaleRowDown38_3_Int_C(const uint8* iptr, int32 istride,
uint8* optr, int32 owidth) {
ASSERT((owidth % 3 == 0) && (owidth > 0));
for (int i = 0; i < owidth; i+=3) {
optr[0] = (iptr[0] + iptr[1] + iptr[2] +
iptr[istride + 0] + iptr[istride + 1] + iptr[istride + 2] +
iptr[istride * 2 + 0] + iptr[istride * 2 + 1] + iptr[istride * 2 + 2]) *
(65536 / 9) >> 16;
optr[1] = (iptr[3] + iptr[4] + iptr[5] +
iptr[istride + 3] + iptr[istride + 4] + iptr[istride + 5] +
iptr[istride * 2 + 3] + iptr[istride * 2 + 4] + iptr[istride * 2 + 5]) *
(65536 / 9) >> 16;
optr[2] = (iptr[6] + iptr[7] +
iptr[istride + 6] + iptr[istride + 7] +
iptr[istride * 2 + 6] + iptr[istride * 2 + 7]) *
(65536 / 6) >> 16;
iptr += 8;
optr += 3;
}
}
// 8x2 -> 3x1
static void ScaleRowDown38_2_Int_C(const uint8* iptr, int32 istride,
uint8* optr, int32 owidth) {
ASSERT((owidth % 3 == 0) && (owidth > 0));
for (int i = 0; i < owidth; i+=3) {
optr[0] = (iptr[0] + iptr[1] + iptr[2] +
iptr[istride + 0] + iptr[istride + 1] + iptr[istride + 2]) *
(65536 / 6) >> 16;
optr[1] = (iptr[3] + iptr[4] + iptr[5] +
iptr[istride + 3] + iptr[istride + 4] + iptr[istride + 5]) *
(65536 / 6) >> 16;
optr[2] = (iptr[6] + iptr[7] +
iptr[istride + 6] + iptr[istride + 7]) *
(65536 / 4) >> 16;
iptr += 8;
optr += 3;
}
}
// C version 8x2 -> 8x1
static void ScaleFilterRows_C(uint8* optr,
const uint8* iptr0, int32 istride,
int owidth, int source_y_fraction) {
ASSERT(owidth > 0);
int y1_fraction = source_y_fraction;
int y0_fraction = 256 - y1_fraction;
const uint8* iptr1 = iptr0 + istride;
uint8* end = optr + owidth;
do {
optr[0] = (iptr0[0] * y0_fraction + iptr1[0] * y1_fraction) >> 8;
optr[1] = (iptr0[1] * y0_fraction + iptr1[1] * y1_fraction) >> 8;
optr[2] = (iptr0[2] * y0_fraction + iptr1[2] * y1_fraction) >> 8;
optr[3] = (iptr0[3] * y0_fraction + iptr1[3] * y1_fraction) >> 8;
optr[4] = (iptr0[4] * y0_fraction + iptr1[4] * y1_fraction) >> 8;
optr[5] = (iptr0[5] * y0_fraction + iptr1[5] * y1_fraction) >> 8;
optr[6] = (iptr0[6] * y0_fraction + iptr1[6] * y1_fraction) >> 8;
optr[7] = (iptr0[7] * y0_fraction + iptr1[7] * y1_fraction) >> 8;
iptr0 += 8;
iptr1 += 8;
optr += 8;
} while (optr < end);
optr[0] = optr[-1];
}
void ScaleAddRows_C(const uint8* iptr, int32 istride,
uint16* orow, int32 iwidth, int32 iheight) {
ASSERT(iwidth > 0);
ASSERT(iheight > 0);
for (int x = 0; x < iwidth; ++x) {
const uint8* s = iptr + x;
int sum = 0;
for (int y = 0; y < iheight; ++y) {
sum += s[0];
s += istride;
}
orow[x] = sum;
}
}
/**
 * Scale plane, 1/2
 *
 * This is an optimized version for scaling down a plane to 1/2 of
 * its original size.
 *
 */
static void ScalePlaneDown2(int32 iwidth, int32 iheight,
                            int32 owidth, int32 oheight,
                            int32 istride, int32 ostride,
                            const uint8 *iptr, uint8 *optr,
                            bool interpolate) {
  ASSERT(iwidth % 2 == 0);
  ASSERT(iheight % 2 == 0);
  // Select a row function once, then apply it to every output row.
  // interpolate == true averages each 2x2 box; false point-samples.
  void (*ScaleRowDown2)(const uint8* iptr, int32 istride,
                        uint8* orow, int32 owidth);
#if defined(HAS_SCALEROWDOWN2_NEON)
  // NEON path needs 16-pixel multiples and 16-byte aligned pointers/strides.
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasNEON) &&
      (owidth % 16 == 0) && (istride % 16 == 0) && (ostride % 16 == 0) &&
      IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 16)) {
    ScaleRowDown2 = interpolate ? ScaleRowDown2Int_NEON : ScaleRowDown2_NEON;
  } else
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
      (owidth % 16 == 0) && IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 16)) {
    ScaleRowDown2 = interpolate ? ScaleRowDown2Int_SSE2 : ScaleRowDown2_SSE2;
  } else
#endif
  {
    // Portable C fallback.
    ScaleRowDown2 = interpolate ? ScaleRowDown2Int_C : ScaleRowDown2_C;
  }
  for (int y = 0; y < oheight; ++y) {
    ScaleRowDown2(iptr, istride, optr, owidth);
    iptr += (istride << 1);  // two source rows per output row
    optr += ostride;
  }
}
/**
 * Scale plane, 1/4
 *
 * This is an optimized version for scaling down a plane to 1/4 of
 * its original size.
 */
static void ScalePlaneDown4(int32 iwidth, int32 iheight,
                            int32 owidth, int32 oheight,
                            int32 istride, int32 ostride,
                            const uint8 *iptr, uint8 *optr,
                            bool interpolate) {
  ASSERT(iwidth % 4 == 0);
  ASSERT(iheight % 4 == 0);
  // Select a row function once, then apply it to every output row.
  // interpolate == true averages each 4x4 box; false point-samples.
  void (*ScaleRowDown4)(const uint8* iptr, int32 istride,
                        uint8* orow, int32 owidth);
#if defined(HAS_SCALEROWDOWN4_SSE2)
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
      (owidth % 8 == 0) && (istride % 16 == 0) && (ostride % 8 == 0) &&
      IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 8)) {
    ScaleRowDown4 = interpolate ? ScaleRowDown4Int_SSE2 : ScaleRowDown4_SSE2;
  } else
#endif
  {
    // Portable C fallback.
    ScaleRowDown4 = interpolate ? ScaleRowDown4Int_C : ScaleRowDown4_C;
  }
  for (int y = 0; y < oheight; ++y) {
    ScaleRowDown4(iptr, istride, optr, owidth);
    iptr += (istride << 2);  // four source rows per output row
    optr += ostride;
  }
}
/**
 * Scale plane, 1/8
 *
 * This is an optimized version for scaling down a plane to 1/8
 * of its original size.
 *
 */
static void ScalePlaneDown8(int32 iwidth, int32 iheight,
                            int32 owidth, int32 oheight,
                            int32 istride, int32 ostride,
                            const uint8 *iptr, uint8 *optr,
                            bool interpolate) {
  ASSERT(iwidth % 8 == 0);
  ASSERT(iheight % 8 == 0);
  // Select a row function once, then apply it to every output row.
  void (*ScaleRowDown8)(const uint8* iptr, int32 istride,
                        uint8* orow, int32 owidth);
#if defined(HAS_SCALEROWDOWN8_SSE2)
  // The 8x8 box version also needs owidth to fit its intermediate buffer.
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
      (owidth % 16 == 0) && owidth <= kMaxOutputWidth &&
      (istride % 16 == 0) && (ostride % 16 == 0) &&
      IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 16)) {
    ScaleRowDown8 = interpolate ? ScaleRowDown8Int_SSE2 : ScaleRowDown8_SSE2;
  } else
#endif
  {
    // The C box filter also has a width limit (its stack buffer); wider
    // rows fall back to point sampling.
    ScaleRowDown8 = interpolate && (owidth <= kMaxOutputWidth) ?
        ScaleRowDown8Int_C : ScaleRowDown8_C;
  }
  for (int y = 0; y < oheight; ++y) {
    ScaleRowDown8(iptr, istride, optr, owidth);
    iptr += (istride << 3);  // eight source rows per output row
    optr += ostride;
  }
}
/**
 * Scale plane down, 3/4
 *
 * Provided by Frank Barchard (fbarchard@google.com)
 *
 */
static void ScalePlaneDown34(int32 iwidth, int32 iheight,
                             int32 owidth, int32 oheight,
                             int32 istride, int32 ostride,
                             const uint8* iptr, uint8* optr,
                             bool interpolate) {
  ASSERT(owidth % 3 == 0);
  // Two row functions: _0 filters a row pair weighted 3:1, _1 filters a
  // pair 1:1. Without interpolation both point to the point sampler.
  void (*ScaleRowDown34_0)(const uint8* iptr, int32 istride,
                           uint8* orow, int32 owidth);
  void (*ScaleRowDown34_1)(const uint8* iptr, int32 istride,
                           uint8* orow, int32 owidth);
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSSE3) &&
      (owidth % 24 == 0) && (istride % 16 == 0) && (ostride % 8 == 0) &&
      IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 8)) {
    if (!interpolate) {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSSE3;
    }
  } else
#endif
#if defined(HAS_SCALEROWDOWN34_SSE2)
  // SSE2 fallback only provides the interpolating variants.
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
      (owidth % 24 == 0) && (istride % 16 == 0) && (ostride % 8 == 0) &&
      IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 8) &&
      interpolate) {
    ScaleRowDown34_0 = ScaleRowDown34_0_Int_SSE2;
    ScaleRowDown34_1 = ScaleRowDown34_1_Int_SSE2;
  } else
#endif
  {
    if (!interpolate) {
      ScaleRowDown34_0 = ScaleRowDown34_C;
      ScaleRowDown34_1 = ScaleRowDown34_C;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Int_C;
      ScaleRowDown34_1 = ScaleRowDown34_1_Int_C;
    }
  }
  // Each group of 4 source rows produces 3 output rows:
  //   irow 0: rows 0+1 filtered 3:1 toward row 0
  //   irow 1: rows 1+2 filtered 1:1
  //   irow 2: rows 3+2 filtered 3:1 toward row 3 (negative stride walks up)
  int irow = 0;
  for (int y = 0; y < oheight; ++y) {
    switch (irow) {
      case 0:
        ScaleRowDown34_0(iptr, istride, optr, owidth);
        break;
      case 1:
        ScaleRowDown34_1(iptr, istride, optr, owidth);
        break;
      case 2:
        ScaleRowDown34_0(iptr + istride, -istride, optr, owidth);
        break;
    }
    ++irow;
    iptr += istride;
    optr += ostride;
    if (irow >= 3) {
      iptr += istride;  // skip the 4th source row of this group
      irow = 0;
    }
  }
#ifdef TEST_RSTSC
  std::cout << "Timer34_0 Row " << std::setw(9) << timers34[0]
            << " Column " << std::setw(9) << timers34[1]
            << " Timer34_1 Row " << std::setw(9) << timers34[2]
            << " Column " << std::setw(9) << timers34[3] << std::endl;
#endif
}
/**
 * Scale plane, 3/8
 *
 * This is an optimized version for scaling down a plane to 3/8
 * of its original size.
 *
 * Reduces 16x3 to 6x1
 */
static void ScalePlaneDown38(int32 iwidth, int32 iheight,
                             int32 owidth, int32 oheight,
                             int32 istride, int32 ostride,
                             const uint8* iptr, uint8* optr,
                             bool interpolate) {
  ASSERT(owidth % 3 == 0);
  // Two row functions: _3 box-filters 3 source rows, _2 box-filters 2.
  // Without interpolation both point to the point sampler.
  void (*ScaleRowDown38_3)(const uint8* iptr, int32 istride,
                           uint8* orow, int32 owidth);
  void (*ScaleRowDown38_2)(const uint8* iptr, int32 istride,
                           uint8* orow, int32 owidth);
#if defined(HAS_SCALEROWDOWN38_SSSE3)
  if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSSE3) &&
      (owidth % 24 == 0) && (istride % 16 == 0) && (ostride % 8 == 0) &&
      IS_ALIGNED(iptr, 16) && IS_ALIGNED(optr, 8)) {
    if (!interpolate) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Int_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Int_SSSE3;
    }
  } else
#endif
  {
    if (!interpolate) {
      ScaleRowDown38_3 = ScaleRowDown38_C;
      ScaleRowDown38_2 = ScaleRowDown38_C;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Int_C;
      ScaleRowDown38_2 = ScaleRowDown38_2_Int_C;
    }
  }
  // Every 8 source rows become 3 output rows: two 3-row groups followed by
  // one 2-row group (3 + 3 + 2 = 8).
  int irow = 0;
  for (int y = 0; y < oheight; ++y) {
    switch (irow) {
      case 0:
      case 1:
        ScaleRowDown38_3(iptr, istride, optr, owidth);
        iptr += istride * 3;
        ++irow;
        break;
      case 2:
        ScaleRowDown38_2(iptr, istride, optr, owidth);
        iptr += istride * 2;
        irow = 0;
        break;
    }
    optr += ostride;
  }
}
// Sum all pixels of an iboxwidth x iboxheight box starting at iptr; rows
// are istride bytes apart.
inline static uint32 SumBox(int32 iboxwidth, int32 iboxheight,
                            int32 istride, const uint8 *iptr) {
  ASSERT(iboxwidth > 0);
  ASSERT(iboxheight > 0);
  uint32 total = 0u;
  for (int row = 0; row < iboxheight; ++row, iptr += istride) {
    for (int col = 0; col < iboxwidth; ++col) {
      total += iptr[col];
    }
  }
  return total;
}
// Produce one output row by box-averaging: each output pixel covers the
// source columns spanned by one 16.16 fixed-point step of dx and boxheight
// source rows.
static void ScalePlaneBoxRow(int32 owidth, int32 boxheight,
                             int dx, int32 istride,
                             const uint8 *iptr, uint8 *optr) {
  int x = 0;
  for (int i = 0; i < owidth; ++i) {
    const int ix = x >> 16;
    x += dx;
    const int boxwidth = (x >> 16) - ix;  // columns covered by this pixel
    optr[i] = SumBox(boxwidth, boxheight, istride, iptr + ix) /
              (boxwidth * boxheight);
  }
}
// Sum iboxwidth consecutive 16-bit column totals.
inline static uint32 SumPixels(int32 iboxwidth, const uint16 *iptr) {
  ASSERT(iboxwidth > 0);
  uint32 total = 0u;
  const uint16* const end = iptr + iboxwidth;
  while (iptr < end) {
    total += *iptr++;
  }
  return total;
}
// Average the 16-bit column sums in iptr into owidth output pixels when dx
// has a fractional part: each output box then covers either (dx >> 16) or
// (dx >> 16) + 1 source columns, so a two-entry fixed-point reciprocal
// table is used. scaleptr is biased by -minboxwidth so scaleptr[boxwidth]
// selects the matching table entry.
static void ScaleAddCols2_C(int32 owidth, int32 boxheight, int dx,
                            const uint16 *iptr, uint8 *optr) {
  int scaletbl[2];
  int minboxwidth = (dx >> 16);
  scaletbl[0] = 65536 / (minboxwidth * boxheight);
  scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
  int *scaleptr = scaletbl - minboxwidth;  // biased pointer; see above
  int x = 0;
  for (int i = 0; i < owidth; ++i) {
    int ix = x >> 16;
    x += dx;
    int boxwidth = (x >> 16) - ix;  // actual columns covered by this pixel
    *optr++ = SumPixels(boxwidth, iptr + ix) * scaleptr[boxwidth] >> 16;
  }
}
// Average the 16-bit column sums when dx is a whole number of pixels: a
// single fixed-point reciprocal works for every output pixel.
static void ScaleAddCols1_C(int32 owidth, int32 boxheight, int dx,
                            const uint16 *iptr, uint8 *optr) {
  const int boxwidth = dx >> 16;
  const int scaleval = 65536 / (boxwidth * boxheight);
  const uint16* src = iptr;
  for (int i = 0; i < owidth; ++i, src += boxwidth) {
    optr[i] = SumPixels(boxwidth, src) * scaleval >> 16;
  }
}
/**
 * Scale plane down to any dimensions, with interpolation.
 * (boxfilter).
 *
 * Same method as SimpleScale, which is fixed point, outputting
 * one pixel of destination using fixed point (16.16) to step
 * through source, sampling a box of pixel with simple
 * averaging.
 */
static void ScalePlaneBox(int32 iwidth, int32 iheight,
                          int32 owidth, int32 oheight,
                          int32 istride, int32 ostride,
                          const uint8 *iptr, uint8 *optr) {
  ASSERT(owidth > 0);
  ASSERT(oheight > 0);
  // 16.16 fixed-point source step per output pixel / output row.
  // (These were previously shadowed by identical re-declarations inside the
  // slow path below; the redundant copies have been removed.)
  int dy = (iheight << 16) / oheight;
  int dx = (iwidth << 16) / owidth;
  if ((iwidth % 16 != 0) || (iwidth > kMaxInputWidth) ||
      oheight * 2 > iheight) {
    // Generic pixel-at-a-time path: widths the row functions cannot handle,
    // or a vertical scale of more than 1/2 (boxes may be a single row).
    uint8 *dst = optr;
    int y = 0;
    for (int j = 0; j < oheight; ++j) {
      int iy = y >> 16;
      const uint8 *const src = iptr + iy * istride;
      y += dy;
      if (y > (iheight << 16)) {
        y = (iheight << 16);  // clamp so boxes never read past the plane
      }
      int boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow(owidth, boxheight,
                       dx, istride,
                       src, dst);
      dst += ostride;
    }
  } else {
    // Fast path: sum whole rows into a uint16 row, then average columns.
    ALIGN16(uint16 row[kMaxInputWidth]);
    void (*ScaleAddRows)(const uint8* iptr, int32 istride,
                         uint16* orow, int32 iwidth, int32 iheight);
    void (*ScaleAddCols)(int32 owidth, int32 boxheight, int dx,
                         const uint16 *iptr, uint8 *optr);
#if defined(HAS_SCALEADDROWS_SSE2)
    if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
        (istride % 16 == 0) && IS_ALIGNED(iptr, 16) && (iwidth % 16) == 0) {
      ScaleAddRows = ScaleAddRows_SSE2;
    } else
#endif
    {
      ScaleAddRows = ScaleAddRows_C;
    }
    // A fractional box width needs the two-entry reciprocal-table version.
    if (dx & 0xffff) {
      ScaleAddCols = ScaleAddCols2_C;
    } else {
      ScaleAddCols = ScaleAddCols1_C;
    }
    int y = 0;
    for (int j = 0; j < oheight; ++j) {
      int iy = y >> 16;
      const uint8 *const src = iptr + iy * istride;
      y += dy;
      if (y > (iheight << 16)) {
        y = (iheight << 16);  // clamp so boxes never read past the plane
      }
      int boxheight = (y >> 16) - iy;
      ScaleAddRows(src, istride, row, iwidth, boxheight);
      ScaleAddCols(owidth, boxheight, dx, row, optr);
      optr += ostride;
    }
  }
}
/**
 * Scale plane to/from any dimensions, with interpolation.
 */
static void ScalePlaneBilinearSimple(int32 iwidth, int32 iheight,
                                     int32 owidth, int32 oheight,
                                     int32 istride, int32 ostride,
                                     const uint8 *iptr, uint8 *optr) {
  uint8 *dst = optr;
  // 16.16 fixed-point source step per output pixel / row.
  int dx = (iwidth << 16) / owidth;
  int dy = (iheight << 16) / oheight;
  // Clamp limits: the bilinear taps read pixel xi + 1 and row yi + 1, so
  // the position is capped just below the last pixel/row boundary.
  int maxx = ((iwidth - 1) << 16) - 1;
  int maxy = ((iheight - 1) << 16) - 1;
  // Start half a destination pixel in (±32768) to center the sampling.
  int y = (oheight < iheight) ? 32768 : (iheight << 16) / oheight - 32768;
  for (int i = 0; i < oheight; ++i) {
    int cy = (y < 0) ? 0 : y;      // clamp negative start positions
    int yi = cy >> 16;             // integer source row
    int yf = cy & 0xffff;          // vertical fraction
    const uint8 *const src = iptr + yi * istride;
    int x = (owidth < iwidth) ? 32768 : (iwidth << 16) / owidth - 32768;
    for (int j = 0; j < owidth; ++j) {
      int cx = (x < 0) ? 0 : x;
      int xi = cx >> 16;           // integer source column
      int xf = cx & 0xffff;        // horizontal fraction
      // Blend horizontally on both rows, then vertically.
      int r0 = (src[xi] * (65536 - xf) + src[xi + 1] * xf) >> 16;
      int r1 = (src[xi + istride] * (65536 - xf) + src[xi + istride + 1] * xf)
          >> 16;
      *dst++ = (r0 * (65536 - yf) + r1 * yf) >> 16;
      x += dx;
      if (x > maxx)
        x = maxx;
    }
    dst += ostride - owidth;  // advance to the start of the next output row
    y += dy;
    if (y > maxy)
      y = maxy;
  }
}
/**
 * Scale plane to/from any dimensions, with bilinear
 * interpolation.
 */
static void ScalePlaneBilinear(int32 iwidth, int32 iheight,
                               int32 owidth, int32 oheight,
                               int32 istride, int32 ostride,
                               const uint8 *iptr, uint8 *optr) {
  ASSERT(owidth > 0);
  ASSERT(oheight > 0);
  // 16.16 fixed-point source step per output row / pixel.
  int dy = (iheight << 16) / oheight;
  int dx = (iwidth << 16) / owidth;
  if ((iwidth % 8 != 0) || (iwidth > kMaxInputWidth)) {
    // Rows the SIMD/row-buffer path cannot handle: do it pixel by pixel.
    ScalePlaneBilinearSimple(iwidth, iheight, owidth, oheight, istride, ostride,
                             iptr, optr);
  } else {
    // +1: the row filter replicates one pixel past the end, which the
    // column filter then reads.
    ALIGN16(uint8 row[kMaxInputWidth + 1]);
    void (*ScaleFilterRows)(uint8* optr, const uint8* iptr0, int32 istride,
                            int owidth, int source_y_fraction);
    void (*ScaleFilterCols)(uint8* optr, const uint8* iptr,
                            int owidth, int dx);
#if defined(HAS_SCALEFILTERROWS_SSSE3)
    if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSSE3) &&
        (istride % 16 == 0) && IS_ALIGNED(iptr, 16) && (iwidth % 16) == 0) {
      ScaleFilterRows = ScaleFilterRows_SSSE3;
    } else
#endif
#if defined(HAS_SCALEFILTERROWS_SSE2)
    if (libyuv::CpuInfo::TestCpuFlag(libyuv::CpuInfo::kCpuHasSSE2) &&
        (istride % 16 == 0) && IS_ALIGNED(iptr, 16) && (iwidth % 16) == 0) {
      ScaleFilterRows = ScaleFilterRows_SSE2;
    } else
#endif
    {
      ScaleFilterRows = ScaleFilterRows_C;
    }
    ScaleFilterCols = ScaleFilterCols_C;
    int y = 0;
    int maxy = ((iheight - 1) << 16) - 1;  // max is filter of last 2 rows.
    for (int j = 0; j < oheight; ++j) {
      int iy = y >> 16;            // integer source row
      int fy = (y >> 8) & 255;     // 8-bit vertical fraction
      const uint8 *const src = iptr + iy * istride;
      // Vertically filter two source rows into a temp row, then
      // horizontally filter the temp row into the output row.
      ScaleFilterRows(row, src, istride, iwidth, fy);
      ScaleFilterCols(optr, row, owidth, dx);
      optr += ostride;
      y += dy;
      if (y > maxy) {
        y = maxy;  // clamp so the row filter never reads past the plane
      }
    }
  }
}
/**
 * Scale plane to/from any dimensions, without interpolation.
 * Fixed point math is used for performance: The upper 16 bits
 * of x and dx is the integer part of the source position and
 * the lower 16 bits are the fixed decimal part.
 */
static void ScalePlaneSimple(int32 iwidth, int32 iheight,
                             int32 owidth, int32 oheight,
                             int32 istride, int32 ostride,
                             const uint8 *iptr, uint8 *optr) {
  const int dx = (iwidth << 16) / owidth;
  for (int y = 0; y < oheight; ++y) {
    // Nearest source row for this output row.
    const uint8 *const src = iptr + (y * iheight / oheight) * istride;
    uint8 *const dst = optr + y * ostride;
    // TODO(fbarchard): Round X coordinate by setting x=0x8000.
    int x = 0;
    for (int i = 0; i < owidth; ++i) {
      dst[i] = src[x >> 16];
      x += dx;
    }
  }
}
/**
 * Scale plane to/from any dimensions.
 */
static void ScalePlaneAnySize(int32 iwidth, int32 iheight,
                              int32 owidth, int32 oheight,
                              int32 istride, int32 ostride,
                              const uint8 *iptr, uint8 *optr,
                              bool interpolate) {
  // Bilinear handles arbitrary up/down scales; point sampling otherwise.
  if (interpolate) {
    // fall back to non-optimized version
    ScalePlaneBilinear(iwidth, iheight, owidth, oheight, istride, ostride,
                       iptr, optr);
  } else {
    ScalePlaneSimple(iwidth, iheight, owidth, oheight, istride, ostride,
                     iptr, optr);
  }
}
/**
 * Scale plane down, any size
 *
 * This is an optimized version for scaling down a plane to any size.
 * The current implementation is ~10 times faster compared to the
 * reference implementation for e.g. XGA->LowResPAL
 *
 */
static void ScalePlaneDown(int32 iwidth, int32 iheight,
                           int32 owidth, int32 oheight,
                           int32 istride, int32 ostride,
                           const uint8 *iptr, uint8 *optr,
                           bool interpolate) {
  if (!interpolate) {
    // Point sampling.
    ScalePlaneSimple(iwidth, iheight, owidth, oheight, istride, ostride,
                     iptr, optr);
  } else if (oheight * 2 > iheight) {  // between 1/2x and 1x use bilinear
    // BUG FIX: this test was "iheight * 2 > oheight", which holds for every
    // downscale (iheight >= oheight) and made the box filter unreachable.
    ScalePlaneBilinear(iwidth, iheight, owidth, oheight, istride, ostride,
                       iptr, optr);
  } else {
    // 1/2x or smaller: box filter averages all covered source pixels.
    ScalePlaneBox(iwidth, iheight, owidth, oheight, istride, ostride,
                  iptr, optr);
  }
}
/**
 * Copy plane, no scaling
 *
 * This simply copies the given plane without scaling.
 * The current implementation is ~115 times faster
 * compared to the reference implementation.
 *
 */
static void CopyPlane(int32 iwidth, int32 iheight,
                      int32 owidth, int32 oheight,
                      int32 istride, int32 ostride,
                      const uint8 *iptr, uint8 *optr) {
  if (istride == iwidth && ostride == owidth) {
    // Both planes are contiguous: one bulk copy.
    memcpy(optr, iptr, iwidth * iheight);
  } else {
    // Padded strides: copy one scanline at a time.
    for (int y = 0; y < iheight; ++y) {
      memcpy(optr + y * ostride, iptr + y * istride, iwidth);
    }
  }
}
// Dispatch a single plane to the scaler best suited for the requested
// ratio. Specialized scalers (3/4, 1/2, 3/8, 1/4, 1/8) are used for
// common resolutions; everything else goes through the generic paths.
static void ScalePlane(const uint8 *in, int32 istride,
                       int32 iwidth, int32 iheight,
                       uint8 *out, int32 ostride,
                       int32 owidth, int32 oheight,
                       bool interpolate, bool use_ref) {
  if (owidth == iwidth && oheight == iheight) {
    // Identical sizes: straight copy.
    CopyPlane(iwidth, iheight, owidth, oheight, istride, ostride, in, out);
    return;
  }
  if (owidth > iwidth || oheight > iheight) {
    // At least one dimension grows: arbitrary scale up and/or down.
    ScalePlaneAnySize(iwidth, iheight, owidth, oheight, istride, ostride,
                      in, out, interpolate);
    return;
  }
  // Scale down from here on.
  if (use_ref) {
    // For testing, allow the optimized versions to be disabled.
    ScalePlaneDown(iwidth, iheight, owidth, oheight, istride, ostride,
                   in, out, interpolate);
    return;
  }
  if (4 * owidth == 3 * iwidth && 4 * oheight == 3 * iheight) {
    // optimized, 3/4
    ScalePlaneDown34(iwidth, iheight, owidth, oheight, istride, ostride,
                     in, out, interpolate);
    return;
  }
  if (2 * owidth == iwidth && 2 * oheight == iheight) {
    // optimized, 1/2
    ScalePlaneDown2(iwidth, iheight, owidth, oheight, istride, ostride,
                    in, out, interpolate);
    return;
  }
  if (8 * owidth == 3 * iwidth && oheight == ((iheight * 3 + 7) / 8)) {
    // optimized, 3/8; height rounded up for odd sized chroma planes.
    ScalePlaneDown38(iwidth, iheight, owidth, oheight, istride, ostride,
                     in, out, interpolate);
    return;
  }
  if (4 * owidth == iwidth && 4 * oheight == iheight) {
    // optimized, 1/4
    ScalePlaneDown4(iwidth, iheight, owidth, oheight, istride, ostride,
                    in, out, interpolate);
    return;
  }
  if (8 * owidth == iwidth && 8 * oheight == iheight) {
    // optimized, 1/8
    ScalePlaneDown8(iwidth, iheight, owidth, oheight, istride, ostride,
                    in, out, interpolate);
    return;
  }
  // Arbitrary downsample.
  ScalePlaneDown(iwidth, iheight, owidth, oheight, istride, ostride,
                 in, out, interpolate);
}
/**
* Scale a plane.
*
* This function in turn calls a scaling function
* suitable for handling the desired resolutions.
*
*/
bool YuvScaler::Scale(const uint8 *inY, const uint8 *inU, const uint8 *inV,
int32 istrideY, int32 istrideU, int32 istrideV,
int32 iwidth, int32 iheight,
uint8 *outY, uint8 *outU, uint8 *outV,
int32 ostrideY, int32 ostrideU, int32 ostrideV,
int32 owidth, int32 oheight,
bool interpolate) {
if (!inY || !inU || !inV || iwidth <= 0 || iheight <= 0 ||
!outY || !outU || !outV || owidth <= 0 || oheight <= 0) {
return false;
}
int32 halfiwidth = (iwidth + 1) >> 1;
int32 halfiheight = (iheight + 1) >> 1;
int32 halfowidth = (owidth + 1) >> 1;
int32 halfoheight = (oheight + 1) >> 1;
ScalePlane(inY, istrideY, iwidth, iheight,
outY, ostrideY, owidth, oheight,
interpolate, use_reference_impl_);
ScalePlane(inU, istrideU, halfiwidth, halfiheight,
outU, ostrideU, halfowidth, halfoheight,
interpolate, use_reference_impl_);
ScalePlane(inV, istrideV, halfiwidth, halfiheight,
outV, ostrideV, halfowidth, halfoheight,
interpolate, use_reference_impl_);
return true;
}
// Scale an I420 frame held in a single contiguous buffer (Y plane,
// then U, then V), writing into a contiguous output buffer of
// owidth x oheight. The scaled image starts ooffset rows from the top
// and covers oheight - 2 * ooffset rows, leaving ooffset untouched rows
// at the top and bottom (letterboxing). Returns false on invalid args.
// NOTE(review): the pointer math assumes both buffers are tightly
// packed (stride == width) -- confirm with callers.
bool YuvScaler::Scale(const uint8 *in, int32 iwidth, int32 iheight,
                      uint8 *out, int32 owidth, int32 oheight, int32 ooffset,
                      bool interpolate) {
  if (!in || iwidth <= 0 || iheight <= 0 ||
      !out || owidth <= 0 || oheight <= 0 || ooffset < 0 ||
      ooffset >= oheight) {
    return false;
  }
  // Round the offset down to even so the chroma offset (ooffset / 2)
  // stays row-aligned in the half-height chroma planes.
  ooffset = ooffset & ~1;  // chroma requires offset to multiple of 2.
  // Chroma planes are half the luma size, rounded up for odd dimensions.
  int32 halfiwidth = (iwidth + 1) >> 1;
  int32 halfiheight = (iheight + 1) >> 1;
  int32 halfowidth = (owidth + 1) >> 1;
  int32 halfoheight = (oheight + 1) >> 1;
  int32 aheight = oheight - ooffset * 2;  // actual output height
  // Plane base pointers inside the packed I420 buffers; the output
  // pointers additionally skip ooffset (luma) / ooffset>>1 (chroma) rows.
  const uint8 *const iyptr = in;
  uint8 *oyptr = out + ooffset * owidth;
  const uint8 *const iuptr = in + iwidth * iheight;
  uint8 *ouptr = out + owidth * oheight + (ooffset >> 1) * halfowidth;
  const uint8 *const ivptr = in + iwidth * iheight +
                             halfiwidth * halfiheight;
  uint8 *ovptr = out + owidth * oheight + halfowidth * halfoheight +
                 (ooffset >> 1) * halfowidth;
  // Delegate to the three-plane overload with packed strides.
  return Scale(iyptr, iuptr, ivptr, iwidth, halfiwidth, halfiwidth,
               iwidth, iheight, oyptr, ouptr, ovptr, owidth,
               halfowidth, halfowidth, owidth, aheight, interpolate);
}
} // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "video_common.h"
#include <sstream>
#include "common.h"
namespace libyuv {
// One row of the alias table: maps a non-canonical fourcc to the
// canonical code the rest of the library understands.
struct FourCCAliasEntry {
  uint32 alias;      // fourcc as reported by a source/OS.
  uint32 canonical;  // equivalent canonical fourcc.
};

// Lookup table consumed by CanonicalFourCC() below.
static const FourCCAliasEntry kFourCCAliases[] = {
  {FOURCC_IYUV, FOURCC_I420},
  {FOURCC_YU12, FOURCC_I420},
  {FOURCC_YUYV, FOURCC_YUY2},
  {FOURCC_YUVS, FOURCC_YUY2},
  {FOURCC_HDYC, FOURCC_UYVY},
  {FOURCC_2VUY, FOURCC_UYVY},
  {FOURCC_BA81, FOURCC_BGGR},
  {FOURCC_JPEG, FOURCC_MJPG},  // Note: JPEG has DHT while MJPG does not.
  {FOURCC_RGB3, FOURCC_RAW},
  {FOURCC_BGR3, FOURCC_24BG},
};
// Translate a fourcc to its canonical form via kFourCCAliases.
// Unknown codes are returned unchanged.
uint32 CanonicalFourCC(uint32 fourcc) {
  const int num_aliases = ARRAY_SIZE(kFourCCAliases);
  for (int index = 0; index < num_aliases; ++index) {
    const FourCCAliasEntry& entry = kFourCCAliases[index];
    if (entry.alias == fourcc) {
      return entry.canonical;
    }
  }
  // Not an alias, so return it as-is.
  return fourcc;
}
// Render the format as "FOURCC widthxheightxfps"; the fourcc prefix is
// dropped entirely if any of its characters is non-printable.
std::string VideoFormat::ToString() const {
  std::string fourcc_name = GetFourccName(fourcc) + " ";
  bool printable = true;
  for (size_t pos = 0; pos < fourcc_name.size(); ++pos) {
    const char ch = fourcc_name[pos];
    // Test character is printable; avoid isprint() which asserts on
    // negative values.
    if (ch < 32 || ch >= 127) {
      printable = false;
      break;
    }
  }
  if (!printable) {
    fourcc_name = "";
  }
  std::ostringstream ss;
  ss << fourcc_name << width << "x" << height << "x" << IntervalToFps(interval);
  return ss.str();
}
} // namespace libyuv
/*
* Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* Common definitions for video, including fourcc and VideoFormat
*/
#ifndef LIBYUV_SOURCE_VIDEO_COMMON_H_
#define LIBYUV_SOURCE_VIDEO_COMMON_H_
#include <string>
#include "basic_types.h"
namespace libyuv {
//////////////////////////////////////////////////////////////////////////////
// Definition of fourcc.
//////////////////////////////////////////////////////////////////////////////
// Convert four characters to a fourcc code.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
// The first character lands in the least-significant byte, matching the
// byte order GetFourccName() extracts below.
#define FOURCC(a, b, c, d) (\
    (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
    (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
// Get the name, that is, string with four characters, of a fourcc code.
// Get the name, that is, the four-character string, of a fourcc code.
// Byte 0 (least significant) becomes the first character.
inline std::string GetFourccName(uint32 fourcc) {
  char chars[4];
  for (int i = 0; i < 4; ++i) {
    chars[i] = static_cast<char>((fourcc >> (8 * i)) & 0xFF);
  }
  return std::string(chars, 4);
}
// Some good pages discussing FourCC codes:
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://www.fourcc.org/yuv.php
// All values are built with the FOURCC() macro above, so comparisons
// against codes read from capture devices are direct uint32 compares.
enum FourCC {
  // Canonical fourcc codes used in our code.
  FOURCC_I420 = FOURCC('I', '4', '2', '0'),
  FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
  FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
  FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
  FOURCC_M420 = FOURCC('M', '4', '2', '0'),
  FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
  FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
  FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
  FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
  FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
  FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
  FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
  // Next four are Bayer RGB formats. The four characters define the order of
  // the colours in each 2x2 pixel grid, going left-to-right and top-to-bottom.
  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
  FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
  // Aliases for canonical fourcc codes, replaced with their canonical
  // equivalents by CanonicalFourCC().
  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420
  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Alias for I420
  FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'),  // Alias for YUY2
  FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'),  // Alias for YUY2 on Mac
  FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'),  // Alias for UYVY
  FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'),  // Alias for UYVY
  FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'),  // Alias for MJPG
  FOURCC_BA81 = FOURCC('B', 'A', '8', '1'),  // Alias for BGGR
  FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'),  // Alias for RAW
  FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'),  // Alias for 24BG
  // Match any fourcc.
  FOURCC_ANY = 0xFFFFFFFF,
};
// Converts fourcc aliases into canonical ones (see kFourCCAliases in
// video_common.cc); non-alias codes are returned unchanged.
uint32 CanonicalFourCC(uint32 fourcc);
//////////////////////////////////////////////////////////////////////////////
// Definition of VideoFormat.
//////////////////////////////////////////////////////////////////////////////
// Nanoseconds per second; VideoFormat frame intervals are in nanoseconds.
static const int64 kNumNanosecsPerSec = 1000000000;
// Describes one video capture/render format: pixel dimensions, frame
// interval (nanoseconds per frame) and color space as a fourcc code.
struct VideoFormat {
  // Smallest supported frame interval, i.e. a cap of 10000 fps.
  static const int64 kMinimumInterval = kNumNanosecsPerSec / 10000;  // 10k fps

  VideoFormat() : width(0), height(0), interval(0), fourcc(0) {}

  VideoFormat(int w, int h, int64 interval_ns, uint32 cc)
      : width(w),
        height(h),
        interval(interval_ns),
        fourcc(cc) {
  }

  VideoFormat(const VideoFormat& format)
      : width(format.width),
        height(format.height),
        interval(format.interval),
        fourcc(format.fourcc) {
  }

  // Convert frames-per-second to a frame interval in nanoseconds.
  // fps == 0 maps to kMinimumInterval instead of dividing by zero.
  static int64 FpsToInterval(int fps) {
    return fps ? kNumNanosecsPerSec / fps : kMinimumInterval;
  }

  // Convert a frame interval in nanoseconds to frames-per-second.
  static int IntervalToFps(int64 interval) {
    // Normalize the interval first, so fps never exceeds 10000.
    interval = libyuv::_max(interval, kMinimumInterval);
    return static_cast<int>(kNumNanosecsPerSec / interval);
  }

  bool operator==(const VideoFormat& format) const {
    return width == format.width && height == format.height &&
        interval == format.interval && fourcc == format.fourcc;
  }

  bool operator!=(const VideoFormat& format) const {
    return !(*this == format);
  }

  // Lexicographic order on (fourcc, width, height, interval). Note the
  // interval comparison is deliberately inverted: a smaller interval
  // (higher frame rate) sorts as "less", i.e. is preferred first.
  bool operator<(const VideoFormat& format) const {
    return (fourcc < format.fourcc) ||
        (fourcc == format.fourcc && width < format.width) ||
        (fourcc == format.fourcc && width == format.width &&
            height < format.height) ||
        (fourcc == format.fourcc && width == format.width &&
            height == format.height && interval > format.interval);
  }

  // Frame rate in frames per second, derived from interval.
  int framerate() const { return IntervalToFps(interval); }

  // Check if both width and height are 0.
  bool IsSize0x0() const { return 0 == width && 0 == height; }

  // Check if this format is less than another one by comparing the
  // resolution and frame rate (total pixels per second).
  bool IsPixelRateLess(const VideoFormat& format) const {
    // BUG FIX: compute in 64-bit. width * height * framerate() overflows
    // 32-bit int for large formats (e.g. 8K at high frame rates).
    return static_cast<int64>(width) * height * framerate() <
        static_cast<int64>(format.width) * format.height *
            format.framerate();
  }

  // Get a string presentation in the form of "fourcc width x height x fps"
  std::string ToString() const;

  int width;       // in number of pixels
  int height;      // in number of pixels
  int64 interval;  // in nanoseconds
  uint32 fourcc;   // color space. FOURCC_ANY means that any color space is OK.
};
// Result of video capturer start.
enum CaptureResult {
  CR_SUCCESS,    // The capturer started successfully.
  CR_PENDING,    // The capturer is still starting the capture device.
  CR_FAILURE,    // The capturer failed to start.
  CR_NO_DEVICE,  // The capturer has no device and failed to start.
};
} // namespace libyuv
#endif // LIBYUV_SOURCE_VIDEO_COMMON_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment