Commit 585a1261 authored by fbarchard@google.com

rewrite ARGBToI420 with SSSE3

TEST=talk unittests
BUG=none
Review URL: http://webrtc-codereview.appspot.com/251003

git-svn-id: http://libyuv.googlecode.com/svn/trunk@46 16f28f9a-4ce2-e073-06de-1de4eb20be90
parent 8cfa3073
...@@ -16,93 +16,81 @@ ...@@ -16,93 +16,81 @@
namespace libyuv { namespace libyuv {
int int I420ToRGB24(const uint8* src_y, int src_stride_y,
I420ToRGB24(const uint8* src_yplane, int src_ystride, const uint8* src_u, int src_stride_u,
const uint8* src_uplane, int src_ustride, const uint8* src_v, int src_stride_v,
const uint8* src_vplane, int src_vstride, uint8* dst_frame, int dst_stride_frame,
uint8* dst_frame, int dst_stride, int width, int height);
int src_width, int src_height);
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
int const uint8* src_u, int src_stride_u,
I420ToARGB4444(const uint8* src_yplane, int src_ystride, const uint8* src_v, int src_stride_v,
const uint8* src_uplane, int src_ustride, uint8* dst_frame, int dst_stride_frame,
const uint8* src_vplane, int src_vstride, int width, int height);
uint8* dst_frame, int dst_stride,
int src_width, int src_height); int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
int const uint8* src_v, int src_stride_v,
I420ToRGB565(const uint8* src_yplane, int src_ystride, uint8* dst_frame, int dst_stride_frame,
const uint8* src_uplane, int src_ustride, int width, int height);
const uint8* src_vplane, int src_vstride,
uint8* dst_frame, int dst_stride, int I420ToARGB1555(const uint8* src_y, int src_stride_y,
int src_width, int src_height); const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
int uint8* dst_frame, int dst_stride_frame,
I420ToARGB1555(const uint8* src_yplane, int src_ystride, int width, int height);
const uint8* src_uplane, int src_ustride,
const uint8* src_vplane, int src_vstride, int I420ToYUY2(const uint8* src_y, int src_stride_y,
uint8* dst_frame, int dst_stride, const uint8* src_u, int src_stride_u,
int src_width, int src_height); const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int int width, int height);
I420ToYUY2(const uint8* src_yplane, int src_ystride,
const uint8* src_uplane, int src_ustride, int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_vplane, int src_vstride, const uint8* src_u, int src_stride_u,
uint8* dst_frame, int dst_stride, const uint8* src_v, int src_stride_v,
int src_width, int src_height); uint8* dst_frame, int dst_stride_frame,
int width, int height);
int
I420ToUYVY(const uint8* src_yplane, int src_ystride, // TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
const uint8* src_uplane, int src_ustride, int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
const uint8* src_vplane, int src_vstride, uint8* dst_frame, int dst_stride_frame,
uint8* dst_frame, int dst_stride, int width, int height);
int src_width, int src_height);
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
int uint8* dst_y, int dst_stride_y,
RGB24ToARGB(const uint8* src_frame, int src_stride, uint8* dst_u, int dst_stride_u,
uint8* dst_frame, int dst_stride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height); int width, int height);
int int RAWToI420(const uint8* src_frame, int src_stride_frame,
RGB24ToI420(const uint8* src_frame, int src_stride, uint8* dst_y, int dst_stride_y,
uint8* dst_yplane, int dst_ystride, uint8* dst_u, int dst_stride_u,
uint8* dst_uplane, int dst_ustride, uint8* dst_v, int dst_stride_v,
uint8* dst_vplane, int dst_vstride, int width, int height);
int src_width, int src_height);
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
int uint8* dst_y, int dst_stride_y,
RAWToI420(const uint8* src_frame, int src_stride, uint8* dst_u, int dst_stride_u,
uint8* dst_yplane, int dst_ystride, uint8* dst_v, int dst_stride_v,
uint8* dst_uplane, int dst_ustride, int width, int height);
uint8* dst_vplane, int dst_vstride,
int src_width, int src_height); int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
int uint8* dst_u, int dst_stride_u,
ABGRToI420(const uint8* src_frame, int src_stride, uint8* dst_v, int dst_stride_v,
uint8* dst_yplane, int dst_ystride, int width, int height);
uint8* dst_uplane, int dst_ustride,
uint8* dst_vplane, int dst_vstride, int ARGBToI420(const uint8* src_frame, int src_stride_frame,
int src_width, int src_height); uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
int uint8* dst_v, int dst_stride_v,
BGRAToI420(const uint8* src_frame, int src_stride, int width, int height);
uint8* dst_yplane, int dst_ystride,
uint8* dst_uplane, int dst_ustride, int NV12ToRGB565(const uint8* src_y, int src_stride_y,
uint8* dst_vplane, int dst_vstride, const uint8* src_uv, int src_stride_uv,
int src_width, int src_height); uint8* dst_frame, int dst_stride_frame,
int width, int height);
int
ARGBToI420(const uint8* src_frame, int src_stride,
uint8* dst_yplane, int dst_ystride,
uint8* dst_uplane, int dst_ustride,
uint8* dst_vplane, int dst_vstride,
int src_width, int src_height);
int
NV12ToRGB565(const uint8* src_yplane, int src_ystride,
const uint8* src_uvplane, int src_uvstride,
uint8* dst_frame, int dst_stride,
int src_width, int src_height);
} // namespace libyuv } // namespace libyuv
......
...@@ -10,8 +10,10 @@ ...@@ -10,8 +10,10 @@
#include "libyuv/convert.h" #include "libyuv/convert.h"
#include "libyuv/basic_types.h"
#include "conversion_tables.h" #include "conversion_tables.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "row.h"
//#define SCALEOPT //Currently for windows only. June 2010 //#define SCALEOPT //Currently for windows only. June 2010
...@@ -30,29 +32,29 @@ static inline uint8 Clip(int32 val) { ...@@ -30,29 +32,29 @@ static inline uint8 Clip(int32 val) {
return (uint8) val; return (uint8) val;
} }
int I420ToRGB24(const uint8* src_yplane, int src_ystride, int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_uplane, int src_ustride, const uint8* src_u, int src_stride_u,
const uint8* src_vplane, int src_vstride, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL ||
dst_frame == NULL)
return -1; return -1;
}
// RGB orientation - bottom up // RGB orientation - bottom up
uint8* out = dst_frame + dst_stride * src_height - dst_stride; // TODO(fbarchard): support inversion
uint8* out2 = out - dst_stride; uint8* out = dst_frame + dst_stride_frame * height - dst_stride_frame;
uint8* out2 = out - dst_stride_frame;
int h, w; int h, w;
int tmp_r, tmp_g, tmp_b; int tmp_r, tmp_g, tmp_b;
const uint8 *y1, *y2 ,*u, *v; const uint8 *y1, *y2 ,*u, *v;
y1 = src_yplane; y1 = src_y;
y2 = y1 + src_ystride; y2 = y1 + src_stride_y;
u = src_uplane; u = src_u;
v = src_vplane; v = src_v;
for (h = ((src_height + 1) >> 1); h > 0; h--){ for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time // 2 rows at a time, 2 y's at a time
for (w = 0; w < ((src_width + 1) >> 1); w++){ for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling // Vertical and horizontal sub-sampling
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8); tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8); tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
...@@ -89,41 +91,40 @@ int I420ToRGB24(const uint8* src_yplane, int src_ystride, ...@@ -89,41 +91,40 @@ int I420ToRGB24(const uint8* src_yplane, int src_ystride,
u++; u++;
v++; v++;
} }
y1 += src_ystride + src_ystride - src_width; y1 += src_stride_y + src_stride_y - width;
y2 += src_ystride + src_ystride - src_width; y2 += src_stride_y + src_stride_y - width;
u += src_ustride - ((src_width + 1) >> 1); u += src_stride_u - ((width + 1) >> 1);
v += src_vstride - ((src_width + 1) >> 1); v += src_stride_v - ((width + 1) >> 1);
out -= dst_stride * 3; out -= dst_stride_frame * 3;
out2 -= dst_stride * 3; out2 -= dst_stride_frame * 3;
} // end height for } // end height for
return 0; return 0;
} }
// Little Endian... // Little Endian...
int I420ToARGB4444(const uint8* src_yplane, int src_ystride, int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_uplane, int src_ustride, const uint8* src_u, int src_stride_u,
const uint8* src_vplane, int src_vstride, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL ||
dst_frame == NULL)
return -1; return -1;
}
// RGB orientation - bottom up // RGB orientation - bottom up
uint8* out = dst_frame + dst_stride * (src_height - 1); uint8* out = dst_frame + dst_stride_frame * (height - 1);
uint8* out2 = out - dst_stride; uint8* out2 = out - dst_stride_frame;
int tmp_r, tmp_g, tmp_b; int tmp_r, tmp_g, tmp_b;
const uint8 *y1,*y2, *u, *v; const uint8 *y1,*y2, *u, *v;
y1 = src_yplane; y1 = src_y;
y2 = y1 + src_ystride; y2 = y1 + src_stride_y;
u = src_uplane; u = src_u;
v = src_vplane; v = src_v;
int h, w; int h, w;
for (h = ((src_height + 1) >> 1); h > 0; h--){ for (h = ((height + 1) >> 1); h > 0; h--) {
// 2 rows at a time, 2 y's at a time // 2 rows at a time, 2 y's at a time
for (w = 0; w < ((src_width + 1) >> 1); w++){ for (w = 0; w < ((width + 1) >> 1); w++) {
// Vertical and horizontal sub-sampling // Vertical and horizontal sub-sampling
// Convert to RGB888 and re-scale to 4 bits // Convert to RGB888 and re-scale to 4 bits
tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8); tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
...@@ -157,51 +158,50 @@ int I420ToARGB4444(const uint8* src_yplane, int src_ystride, ...@@ -157,51 +158,50 @@ int I420ToARGB4444(const uint8* src_yplane, int src_ystride,
u++; u++;
v++; v++;
} }
y1 += 2 * src_ystride - src_width; y1 += 2 * src_stride_y - width;
y2 += 2 * src_ystride - src_width; y2 += 2 * src_stride_y - width;
u += src_ustride - ((src_width + 1) >> 1); u += src_stride_u - ((width + 1) >> 1);
v += src_vstride - ((src_width + 1) >> 1); v += src_stride_v - ((width + 1) >> 1);
out -= (dst_stride + src_width) * 2; out -= (dst_stride_frame + width) * 2;
out2 -= (dst_stride + src_width) * 2; out2 -= (dst_stride_frame + width) * 2;
} // end height for } // end height for
return 0; return 0;
} }
int I420ToRGB565(const uint8* src_yplane, int src_ystride, int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uplane, int src_ustride, const uint8* src_u, int src_stride_u,
const uint8* src_vplane, int src_vstride, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL ||
dst_frame == NULL)
return -1; return -1;
}
// Negative height means invert the image. // Negative height means invert the image.
if (src_height < 0) { if (height < 0) {
src_height = -src_height; height = -height;
src_yplane = src_yplane + (src_height - 1) * src_ystride; src_y = src_y + (height - 1) * src_stride_y;
src_uplane = src_uplane + (src_height - 1) * src_ustride; src_u = src_u + (height - 1) * src_stride_u;
src_vplane = src_vplane + (src_height - 1) * src_vstride; src_v = src_v + (height - 1) * src_stride_v;
src_ystride = -src_ystride; src_stride_y = -src_stride_y;
src_ustride = -src_ustride; src_stride_u = -src_stride_u;
src_vstride = -src_vstride; src_stride_v = -src_stride_v;
} }
uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1); uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
uint16* out2 = out - dst_stride; uint16* out2 = out - dst_stride_frame;
int tmp_r, tmp_g, tmp_b; int tmp_r, tmp_g, tmp_b;
const uint8 *y1,*y2, *u, *v; const uint8* y1,* y2, * u, * v;
y1 = src_yplane; y1 = src_y;
y2 = y1 + src_ystride; y2 = y1 + src_stride_y;
u = src_uplane; u = src_u;
v = src_vplane; v = src_v;
int h, w; int h, w;
for (h = ((src_height + 1) >> 1); h > 0; h--){ for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time // 2 rows at a time, 2 y's at a time
for (w = 0; w < ((src_width + 1) >> 1); w++){ for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling // Vertical and horizontal sub-sampling
// 1. Convert to RGB888 // 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565 // 2. Shift to adequate location (in the 16 bit word) - RGB 565
...@@ -237,41 +237,39 @@ int I420ToRGB565(const uint8* src_yplane, int src_ystride, ...@@ -237,41 +237,39 @@ int I420ToRGB565(const uint8* src_yplane, int src_ystride,
u++; u++;
v++; v++;
} }
y1 += 2 * src_ystride - src_width; y1 += 2 * src_stride_y - width;
y2 += 2 * src_ystride - src_width; y2 += 2 * src_stride_y - width;
u += src_ustride - ((src_width + 1) >> 1); u += src_stride_u - ((width + 1) >> 1);
v += src_vstride - ((src_width + 1) >> 1); v += src_stride_v - ((width + 1) >> 1);
out -= 2 * dst_stride + src_width; out -= 2 * dst_stride_frame + width;
out2 -= 2 * dst_stride + src_width; out2 -= 2 * dst_stride_frame + width;
} }
return 0; return 0;
} }
int I420ToARGB1555(const uint8* src_yplane, int src_ystride, int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_uplane, int src_ustride, const uint8* src_u, int src_stride_u,
const uint8* src_vplane, int src_vstride, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL ||
dst_frame == NULL){
return -1; return -1;
} }
uint16* out = (uint16*)(dst_frame) + dst_stride * (src_height - 1); uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
uint16* out2 = out - dst_stride ; uint16* out2 = out - dst_stride_frame ;
int32 tmp_r, tmp_g, tmp_b; int32 tmp_r, tmp_g, tmp_b;
const uint8 *y1,*y2, *u, *v; const uint8 *y1,*y2, *u, *v;
int h, w; int h, w;
y1 = src_yplane; y1 = src_y;
y2 = y1 + src_ystride; y2 = y1 + src_stride_y;
u = src_uplane; u = src_u;
v = src_vplane; v = src_v;
for (h = ((src_height + 1) >> 1); h > 0; h--){ for (h = ((height + 1) >> 1); h > 0; h--){
// 2 rows at a time, 2 y's at a time // 2 rows at a time, 2 y's at a time
for (w = 0; w < ((src_width + 1) >> 1); w++){ for (w = 0; w < ((width + 1) >> 1); w++){
// Vertical and horizontal sub-sampling // Vertical and horizontal sub-sampling
// 1. Convert to RGB888 // 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 555 // 2. Shift to adequate location (in the 16 bit word) - RGB 555
...@@ -307,41 +305,37 @@ int I420ToARGB1555(const uint8* src_yplane, int src_ystride, ...@@ -307,41 +305,37 @@ int I420ToARGB1555(const uint8* src_yplane, int src_ystride,
u++; u++;
v++; v++;
} }
y1 += 2 * src_ystride - src_width; y1 += 2 * src_stride_y - width;
y2 += 2 * src_ystride - src_width; y2 += 2 * src_stride_y - width;
u += src_ustride - ((src_width + 1) >> 1); u += src_stride_u - ((width + 1) >> 1);
v += src_vstride - ((src_width + 1) >> 1); v += src_stride_v - ((width + 1) >> 1);
out -= 2 * dst_stride + src_width; out -= 2 * dst_stride_frame + width;
out2 -= 2 * dst_stride + src_width; out2 -= 2 * dst_stride_frame + width;
} }
return 0; return 0;
} }
int I420ToYUY2(const uint8* src_yplane, int src_ystride, int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_uplane, int src_ustride, const uint8* src_u, int src_stride_u,
const uint8* src_vplane, int src_vstride, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL ||
dst_frame == NULL){
return -1; return -1;
} }
const uint8* in1 = src_yplane; const uint8* in1 = src_y;
const uint8* in2 = src_yplane + src_ystride ; const uint8* in2 = src_y + src_stride_y;
const uint8* src_u = src_uplane;
const uint8* src_v = src_vplane;
uint8* out1 = dst_frame; uint8* out1 = dst_frame;
uint8* out2 = dst_frame + dst_stride; uint8* out2 = dst_frame + dst_stride_frame;
// YUY2 - Macro-pixel = 2 image pixels // YUY2 - Macro-pixel = 2 image pixels
// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
#ifndef SCALEOPT #ifndef SCALEOPT
for (int i = 0; i < ((src_height + 1) >> 1); i++){ for (int i = 0; i < ((height + 1) >> 1); i++){
for (int j = 0; j < ((src_width + 1) >> 1); j++){ for (int j = 0; j < ((width + 1) >> 1); j++){
out1[0] = in1[0]; out1[0] = in1[0];
out1[1] = *src_u; out1[1] = *src_u;
out1[2] = in1[1]; out1[2] = in1[1];
...@@ -358,16 +352,15 @@ int I420ToYUY2(const uint8* src_yplane, int src_ystride, ...@@ -358,16 +352,15 @@ int I420ToYUY2(const uint8* src_yplane, int src_ystride,
in1 += 2; in1 += 2;
in2 += 2; in2 += 2;
} }
in1 += 2 * src_ystride - src_width; in1 += 2 * src_stride_y - width;
in2 += 2 * src_ystride - src_width; in2 += 2 * src_stride_y - width;
src_u += src_ustride - ((src_width + 1) >> 1); src_u += src_stride_u - ((width + 1) >> 1);
src_v += src_vstride - ((src_width + 1) >> 1); src_v += src_stride_v - ((width + 1) >> 1);
out1 += dst_stride + dst_stride - 2 * src_width; out1 += dst_stride_frame + dst_stride_frame - 2 * width;
out2 += dst_stride + dst_stride - 2 * src_width; out2 += dst_stride_frame + dst_stride_frame - 2 * width;
} }
#else #else
for (WebRtc_UWord32 i = 0; i < ((height + 1) >> 1);i++) for (WebRtc_UWord32 i = 0; i < ((height + 1) >> 1);i++) {
{
int32 width__ = (width >> 4); int32 width__ = (width >> 4);
_asm _asm
{ {
...@@ -424,40 +417,39 @@ int I420ToYUY2(const uint8* src_yplane, int src_ystride, ...@@ -424,40 +417,39 @@ int I420ToYUY2(const uint8* src_yplane, int src_ystride,
;popa ;popa
emms emms
} }
in1 += 2 * src_ystride - src_width; in1 += 2 * src_stride_y - width;
in2 += 2 * src_ystride - src_width; in2 += 2 * src_stride_y - width;
out1 += dst_stride + dst_stride - 2 * width; out1 += dst_stride_frame + dst_stride_frame - 2 * width;
out2 += dst_stride + dst_stride - 2 * width; out2 += dst_stride_frame + dst_stride_frame - 2 * width;
} }
#endif #endif
return 0; return 0;
} }
int I420ToUYVY(const uint8* src_yplane, int src_ystride, int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_uplane, int src_ustride, const uint8* src_u, int src_stride_u,
const uint8* src_vplane, int src_vstride, const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uplane == NULL || src_vplane == NULL ||
dst_frame == NULL)
return -1; return -1;
}
int i = 0; int i = 0;
const uint8* y1 = src_yplane; const uint8* y1 = src_y;
const uint8* y2 = y1 + src_ystride; const uint8* y2 = y1 + src_stride_y;
const uint8* u = src_uplane; const uint8* u = src_u;
const uint8* v = src_vplane; const uint8* v = src_v;
uint8* out1 = dst_frame; uint8* out1 = dst_frame;
uint8* out2 = dst_frame + dst_stride; uint8* out2 = dst_frame + dst_stride_frame;
// Macro-pixel = 2 image pixels // Macro-pixel = 2 image pixels
// U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5..... // U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
#ifndef SCALEOPT #ifndef SCALEOPT
for (; i < ((src_height + 1) >> 1);i++){ for (; i < ((height + 1) >> 1); i++) {
for (int j = 0; j < ((src_width + 1) >> 1) ;j++){ for (int j = 0; j < ((width + 1) >> 1); j++) {
out1[0] = *u; out1[0] = *u;
out1[1] = y1[0]; out1[1] = y1[0];
out1[2] = *v; out1[2] = *v;
...@@ -474,16 +466,15 @@ int I420ToUYVY(const uint8* src_yplane, int src_ystride, ...@@ -474,16 +466,15 @@ int I420ToUYVY(const uint8* src_yplane, int src_ystride,
y1 += 2; y1 += 2;
y2 += 2; y2 += 2;
} }
y1 += 2 * src_ystride - src_width; y1 += 2 * src_stride_y - width;
y2 += 2 * src_ystride - src_width; y2 += 2 * src_stride_y - width;
u += src_ustride - ((src_width + 1) >> 1); u += src_stride_u - ((width + 1) >> 1);
v += src_vstride - ((src_width + 1) >> 1); v += src_stride_v - ((width + 1) >> 1);
out1 += 2 * (dst_stride - src_width); out1 += 2 * (dst_stride_frame - width);
out2 += 2 * (dst_stride - src_width); out2 += 2 * (dst_stride_frame - width);
} }
#else #else
for (; i < (height >> 1);i++) for (; i < (height >> 1);i++) {
{
int32 width__ = (width >> 4); int32 width__ = (width >> 4);
_asm _asm
{ {
...@@ -540,35 +531,35 @@ loop0: ...@@ -540,35 +531,35 @@ loop0:
} }
in1 += width; in1 += width;
in2 += width; in2 += width;
out1 += 2 * (dst_stride - width); out1 += 2 * (dst_stride_frame - width);
out2 += 2 * (dst_stride - width); out2 += 2 * (dst_stride_frame - width);
} }
#endif #endif
return 0; return 0;
} }
int NV12ToRGB565(const uint8* src_yplane, int src_ystride, int NV12ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_uvplane, int src_uvstride, const uint8* src_uv, int src_stride_uv,
uint8* dst_frame, int dst_stride, uint8* dst_frame, int dst_stride_frame,
int src_width, int src_height) int width, int height) {
{ if (src_y == NULL || src_uv == NULL || dst_frame == NULL) {
if (src_yplane == NULL || src_uvplane == NULL || dst_frame == NULL)
return -1; return -1;
}
// Bi-Planar: Y plane followed by an interlaced U and V plane // Bi-Planar: Y plane followed by an interlaced U and V plane
const uint8* interlacedSrc = src_uvplane; const uint8* interlacedSrc = src_uv;
uint16* out = (uint16*)(src_yplane) + dst_stride * (src_height - 1); uint16* out = (uint16*)(src_y) + dst_stride_frame * (height - 1);
uint16* out2 = out - dst_stride; uint16* out2 = out - dst_stride_frame;
int32 tmp_r, tmp_g, tmp_b; int32 tmp_r, tmp_g, tmp_b;
const uint8 *y1,*y2; const uint8 *y1,*y2;
y1 = src_yplane; y1 = src_y;
y2 = y1 + src_ystride; y2 = y1 + src_stride_y;
int h, w; int h, w;
for (h = ((src_height + 1) >> 1); h > 0; h--){ for (h = ((height + 1) >> 1); h > 0; h--) {
// 2 rows at a time, 2 y's at a time // 2 rows at a time, 2 y's at a time
for (w = 0; w < ((src_width + 1) >> 1); w++){ for (w = 0; w < ((width + 1) >> 1); w++) {
// Vertical and horizontal sub-sampling // Vertical and horizontal sub-sampling
// 1. Convert to RGB888 // 1. Convert to RGB888
// 2. Shift to adequate location (in the 16 bit word) - RGB 565 // 2. Shift to adequate location (in the 16 bit word) - RGB 565
...@@ -608,29 +599,30 @@ int NV12ToRGB565(const uint8* src_yplane, int src_ystride, ...@@ -608,29 +599,30 @@ int NV12ToRGB565(const uint8* src_yplane, int src_ystride,
out2 += 2; out2 += 2;
interlacedSrc += 2; interlacedSrc += 2;
} }
y1 += 2 * src_ystride - src_width; y1 += 2 * src_stride_y - width;
y2 += 2 * src_ystride - src_width; y2 += 2 * src_stride_y - width;
interlacedSrc += src_uvstride - ((src_width + 1) >> 1); interlacedSrc += src_stride_uv - ((width + 1) >> 1);
out -= 3 * dst_stride + dst_stride - src_width; out -= 3 * dst_stride_frame + dst_stride_frame - width;
out2 -= 3 * dst_stride + dst_stride - src_width; out2 -= 3 * dst_stride_frame + dst_stride_frame - width;
} }
return 0; return 0;
} }
int RGB24ToARGB(const uint8* src_frame, int src_stride, // TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
uint8* dst_frame, int dst_stride, int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
int src_width, int src_height) uint8* dst_frame, int dst_stride_frame,
{ int width, int height) {
if (src_frame == NULL || dst_frame == NULL) if (src_frame == NULL || dst_frame == NULL) {
return -1; return -1;
}
int i, j, offset; int i, j, offset;
uint8* outFrame = dst_frame; uint8* outFrame = dst_frame;
const uint8* inFrame = src_frame; const uint8* inFrame = src_frame;
outFrame += dst_stride * (src_height - 1) * 4; outFrame += dst_stride_frame * (height - 1) * 4;
for (i = 0; i < src_height; i++){ for (i = 0; i < height; i++) {
for (j = 0; j < src_width; j++){ for (j = 0; j < width; j++) {
offset = j * 4; offset = j * 4;
outFrame[0 + offset] = inFrame[0]; outFrame[0 + offset] = inFrame[0];
outFrame[1 + offset] = inFrame[1]; outFrame[1 + offset] = inFrame[1];
...@@ -638,8 +630,8 @@ int RGB24ToARGB(const uint8* src_frame, int src_stride, ...@@ -638,8 +630,8 @@ int RGB24ToARGB(const uint8* src_frame, int src_stride,
outFrame[3 + offset] = 0xff; outFrame[3 + offset] = 0xff;
inFrame += 3; inFrame += 3;
} }
outFrame -= 4 * (dst_stride - src_width); outFrame -= 4 * (dst_stride_frame - width);
inFrame += src_stride - src_width; inFrame += src_stride_frame - width;
} }
return 0; return 0;
} }
...@@ -654,10 +646,10 @@ int RGB24ToARGB(const uint8* src_frame, int src_stride, ...@@ -654,10 +646,10 @@ int RGB24ToARGB(const uint8* src_frame, int src_stride,
static void \ static void \
NAME(const uint8* src_row0, const uint8* src_row1, \ NAME(const uint8* src_row0, const uint8* src_row1, \
uint8* dst_yplane0, uint8* dst_yplane1, \ uint8* dst_yplane0, uint8* dst_yplane1, \
uint8* dst_uplane, \ uint8* dst_u, \
uint8* dst_vplane, \ uint8* dst_v, \
int src_width) { \ int width) { \
for (int x = 0; x < src_width - 1; x += 2) { \ for (int x = 0; x < width - 1; x += 2) { \
dst_yplane0[0] = (uint8)((src_row0[R] * 66 + \ dst_yplane0[0] = (uint8)((src_row0[R] * 66 + \
src_row0[G] * 129 + \ src_row0[G] * 129 + \
src_row0[B] * 25 + 128) >> 8) + 16; \ src_row0[B] * 25 + 128) >> 8) + 16; \
...@@ -670,14 +662,14 @@ NAME(const uint8* src_row0, const uint8* src_row1, \ ...@@ -670,14 +662,14 @@ NAME(const uint8* src_row0, const uint8* src_row1, \
dst_yplane1[1] = (uint8)((src_row1[R + BPP] * 66 + \ dst_yplane1[1] = (uint8)((src_row1[R + BPP] * 66 + \
src_row1[G + BPP] * 129 + \ src_row1[G + BPP] * 129 + \
src_row1[B + BPP] * 25 + 128) >> 8) + 16; \ src_row1[B + BPP] * 25 + 128) >> 8) + 16; \
dst_uplane[0] = (uint8)(((src_row0[R] + src_row0[R + BPP] + \ dst_u[0] = (uint8)(((src_row0[R] + src_row0[R + BPP] + \
src_row1[R] + src_row1[R + BPP]) * -38 + \ src_row1[R] + src_row1[R + BPP]) * -38 + \
(src_row0[G] + src_row0[G + BPP] + \ (src_row0[G] + src_row0[G + BPP] + \
src_row1[G] + src_row1[G + BPP]) * -74 + \ src_row1[G] + src_row1[G + BPP]) * -74 + \
(src_row0[B] + src_row0[B + BPP] + \ (src_row0[B] + src_row0[B + BPP] + \
src_row1[B] + src_row1[B + BPP]) * 112 + \ src_row1[B] + src_row1[B + BPP]) * 112 + \
+ 512) >> 10) + 128; \ + 512) >> 10) + 128; \
dst_vplane[0] = (uint8)(((src_row0[R] + src_row0[R + BPP] + \ dst_v[0] = (uint8)(((src_row0[R] + src_row0[R + BPP] + \
src_row1[R] + src_row1[R + BPP]) * 112 + \ src_row1[R] + src_row1[R + BPP]) * 112 + \
(src_row0[G] + src_row0[G + BPP] + \ (src_row0[G] + src_row0[G + BPP] + \
src_row1[G] + src_row1[G + BPP]) * -94 + \ src_row1[G] + src_row1[G + BPP]) * -94 + \
...@@ -686,26 +678,26 @@ NAME(const uint8* src_row0, const uint8* src_row1, \ ...@@ -686,26 +678,26 @@ NAME(const uint8* src_row0, const uint8* src_row1, \
+ 512) >> 10) + 128; \ + 512) >> 10) + 128; \
dst_yplane0 += 2; \ dst_yplane0 += 2; \
dst_yplane1 += 2; \ dst_yplane1 += 2; \
++dst_uplane; \ ++dst_u; \
++dst_vplane; \ ++dst_v; \
src_row0 += BPP * 2; \ src_row0 += BPP * 2; \
src_row1 += BPP * 2; \ src_row1 += BPP * 2; \
} \ } \
if (src_width & 1) { \ if (width & 1) { \
dst_yplane0[0] = (uint8)((src_row0[R] * 66 + \ dst_yplane0[0] = (uint8)((src_row0[R] * 66 + \
src_row0[G] * 129 + \ src_row0[G] * 129 + \
src_row0[B] * 25 + 128) >> 8) + 16; \ src_row0[B] * 25 + 128) >> 8) + 16; \
dst_yplane1[0] = (uint8)((src_row1[R] * 66 + \ dst_yplane1[0] = (uint8)((src_row1[R] * 66 + \
src_row1[G] * 129 + \ src_row1[G] * 129 + \
src_row1[B] * 25 + 128) >> 8) + 16; \ src_row1[B] * 25 + 128) >> 8) + 16; \
dst_uplane[0] = (uint8)(((src_row0[R] + \ dst_u[0] = (uint8)(((src_row0[R] + \
src_row1[R]) * -38 + \ src_row1[R]) * -38 + \
(src_row0[G] + \ (src_row0[G] + \
src_row1[G]) * -74 + \ src_row1[G]) * -74 + \
(src_row0[B] + \ (src_row0[B] + \
src_row1[B]) * 112 + \ src_row1[B]) * 112 + \
+ 256) >> 9) + 128; \ + 256) >> 9) + 128; \
dst_vplane[0] = (uint8)(((src_row0[R] + \ dst_v[0] = (uint8)(((src_row0[R] + \
src_row1[R]) * 112 + \ src_row1[R]) * 112 + \
(src_row0[G] + \ (src_row0[G] + \
src_row1[G]) * -94 + \ src_row1[G]) * -94 + \
...@@ -723,104 +715,157 @@ MAKEROWRGBTOI420(ABGRToI420Row_C, 0, 1, 2, 4) ...@@ -723,104 +715,157 @@ MAKEROWRGBTOI420(ABGRToI420Row_C, 0, 1, 2, 4)
MAKEROWRGBTOI420(RGB24ToI420Row_C, 2, 1, 0, 3) MAKEROWRGBTOI420(RGB24ToI420Row_C, 2, 1, 0, 3)
MAKEROWRGBTOI420(RAWToI420Row_C, 0, 1, 2, 3) MAKEROWRGBTOI420(RAWToI420Row_C, 0, 1, 2, 3)
// Side-by-side diff text: each line holds the pre-change code followed by the
// post-change code. The change shown here only renames parameters
// (src_stride -> src_stride_frame, dst_*plane -> dst_*, dst_*stride ->
// dst_stride_*, src_width/src_height -> width/height).
//
// RGBToI420: shared driver used by the per-format entry points. It walks the
// source frame two rows at a time and hands each row pair to the RGBToI420Row
// callback. Returns -1 when the pointer check fails, 0 otherwise.
static int RGBToI420(const uint8* src_frame, int src_stride, static int RGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_yplane, int dst_ystride, uint8* dst_y, int dst_stride_y,
uint8* dst_uplane, int dst_ustride, uint8* dst_u, int dst_stride_u,
uint8* dst_vplane, int dst_vstride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height, int width, int height,
void (*RGBToI420Row)(const uint8* src_row0, void (*RGBToI420Row)(const uint8* src_row0,
const uint8* src_row1, const uint8* src_row1,
uint8* dst_yplane0, uint8* dst_yplane0,
uint8* dst_yplane1, uint8* dst_yplane1,
uint8* dst_uplane, uint8* dst_u,
uint8* dst_vplane, uint8* dst_v,
int src_width)) { int width)) {
// NOTE(review): both columns test the V pointer twice, so the U pointer is
// never NULL-checked. The second operand was presumably meant to be
// dst_uplane / dst_u -- confirm and fix in the real source.
if (src_frame == NULL || dst_yplane == NULL || if (src_frame == NULL || dst_y == NULL ||
dst_vplane == NULL || dst_vplane == NULL) dst_v == NULL || dst_v == NULL)
return -1; return -1;
// Negative height: point at the last source row and negate the source stride
// so the rows are read bottom-up. The destination pointers are not changed.
if (src_height < 0) { if (height < 0) {
src_height = -src_height; height = -height;
src_frame = src_frame + src_stride * (src_height -1); src_frame = src_frame + src_stride_frame * (height -1);
src_stride = -src_stride; src_stride_frame = -src_stride_frame;
} }
// Full row pairs: two source rows and two Y rows per callback invocation;
// the U and V pointers advance by one stride per pair.
for (int y = 0; y < src_height - 1; y += 2) { for (int y = 0; y < height - 1; y += 2) {
RGBToI420Row(src_frame, src_frame + src_stride, RGBToI420Row(src_frame, src_frame + src_stride_frame,
dst_yplane, dst_yplane + dst_ystride, dst_y, dst_y + dst_stride_y,
dst_uplane, dst_vplane, dst_u, dst_v,
src_width); width);
src_frame += src_stride * 2; src_frame += src_stride_frame * 2;
dst_yplane += dst_ystride * 2; dst_y += dst_stride_y * 2;
dst_uplane += dst_ustride; dst_u += dst_stride_u;
dst_vplane += dst_vstride; dst_v += dst_stride_v;
} }
// Odd height: the remaining row is passed as both rows of the pair, and the
// same Y row is passed as both Y outputs.
if (src_height & 1) { if (height & 1) {
RGBToI420Row(src_frame, src_frame, RGBToI420Row(src_frame, src_frame,
dst_yplane, dst_yplane, dst_y, dst_y,
dst_uplane, dst_vplane, dst_u, dst_v,
src_width); width);
} }
return 0; return 0;
} }
// Side-by-side diff text (old code, then new code, on each line).
// Forwards every argument unchanged to RGBToI420 with ARGBToI420Row_C as the
// row converter and returns its result. The new column renames this entry
// point from ARGBToI420 to ARGBToI420_Reference.
int ARGBToI420(const uint8* src_frame, int src_stride, int ARGBToI420_Reference(const uint8* src_frame, int src_stride_frame,
uint8* dst_yplane, int dst_ystride, uint8* dst_y, int dst_stride_y,
uint8* dst_uplane, int dst_ustride, uint8* dst_u, int dst_stride_u,
uint8* dst_vplane, int dst_vstride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height) { int width, int height) {
return RGBToI420(src_frame, src_stride, return RGBToI420(src_frame, src_stride_frame,
dst_yplane, dst_ystride, dst_y, dst_stride_y,
dst_uplane, dst_ustride, dst_u, dst_stride_u,
dst_vplane, dst_vstride, dst_v, dst_stride_v,
src_width, src_height, ARGBToI420Row_C); width, height, ARGBToI420Row_C);
} }
// Side-by-side diff text (old code, then new code, on each line).
// Forwards every argument unchanged to RGBToI420 with BGRAToI420Row_C as the
// row converter and returns its result. Only the parameter names differ
// between the two columns.
int BGRAToI420(const uint8* src_frame, int src_stride, int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_yplane, int dst_ystride, uint8* dst_y, int dst_stride_y,
uint8* dst_uplane, int dst_ustride, uint8* dst_u, int dst_stride_u,
uint8* dst_vplane, int dst_vstride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height) { int width, int height) {
return RGBToI420(src_frame, src_stride, return RGBToI420(src_frame, src_stride_frame,
dst_yplane, dst_ystride, dst_y, dst_stride_y,
dst_uplane, dst_ustride, dst_u, dst_stride_u,
dst_vplane, dst_vstride, dst_v, dst_stride_v,
src_width, src_height, BGRAToI420Row_C); width, height, BGRAToI420Row_C);
} }
int ABGRToI420(const uint8* src_frame, int src_stride, int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_yplane, int dst_ystride, uint8* dst_y, int dst_stride_y,
uint8* dst_uplane, int dst_ustride, uint8* dst_u, int dst_stride_u,
uint8* dst_vplane, int dst_vstride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height) { int width, int height) {
return RGBToI420(src_frame, src_stride, return RGBToI420(src_frame, src_stride_frame,
dst_yplane, dst_ystride, dst_y, dst_stride_y,
dst_uplane, dst_ustride, dst_u, dst_stride_u,
dst_vplane, dst_vstride, dst_v, dst_stride_v,
src_width, src_height, ABGRToI420Row_C); width, height, ABGRToI420Row_C);
} }
int RGB24ToI420(const uint8* src_frame, int src_stride, int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_yplane, int dst_ystride, uint8* dst_y, int dst_stride_y,
uint8* dst_uplane, int dst_ustride, uint8* dst_u, int dst_stride_u,
uint8* dst_vplane, int dst_vstride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height) { int width, int height) {
return RGBToI420(src_frame, src_stride, return RGBToI420(src_frame, src_stride_frame,
dst_yplane, dst_ystride, dst_y, dst_stride_y,
dst_uplane, dst_ustride, dst_u, dst_stride_u,
dst_vplane, dst_vstride, dst_v, dst_stride_v,
src_width, src_height, RGB24ToI420Row_C); width, height, RGB24ToI420Row_C);
} }
int RAWToI420(const uint8* src_frame, int src_stride, int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_yplane, int dst_ystride, uint8* dst_y, int dst_stride_y,
uint8* dst_uplane, int dst_ustride, uint8* dst_u, int dst_stride_u,
uint8* dst_vplane, int dst_vstride, uint8* dst_v, int dst_stride_v,
int src_width, int src_height) { int width, int height) {
return RGBToI420(src_frame, src_stride, return RGBToI420(src_frame, src_stride_frame,
dst_yplane, dst_ystride, dst_y, dst_stride_y,
dst_uplane, dst_ustride, dst_u, dst_stride_u,
dst_vplane, dst_vstride, dst_v, dst_stride_v,
src_width, src_height, RAWToI420Row_C); width, height, RAWToI420Row_C);
}
// Converts a packed ARGB frame (4 bytes per pixel) to planar I420.
//
// src_frame / src_stride_frame: source pixels and the byte step between rows.
// dst_y, dst_u, dst_v and their strides: destination planes. One row of U and
//   one row of V is produced for every two source rows.
// width, height: frame size in pixels. A negative height makes the function
//   walk the source from its last row upwards (vertical flip).
//
// Returns 0 on success, or -1 when a plane pointer is null or width is not
// positive.
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  // The SSSE3 row functions handle a group of 8 pixels before they test the
  // remaining count, so a zero or negative width would run past the buffers.
  if (!src_frame || !dst_y || !dst_u || !dst_v || width <= 0) {
    return -1;
  }
  if (height < 0) {
    // Start at the last row and step backwards through the source.
    height = -height;
    src_frame = src_frame + (height - 1) * src_stride_frame;
    src_stride_frame = -src_stride_frame;
  }
  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width);
#if defined(HAS_ARGBTOYROW_SSSE3)
  // The SIMD luma row needs a multiple of 8 pixels, a 16-byte aligned source
  // and an 8-byte aligned Y plane; otherwise use the C row.
  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
      (width % 8 == 0) &&
      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
      IS_ALIGNED(dst_y, 8) && (dst_stride_y % 8 == 0)) {
    ARGBToYRow = ARGBToYRow_SSSE3;
  } else
#endif
  {
    ARGBToYRow = ARGBToYRow_C;
  }
#if defined(HAS_ARGBTOUVROW_SSSE3)
  // The SIMD chroma row stores 4 U and 4 V bytes per 8 source pixels.
  if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
      (width % 8 == 0) &&
      IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
      IS_ALIGNED(dst_u, 4) && (dst_stride_u % 4 == 0) &&
      IS_ALIGNED(dst_v, 4) && (dst_stride_v % 4 == 0)) {
    ARGBToUVRow = ARGBToUVRow_SSSE3;
  } else
#endif
  {
    ARGBToUVRow = ARGBToUVRow_C;
  }

  // Two source rows per iteration: two luma rows and one chroma row.
  for (int y = 0; y < (height - 1); y += 2) {
    ARGBToYRow(src_frame, dst_y, width);
    ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
    ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
    src_frame += src_stride_frame * 2;
    dst_y += dst_stride_y * 2;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  if (height & 1) {
    // Odd height: the last row has no partner, so a stride of 0 makes the
    // chroma row average the row with itself.
    ARGBToYRow(src_frame, dst_y, width);
    ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
  }
  return 0;
}
} // namespace libyuv } // namespace libyuv
...@@ -12,21 +12,10 @@ ...@@ -12,21 +12,10 @@
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "video_common.h" #include "video_common.h"
#include "row.h"
namespace libyuv { namespace libyuv {
// Most code in here is inspired by the material at
// http://www.siliconimaging.com/RGB%20Bayer.htm
// Forces compiler to inline, even against its better judgement. Use wisely.
#if defined(__GNUC__)
#define FORCE_INLINE __attribute__((always_inline))
#elif defined(WIN32)
#define FORCE_INLINE __forceinline
#else
#define FORCE_INLINE
#endif
// Note: to do this with Neon vld4.8 would load ARGB values into 4 registers // Note: to do this with Neon vld4.8 would load ARGB values into 4 registers
// and vst would select which 2 components to write. The low level would need // and vst would select which 2 components to write. The low level would need
// to be ARGBToBG, ARGBToGB, ARGBToRG, ARGBToGR // to be ARGBToBG, ARGBToGB, ARGBToRG, ARGBToGR
...@@ -333,46 +322,6 @@ int BayerRGBToARGB(const uint8* src_bayer, int src_stride_bayer, ...@@ -333,46 +322,6 @@ int BayerRGBToARGB(const uint8* src_bayer, int src_stride_bayer,
return 0; return 0;
} }
// Taken from http://en.wikipedia.org/wiki/YUV
// Luma from 8-bit R, G, B: weighted sum in 8.8 fixed point, rounded by adding
// 128 before the shift, then offset by 16.
static FORCE_INLINE int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return ((weighted + 128) >> 8) + 16;
}
// U chroma from 8-bit R, G, B, offset by 128. The weighted sum can be
// negative, so the result relies on >> acting as an arithmetic shift.
static FORCE_INLINE int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 38 * r - 74 * g;
  return ((weighted + 128) >> 8) + 128;
}
// V chroma from 8-bit R, G, B, offset by 128. The weighted sum can be
// negative, so the result relies on >> acting as an arithmetic shift.
static FORCE_INLINE int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return ((weighted + 128) >> 8) + 128;
}
static void ARGBtoYRow(const uint8* src_argb0,
uint8* dst_y, int width) {
for (int x = 0; x < width; ++x) {
dst_y[0] = RGBToY(src_argb0[2], src_argb0[1], src_argb0[0]);
src_argb0 += 4;
dst_y += 1;
}
}
static void ARGBtoUVRow(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u,
uint8* dst_v,
int width) {
const uint8* src_argb1 = src_argb0 + src_stride_argb;
for (int x = 0; x < width - 1; x += 2) {
uint8 ab = (src_argb0[0] + src_argb0[4] + src_argb1[0] + src_argb1[4]) >> 2;
uint8 ag = (src_argb0[1] + src_argb0[5] + src_argb1[1] + src_argb1[5]) >> 2;
uint8 ar = (src_argb0[2] + src_argb0[6] + src_argb1[2] + src_argb1[6]) >> 2;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb0 += 8;
src_argb1 += 8;
dst_u += 1;
dst_v += 1;
}
}
// Converts any Bayer RGB format to I420. // Converts any Bayer RGB format to I420.
int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint32 src_fourcc_bayer, uint32 src_fourcc_bayer,
...@@ -395,6 +344,28 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -395,6 +344,28 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_rgb, int pix); uint8* dst_rgb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_rgb, int pix); uint8* dst_rgb, int pix);
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#define kMaxStride (2048 * 4)
SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
#if defined(HAS_ARGBTOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
(width % 8 == 0) &&
IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) &&
IS_ALIGNED(dst_y, 8) && (dst_stride_y % 8 == 0)) {
ARGBToYRow = ARGBToYRow_SSSE3;
#if defined(HAS_ARGBTOUVROW_SSSE3)
ARGBToUVRow = ARGBToUVRow_SSSE3;
#else
ARGBToUVRow = ARGBToUVRow_C;
#endif
} else
#endif
{
ARGBToYRow = ARGBToYRow_C;
ARGBToUVRow = ARGBToUVRow_C;
}
switch (src_fourcc_bayer) { switch (src_fourcc_bayer) {
default: default:
...@@ -417,24 +388,23 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, ...@@ -417,24 +388,23 @@ int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
break; break;
} }
#define kMaxStride 2048 * 4
uint8 row[kMaxStride * 2];
for (int y = 0; y < (height - 1); y += 2) { for (int y = 0; y < (height - 1); y += 2) {
BayerRow0(src_bayer, src_stride_bayer, row, width); BayerRow0(src_bayer, src_stride_bayer, row, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
row + kMaxStride, width); row + kMaxStride, width);
ARGBtoYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBtoYRow(row + kMaxStride, dst_y + dst_stride_y, width); ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
ARGBtoUVRow(row, kMaxStride, dst_u, dst_v, width); ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
src_bayer += src_stride_bayer * 2; src_bayer += src_stride_bayer * 2;
dst_y += dst_stride_y * 2; dst_y += dst_stride_y * 2;
dst_u += dst_stride_u; dst_u += dst_stride_u;
dst_v += dst_stride_v; dst_v += dst_stride_v;
} }
// TODO(fbarchard): Make sure this filters properly
if (height & 1) { if (height & 1) {
BayerRow0(src_bayer, src_stride_bayer, row, width); BayerRow0(src_bayer, src_stride_bayer, row, width);
ARGBtoYRow(row, dst_y, width); ARGBToYRow(row, dst_y, width);
ARGBtoUVRow(row, 0, dst_u, dst_v, width); ARGBToUVRow(row, 0, dst_u, dst_v, width);
} }
return 0; return 0;
} }
......
...@@ -68,15 +68,6 @@ extern "C" TALIGN16(const uint8, kShuffleMaskRAWToARGB[16]) = { ...@@ -68,15 +68,6 @@ extern "C" TALIGN16(const uint8, kShuffleMaskRAWToARGB[16]) = {
2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
}; };
// Constant multiplication table for converting ARGB to I400.
extern "C" TALIGN16(const uint8, kMultiplyMaskARGBToI400[16]) = {
13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u
};
extern "C" TALIGN16(const uint8, kMultiplyMaskARGBToI400_2[16]) = {
1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u
};
#if defined(WIN32) && !defined(COVERAGE_ENABLED) #if defined(WIN32) && !defined(COVERAGE_ENABLED)
#define HAS_SPLITUV_SSE2 #define HAS_SPLITUV_SSE2
__declspec(naked) __declspec(naked)
...@@ -215,7 +206,7 @@ int I420Copy(const uint8* src_y, int src_stride_y, ...@@ -215,7 +206,7 @@ int I420Copy(const uint8* src_y, int src_stride_y,
static void SetRow32_NEON(uint8* dst, uint32 v32, int count) { static void SetRow32_NEON(uint8* dst, uint32 v32, int count) {
__asm__ volatile __asm__ volatile
( (
"vdup.u32 q0, %2 \n" // duplicate 4 ints "vdup.u32 {q0}, %2 \n" // duplicate 4 ints
"1:\n" "1:\n"
"vst1.u32 {q0}, [%0]! \n" // store "vst1.u32 {q0}, [%0]! \n" // store
"subs %1, %1, #16 \n" // 16 processed per loop "subs %1, %1, #16 \n" // 16 processed per loop
...@@ -393,16 +384,16 @@ int I422ToI420(const uint8* src_y, int src_stride_y, ...@@ -393,16 +384,16 @@ int I422ToI420(const uint8* src_y, int src_stride_y,
} }
// Copies `height` rows of `width` bytes from src to dst.
//
// Source rows are spaced by two alternating strides: after the first row of
// each pair the source advances by src_stride_0, after the second row by
// src_stride_1. The destination always advances by dst_stride_frame.
//
// An odd height copies only the first row of the last pair, so nothing is
// read or written past the final row.
static void I420CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
                           uint8* dst, int dst_stride_frame,
                           int width, int height) {
  for (int y = 0; y < height; y += 2) {
    memcpy(dst, src, width);
    src += src_stride_0;
    dst += dst_stride_frame;
    if (y + 1 >= height) {
      break;  // Odd height: this pair has no second row.
    }
    memcpy(dst, src, width);
    src += src_stride_1;
    dst += dst_stride_frame;
  }
}
...@@ -503,13 +494,13 @@ int NV12ToI420(const uint8* src_y, int src_stride_y, ...@@ -503,13 +494,13 @@ int NV12ToI420(const uint8* src_y, int src_stride_y,
// Convert NV12 to I420. Deprecated. // Convert NV12 to I420. Deprecated.
int NV12ToI420(const uint8* src_y, int NV12ToI420(const uint8* src_y,
const uint8* src_uv, const uint8* src_uv,
int src_stride, int src_stride_frame,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u, uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v, uint8* dst_v, int dst_stride_v,
int width, int height) { int width, int height) {
return X420ToI420(src_y, src_stride, src_stride, return X420ToI420(src_y, src_stride_frame, src_stride_frame,
src_uv, src_stride, src_uv, src_stride_frame,
dst_y, dst_stride_y, dst_y, dst_stride_y,
dst_u, dst_stride_u, dst_u, dst_stride_u,
dst_v, dst_stride_v, dst_v, dst_stride_v,
...@@ -1371,38 +1362,6 @@ __asm { ...@@ -1371,38 +1362,6 @@ __asm {
} }
} }
#define HAS_ARGBTOI400ROW_SSSE3
// Converts a row of 4-byte ARGB pixels to one luma byte per pixel (MSVC
// inline assembly, naked function: arguments are read directly from the
// stack and the function issues its own ret).
//
// Each loop iteration loads 32 source bytes (8 pixels) with movdqa, which
// requires a 16-byte aligned src_argb, stores 8 bytes to dst_y with movq, and
// subtracts 8 from pix. The count is tested after the work is done, so at
// least one group of 8 pixels is always processed.
//
// Per pixel the result is ((13*byte0 + 64*byte1) >> 7) + ((33*byte2) >> 7)
// + 16, using the weights in kMultiplyMaskARGBToI400.
__declspec(naked)
static void ARGBToI400Row_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_y
mov ecx, [esp + 12] // pix
movdqa xmm7, _kMultiplyMaskARGBToI400
movdqa xmm6, _kMultiplyMaskARGBToI400_2
movdqa xmm5, xmm6
psllw xmm5, 4 // Generate a mask of 0x10 on each byte.
convertloop :
movdqa xmm0, [eax]
pmaddubsw xmm0, xmm7
movdqa xmm1, [eax + 16]
psrlw xmm0, 7
pmaddubsw xmm1, xmm7
lea eax, [eax + 32]
psrlw xmm1, 7
packuswb xmm0, xmm1
pmaddubsw xmm0, xmm6
packuswb xmm0, xmm0
paddb xmm0, xmm5
movq qword ptr [edx], xmm0
lea edx, [edx + 8]
sub ecx, 8
ja convertloop
ret
}
}
#elif (defined(__x86_64__) || defined(__i386__)) && \ #elif (defined(__x86_64__) || defined(__i386__)) && \
!defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR) !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
...@@ -1554,39 +1513,6 @@ static void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, ...@@ -1554,39 +1513,6 @@ static void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb,
); );
} }
#define HAS_ARGBTOI400ROW_SSSE3
// Converts a row of 4-byte ARGB pixels to one luma byte per pixel (GCC inline
// assembly).
//
// Each loop iteration loads 32 source bytes (8 pixels) with movdqa, which
// requires a 16-byte aligned src_argb, stores 8 bytes to dst_y with movq, and
// subtracts 8 from pix. The count is tested after the work is done, so pix
// must be a positive multiple of 8.
//
// Per pixel the result is ((13*byte0 + 64*byte1) >> 7) + ((33*byte2) >> 7)
// + 16. The clobber list names the condition codes and every xmm register
// the code writes, so the compiler does not keep live values in them.
static void ARGBToI400Row_SSSE3(const uint8* src_argb, uint8* dst_y,
                                int pix) {
  asm volatile(
  "movdqa (%3),%%xmm7\n"
  "movdqa (%4),%%xmm6\n"
  "movdqa %%xmm6,%%xmm5\n"
  "psllw $0x4,%%xmm5\n"  // Generate a mask of 0x10 on each byte.
"1:"
  "movdqa (%0),%%xmm0\n"
  "pmaddubsw %%xmm7,%%xmm0\n"
  "movdqa 0x10(%0),%%xmm1\n"
  "psrlw $0x7,%%xmm0\n"
  "pmaddubsw %%xmm7,%%xmm1\n"
  "lea 0x20(%0),%0\n"
  "psrlw $0x7,%%xmm1\n"
  "packuswb %%xmm1,%%xmm0\n"
  "pmaddubsw %%xmm6,%%xmm0\n"
  "packuswb %%xmm0,%%xmm0\n"
  "paddb %%xmm5,%%xmm0\n"
  "movq %%xmm0,(%1)\n"
  "lea 0x8(%1),%1\n"
  "sub $0x8,%2\n"
  "ja 1b\n"
  : "+r"(src_argb),   // %0
    "+r"(dst_y),      // %1
    "+r"(pix)         // %2
  : "r"(kMultiplyMaskARGBToI400),    // %3
    "r"(kMultiplyMaskARGBToI400_2)   // %4
  : "memory", "cc"
#if defined(__SSE2__)
    // Register names are only accepted when SSE is enabled for the build.
    , "xmm0", "xmm1", "xmm5", "xmm6", "xmm7"
#endif
  );
}
#endif #endif
static void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) { static void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix) {
...@@ -1812,16 +1738,6 @@ int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra, ...@@ -1812,16 +1738,6 @@ int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
return 0; return 0;
} }
static void ARGBToI400Row_C(const uint8* src_argb, uint8* dst_y, int pix) {
for (int x = 0; x < pix; ++x) {
uint32 b = static_cast<uint32>(src_argb[0] * 13u);
uint32 g = static_cast<uint32>(src_argb[1] * 64u);
uint32 r = static_cast<uint32>(src_argb[2] * 33u);
*(dst_y++) = static_cast<uint8>(((b + g + r) >> 7) + 16u);
src_argb += 4;
}
}
// Convert ARGB to I400. // Convert ARGB to I400.
int ARGBToI400(const uint8* src_argb, int src_stride_argb, int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y, uint8* dst_y, int dst_stride_y,
...@@ -1831,21 +1747,21 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb, ...@@ -1831,21 +1747,21 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
src_argb = src_argb + (height - 1) * src_stride_argb; src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
void (*ARGBToI400Row)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
#if defined(HAS_ARGBTOI400ROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3)
if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
(width % 4 == 0) && (width % 4 == 0) &&
IS_ALIGNED(src_argb, 16) && (src_stride_argb % 16 == 0) && IS_ALIGNED(src_argb, 16) && (src_stride_argb % 16 == 0) &&
IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
ARGBToI400Row = ARGBToI400Row_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} else } else
#endif #endif
{ {
ARGBToI400Row = ARGBToI400Row_C; ARGBToYRow = ARGBToYRow_C;
} }
for (int y = 0; y < height; ++y) { for (int y = 0; y < height; ++y) {
ARGBToI400Row(src_argb, dst_y, width); ARGBToYRow(src_argb, dst_y, width);
src_argb += src_stride_argb; src_argb += src_stride_argb;
dst_y += dst_stride_y; dst_y += dst_stride_y;
} }
......
...@@ -13,6 +13,16 @@ ...@@ -13,6 +13,16 @@
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#if (defined(WIN32) || defined(__x86_64__) || defined(__i386__)) \
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_ARGBTOYROW_SSSE3
#endif
#if defined(WIN32) \
&& !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
#define HAS_ARGBTOUVROW_SSSE3
#endif
extern "C" { extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf, void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -42,11 +52,24 @@ void FastConvertYToRGB32Row(const uint8* y_buf, ...@@ -42,11 +52,24 @@ void FastConvertYToRGB32Row(const uint8* y_buf,
uint8* rgb_buf, uint8* rgb_buf,
int width); int width);
#ifdef HAS_ARGBTOYROW_SSSE3
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#endif
void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
#if defined(_MSC_VER) #if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var #define SIMD_ALIGNED(var) __declspec(align(16)) var
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
#else #else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#define TALIGN16(t, var) t var __attribute__((aligned(16)))
#endif #endif
#ifdef OSX #ifdef OSX
extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]); extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]);
extern SIMD_ALIGNED(const int16 kCoefficientsBgraY[768][4]); extern SIMD_ALIGNED(const int16 kCoefficientsBgraY[768][4]);
......
...@@ -12,6 +12,91 @@ ...@@ -12,6 +12,91 @@
extern "C" { extern "C" {
#ifdef HAS_ARGBTOYROW_SSSE3
// Constant multiplication table for converting ARGB to I400.
// Per-byte weights for one 4-byte pixel (13, 64, 33, 0), repeated for the
// four pixels held in a 16-byte register. ARGBToYRow_SSSE3 loads this table
// and applies it with pmaddubsw.
extern "C" TALIGN16(const uint8, kMultiplyMaskARGBToI400[16]) = {
13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u, 13u, 64u, 33u, 0u
};
// Sixteen bytes of value 1 (not 16, despite the name). ARGBToYRow_SSSE3 uses
// it two ways: as a pmaddubsw multiplier that sums adjacent byte pairs, and,
// shifted left by 4, as the 0x10 per-byte offset added to each luma value.
extern "C" TALIGN16(const uint8, kAdd16[16]) = {
1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u, 1u
};
// Converts a row of 4-byte ARGB pixels to one luma byte per pixel (GCC inline
// assembly).
//
// Each loop iteration loads 32 source bytes (8 pixels) with movdqa, which
// requires a 16-byte aligned src_argb, stores 8 bytes to dst_y with movq, and
// subtracts 8 from pix. The count is tested after the work is done, so pix
// must be a positive multiple of 8.
//
// Per pixel the result is ((13*byte0 + 64*byte1) >> 7) + ((33*byte2) >> 7)
// + 16. The clobber list names the condition codes and every xmm register
// the code writes, so the compiler does not keep live values in them.
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
  asm volatile(
  "movdqa (%3),%%xmm7\n"
  "movdqa (%4),%%xmm6\n"
  "movdqa %%xmm6,%%xmm5\n"
  "psllw $0x4,%%xmm5\n"  // Generate a mask of 0x10 on each byte.
"1:"
  "movdqa (%0),%%xmm0\n"
  "pmaddubsw %%xmm7,%%xmm0\n"
  "movdqa 0x10(%0),%%xmm1\n"
  "psrlw $0x7,%%xmm0\n"
  "pmaddubsw %%xmm7,%%xmm1\n"
  "lea 0x20(%0),%0\n"
  "psrlw $0x7,%%xmm1\n"
  "packuswb %%xmm1,%%xmm0\n"
  "pmaddubsw %%xmm6,%%xmm0\n"
  "packuswb %%xmm0,%%xmm0\n"
  "paddb %%xmm5,%%xmm0\n"
  "movq %%xmm0,(%1)\n"
  "lea 0x8(%1),%1\n"
  "sub $0x8,%2\n"
  "ja 1b\n"
  : "+r"(src_argb),   // %0
    "+r"(dst_y),      // %1
    "+r"(pix)         // %2
  : "r"(kMultiplyMaskARGBToI400),  // %3
    "r"(kAdd16)                    // %4
  : "memory", "cc"
#if defined(__SSE2__)
    // Register names are only accepted when SSE is enabled for the build.
    , "xmm0", "xmm1", "xmm5", "xmm6", "xmm7"
#endif
  );
}
#endif
// Luma from 8-bit R, G, B: weighted sum in 8.8 fixed point, rounded by adding
// 128 before the shift, then offset by 16.
static inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return ((weighted + 128) >> 8) + 16;
}
// U chroma from 8-bit R, G, B, offset by 128. The weighted sum can be
// negative, so the result relies on >> acting as an arithmetic shift.
static inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 38 * r - 74 * g;
  return ((weighted + 128) >> 8) + 128;
}
// V chroma from 8-bit R, G, B, offset by 128. The weighted sum can be
// negative, so the result relies on >> acting as an arithmetic shift.
static inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return ((weighted + 128) >> 8) + 128;
}
void ARGBToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {
for (int x = 0; x < width; ++x) {
dst_y[0] = RGBToY(src_argb0[2], src_argb0[1], src_argb0[0]);
src_argb0 += 4;
dst_y += 1;
}
}
void ARGBToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
const uint8* src_argb1 = src_argb0 + src_stride_argb;
for (int x = 0; x < width - 1; x += 2) {
uint8 ab = (src_argb0[0] + src_argb0[4] + src_argb1[0] + src_argb1[4]) >> 2;
uint8 ag = (src_argb0[1] + src_argb0[5] + src_argb1[1] + src_argb1[5]) >> 2;
uint8 ar = (src_argb0[2] + src_argb0[6] + src_argb1[2] + src_argb1[6]) >> 2;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb0 += 8;
src_argb1 += 8;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8 ab = (src_argb0[0] + src_argb1[0]) >> 1;
uint8 ag = (src_argb0[1] + src_argb1[1]) >> 1;
uint8 ar = (src_argb0[2] + src_argb1[2]) >> 1;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
}
}
#if defined(__x86_64__) #if defined(__x86_64__)
// 64 bit linux gcc version // 64 bit linux gcc version
......
...@@ -12,6 +12,176 @@ ...@@ -12,6 +12,176 @@
extern "C" { extern "C" {
#ifdef HAS_ARGBTOYROW_SSSE3
#define TALIGN16(t, var) static __declspec(align(16)) t _ ## var
// Constant multiplication table for converting ARGB to I400.
// Signed per-byte weights for one 4-byte pixel, repeated for the four pixels
// in a 16-byte register. Loaded by ARGBToYRow_SSSE3 and applied with
// pmaddubsw.
extern "C" TALIGN16(const int8, kRGBToY[16]) = {
13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
};
// Signed per-byte weights for the U channel; loaded by ARGBToUVRow_SSSE3.
extern "C" TALIGN16(const int8, kRGBToU[16]) = {
112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
};
// Signed per-byte weights for the V channel; loaded by ARGBToUVRow_SSSE3.
extern "C" TALIGN16(const int8, kRGBToV[16]) = {
-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
};
// Offset of 16 in every byte; ARGBToYRow_SSSE3 adds it to the packed luma
// bytes with paddb.
extern "C" TALIGN16(const uint8, kAddY16[16]) = {
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
};
// Offset of 128 in every even byte, 0 in every odd byte. ARGBToUVRow_SSSE3
// adds it with paddb right after packsswb, when the chroma values occupy the
// even bytes and the odd bytes are zero.
extern "C" TALIGN16(const uint8, kAddUV128[16]) = {
128u, 0u, 128u, 0u, 128u, 0u, 128u, 0u,
128u, 0u, 128u, 0u, 128u, 0u, 128u, 0u
};
// Converts a row of 4-byte ARGB pixels to one luma byte per pixel (MSVC
// inline assembly, naked function: arguments are read directly from the
// stack and the function issues its own ret).
//
// Each loop iteration loads 32 source bytes (8 pixels) with movdqa, which
// requires a 16-byte aligned src_argb, stores 8 bytes to dst_y with movq, and
// subtracts 8 from pix. The count is tested after the work is done, so at
// least one group of 8 pixels is always processed.
//
// pmaddubsw with kRGBToY yields two 16-bit partial sums per pixel; palignr by
// 2 bytes plus paddw adds each pair together in the low word of every dword,
// and the 0x0000ffff mask discards the other word. xmm2/xmm3 are not
// initialised before palignr; the bytes they contribute land only in the top
// word, which the mask removes. The sum is then shifted right by 7, packed to
// bytes and offset by kAddY16.
__declspec(naked)
void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
__asm {
mov eax, [esp + 4] // src_argb
mov edx, [esp + 8] // dst_y
mov ecx, [esp + 12] // pix
movdqa xmm7, _kRGBToY
movdqa xmm6, _kAddY16
pcmpeqb xmm5, xmm5 // Generate mask 0x0000ffff
psrld xmm5, 16
convertloop :
movdqa xmm0, [eax]
movdqa xmm1, [eax + 16]
pmaddubsw xmm0, xmm7
lea eax, [eax + 32]
pmaddubsw xmm1, xmm7 // BG ra BG ra BG ra BG ra
palignr xmm2, xmm0, 2 // AR xx AR xx AR xx AR xx
paddw xmm2, xmm0 // BGRA xx BGRA xx BGRA xx BGRA xx
pand xmm2, xmm5 // BGRA 00 BGRA 00 BGRA 00 BGRA 00
palignr xmm3, xmm1, 2
paddw xmm3, xmm1
pand xmm3, xmm5 // BGRA 00 BGRA 00 BGRA 00 BGRA 00
packssdw xmm2, xmm3 // BGRA BGRA BGRA BGRA BGRA BGRA BGRA BGRA
psrlw xmm2, 7 // 0B xx 0B xx 0B xx 0B xx
packuswb xmm2, xmm2
paddb xmm2, xmm6
movq qword ptr [edx], xmm2
lea edx, [edx + 8]
sub ecx, 8
ja convertloop
ret
}
}
// Produces U and V bytes from two rows of 4-byte ARGB pixels (MSVC inline
// assembly, naked function). esi and edi are saved and restored with
// push/pop, which is why the argument offsets start at esp + 8.
//
// Each loop iteration:
//   1. loads 32 bytes (8 pixels) from the row at src_argb0 and 32 bytes from
//      the row at src_argb0 + src_stride_argb, averages the two rows with
//      pavgb, then averages horizontally adjacent pixels (shufps 0x88 / 0xdd
//      select even / odd pixels), leaving 4 averaged pixels;
//   2. multiplies those pixels by kRGBToU and kRGBToV, sums the partial
//      products per pixel, shifts right by 8 and adds the kAddUV128 offset;
//   3. stores 4 U bytes to dst_u and 4 V bytes to dst_v, then subtracts 8
//      from the width.
// All loads use movdqa, so both source rows must be 16-byte aligned. The
// count is tested after the work is done, so at least one group of 8 pixels
// is always processed.
__declspec(naked)
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
__asm {
push esi
push edi
mov eax, [esp + 8 + 4] // src_argb
mov esi, [esp + 8 + 8] // src_stride_argb
mov edx, [esp + 8 + 12] // dst_u
mov edi, [esp + 8 + 16] // dst_v
mov ecx, [esp + 8 + 20] // width
movdqa xmm7, _kRGBToU
movdqa xmm6, _kRGBToV
movdqa xmm5, _kAddUV128
pcmpeqb xmm4, xmm4 // Generate mask 0x0000ffff
psrld xmm4, 16
convertloop :
// step 1 - subsample 8x2 argb pixels to 4x1
movdqa xmm0, [eax] // 32x2 -> 32x1
movdqa xmm1, [eax + 16]
movdqa xmm2, [eax + esi]
movdqa xmm3, [eax + esi + 16]
lea eax, [eax + 32]
pavgb xmm0, xmm2
pavgb xmm1, xmm3
movdqa xmm2, xmm0 // 32x1 -> 16x1
shufps xmm0, xmm1, 0x88
shufps xmm2, xmm1, 0xdd
pavgb xmm0, xmm2
// step 2 - convert to U and V
// from here down is very similar to Y code except
// instead of 8 different pixels, its 4 pixels of U and 4 of V
movdqa xmm1, xmm0
pmaddubsw xmm0, xmm7 // U
pmaddubsw xmm1, xmm6 // V
palignr xmm2, xmm0, 2 // AR xx AR xx AR xx AR xx
paddw xmm2, xmm0 // BGRA xx BGRA xx BGRA xx BGRA xx
pand xmm2, xmm4 // BGRA 00 BGRA 00 BGRA 00 BGRA 00
palignr xmm3, xmm1, 2
paddw xmm3, xmm1
pand xmm3, xmm4 // BGRA 00 BGRA 00 BGRA 00 BGRA 00
psraw xmm2, 8
psraw xmm3, 8
packsswb xmm2, xmm3 // BGRA BGRA BGRA BGRA BGRA BGRA BGRA BGRA
paddb xmm2, xmm5 // -> unsigned
packuswb xmm2, xmm2 // 8 bytes. 4 U, 4 V
// step 3 - store 4 U and 4 V values
movd dword ptr [edx], xmm2 // U
lea edx, [edx + 4]
pshufd xmm0, xmm2, 0x55 // V
movd dword ptr [edi], xmm0
lea edi, [edi + 4]
sub ecx, 8
ja convertloop
pop edi
pop esi
ret
}
}
// Luma from 8-bit R, G, B: weighted sum in 8.8 fixed point, rounded by adding
// 128 before the shift, then offset by 16.
static inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return ((weighted + 128) >> 8) + 16;
}
// U chroma from 8-bit R, G, B, offset by 128. The weighted sum can be
// negative, so the result relies on >> acting as an arithmetic shift.
static inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 38 * r - 74 * g;
  return ((weighted + 128) >> 8) + 128;
}
// V chroma from 8-bit R, G, B, offset by 128. The weighted sum can be
// negative, so the result relies on >> acting as an arithmetic shift.
static inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return ((weighted + 128) >> 8) + 128;
}
void ARGBToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {
for (int x = 0; x < width; ++x) {
dst_y[0] = RGBToY(src_argb0[2], src_argb0[1], src_argb0[0]);
src_argb0 += 4;
dst_y += 1;
}
}
void ARGBToUVRow_C(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
const uint8* src_argb1 = src_argb0 + src_stride_argb;
for (int x = 0; x < width - 1; x += 2) {
uint8 ab = (src_argb0[0] + src_argb0[4] + src_argb1[0] + src_argb1[4]) >> 2;
uint8 ag = (src_argb0[1] + src_argb0[5] + src_argb1[1] + src_argb1[5]) >> 2;
uint8 ar = (src_argb0[2] + src_argb0[6] + src_argb1[2] + src_argb1[6]) >> 2;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
src_argb0 += 8;
src_argb1 += 8;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8 ab = (src_argb0[0] + src_argb1[0]) >> 1;
uint8 ag = (src_argb0[1] + src_argb1[1]) >> 1;
uint8 ar = (src_argb0[2] + src_argb1[2]) >> 1;
dst_u[0] = RGBToU(ar, ag, ab);
dst_v[0] = RGBToV(ar, ag, ab);
}
}
__declspec(naked) __declspec(naked)
void FastConvertYUVToRGB32Row(const uint8* y_buf, void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf, const uint8* u_buf,
...@@ -200,4 +370,6 @@ void FastConvertYToRGB32Row(const uint8* y_buf, ...@@ -200,4 +370,6 @@ void FastConvertYToRGB32Row(const uint8* y_buf,
} }
} }
#endif
} // extern "C" } // extern "C"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment