Commit 7db43f9f authored by Rostislav Vasilikhin's avatar Rostislav Vasilikhin Committed by Vadim Pisarevsky

fixed wrong equivalence in YUV conversion (#7481)

* fixed wrong equivalence in YUV conversion

* fixed channel order from YVU to YUV
parent b8c875d4
...@@ -105,6 +105,39 @@ ...@@ -105,6 +105,39 @@
namespace cv namespace cv
{ {
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
const float B2YF = 0.114f;
const float G2YF = 0.587f;
const float R2YF = 0.299f;
//to YCbCr
const float YCBF = 0.564f; // == 1/2/(1-B2YF)
const float YCRF = 0.713f; // == 1/2/(1-R2YF)
const int YCBI = 9241; // == YCBF*16384
const int YCRI = 11682; // == YCRF*16384
//to YUV
const float B2UF = 0.492f;
const float R2VF = 0.877f;
const int B2UI = 8061; // == B2UF*16384
const int R2VI = 14369; // == R2VF*16384
//from YUV
const float U2BF = 2.032f;
const float U2GF = -0.395f;
const float V2GF = -0.581f;
const float V2RF = 1.140f;
const int U2BI = 33292;
const int U2GI = -6472;
const int V2GI = -9519;
const int V2RI = 18678;
//from YCrCb
const float CB2BF = 1.773f;
const float CB2GF = -0.344f;
const float CR2GF = -0.714f;
const float CR2RF = 1.403f;
const int CB2BI = 29049;
const int CB2GI = -5636;
const int CR2GI = -11698;
const int CR2RI = 22987;
// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n // computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab) template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
...@@ -499,9 +532,9 @@ struct IPPColor2GrayFunctor ...@@ -499,9 +532,9 @@ struct IPPColor2GrayFunctor
IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
ippiColorToGray(_func) ippiColorToGray(_func)
{ {
coeffs[0] = 0.114f; coeffs[0] = B2YF;
coeffs[1] = 0.587f; coeffs[1] = G2YF;
coeffs[2] = 0.299f; coeffs[2] = R2YF;
} }
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
{ {
...@@ -1143,9 +1176,9 @@ enum ...@@ -1143,9 +1176,9 @@ enum
{ {
yuv_shift = 14, yuv_shift = 14,
xyz_shift = 12, xyz_shift = 12,
R2Y = 4899, R2Y = 4899, // == R2YF*16384
G2Y = 9617, G2Y = 9617, // == G2YF*16384
B2Y = 1868, B2Y = 1868, // == B2YF*16384
BLOCK_SIZE = 256 BLOCK_SIZE = 256
}; };
...@@ -1313,7 +1346,7 @@ template<typename _Tp> struct RGB2Gray ...@@ -1313,7 +1346,7 @@ template<typename _Tp> struct RGB2Gray
RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
{ {
static const float coeffs0[] = { 0.299f, 0.587f, 0.114f }; static const float coeffs0[] = { R2YF, G2YF, B2YF };
memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) ); memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
if(blueIdx == 0) if(blueIdx == 0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -1460,7 +1493,7 @@ struct RGB2Gray<float> ...@@ -1460,7 +1493,7 @@ struct RGB2Gray<float>
RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
{ {
static const float coeffs0[] = { 0.299f, 0.587f, 0.114f }; static const float coeffs0[] = { R2YF, G2YF, B2YF };
memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) ); memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
if(blueIdx == 0) if(blueIdx == 0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -1650,7 +1683,7 @@ struct RGB2Gray<float> ...@@ -1650,7 +1683,7 @@ struct RGB2Gray<float>
RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
{ {
static const float coeffs0[] = { 0.299f, 0.587f, 0.114f }; static const float coeffs0[] = { R2YF, G2YF, B2YF };
memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) ); memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
if(blueIdx == 0) if(blueIdx == 0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -1772,16 +1805,18 @@ template<typename _Tp> struct RGB2YCrCb_f ...@@ -1772,16 +1805,18 @@ template<typename _Tp> struct RGB2YCrCb_f
{ {
typedef _Tp channel_type; typedef _Tp channel_type;
RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx) RGB2YCrCb_f(int _srccn, int _blueIdx, bool _isCrCb) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; static const float coeffs_crb[] = { R2YF, G2YF, B2YF, YCRF, YCBF };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const float coeffs_yuv[] = { R2YF, G2YF, B2YF, R2VF, B2UF };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if(blueIdx==0) std::swap(coeffs[0], coeffs[2]); if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
} }
void operator()(const _Tp* src, _Tp* dst, int n) const void operator()(const _Tp* src, _Tp* dst, int n) const
{ {
int scn = srccn, bidx = blueIdx; int scn = srccn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const _Tp delta = ColorChannel<_Tp>::half(); const _Tp delta = ColorChannel<_Tp>::half();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
n *= 3; n *= 3;
...@@ -1790,10 +1825,11 @@ template<typename _Tp> struct RGB2YCrCb_f ...@@ -1790,10 +1825,11 @@ template<typename _Tp> struct RGB2YCrCb_f
_Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2); _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2);
_Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta); _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta);
_Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta); _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta);
dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb; dst[i] = Y; dst[i+1+yuvOrder] = Cr; dst[i+2-yuvOrder] = Cb;
} }
} }
int srccn, blueIdx; int srccn, blueIdx;
bool isCrCb;
float coeffs[5]; float coeffs[5];
}; };
...@@ -1804,11 +1840,12 @@ struct RGB2YCrCb_f<float> ...@@ -1804,11 +1840,12 @@ struct RGB2YCrCb_f<float>
{ {
typedef float channel_type; typedef float channel_type;
RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : RGB2YCrCb_f(int _srccn, int _blueIdx, bool _isCrCb) :
srccn(_srccn), blueIdx(_blueIdx) srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; static const float coeffs_crb[] = { R2YF, G2YF, B2YF, YCRF, YCBF };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const float coeffs_yuv[] = { R2YF, G2YF, B2YF, R2VF, B2UF };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if(blueIdx==0) if(blueIdx==0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -1823,6 +1860,7 @@ struct RGB2YCrCb_f<float> ...@@ -1823,6 +1860,7 @@ struct RGB2YCrCb_f<float>
void operator()(const float * src, float * dst, int n) const void operator()(const float * src, float * dst, int n) const
{ {
int scn = srccn, bidx = blueIdx, i = 0; int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half(); const float delta = ColorChannel<float>::half();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
n *= 3; n *= 3;
...@@ -1832,8 +1870,8 @@ struct RGB2YCrCb_f<float> ...@@ -1832,8 +1870,8 @@ struct RGB2YCrCb_f<float>
{ {
float32x4x3_t v_src = vld3q_f32(src), v_dst; float32x4x3_t v_src = vld3q_f32(src), v_dst;
v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2); v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3); v_dst.val[1+yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4); v_dst.val[2-yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
vst3q_f32(dst + i, v_dst); vst3q_f32(dst + i, v_dst);
} }
...@@ -1843,8 +1881,8 @@ struct RGB2YCrCb_f<float> ...@@ -1843,8 +1881,8 @@ struct RGB2YCrCb_f<float>
float32x4x4_t v_src = vld4q_f32(src); float32x4x4_t v_src = vld4q_f32(src);
float32x4x3_t v_dst; float32x4x3_t v_dst;
v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2); v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3); v_dst.val[1+yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4); v_dst.val[2-yuvOrder] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);
vst3q_f32(dst + i, v_dst); vst3q_f32(dst + i, v_dst);
} }
...@@ -1854,10 +1892,11 @@ struct RGB2YCrCb_f<float> ...@@ -1854,10 +1892,11 @@ struct RGB2YCrCb_f<float>
float Y = src[0]*C0 + src[1]*C1 + src[2]*C2; float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
float Cr = (src[bidx^2] - Y)*C3 + delta; float Cr = (src[bidx^2] - Y)*C3 + delta;
float Cb = (src[bidx] - Y)*C4 + delta; float Cb = (src[bidx] - Y)*C4 + delta;
dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb; dst[i] = Y; dst[i+1+yuvOrder] = Cr; dst[i+2-yuvOrder] = Cb;
} }
} }
int srccn, blueIdx; int srccn, blueIdx;
bool isCrCb;
float coeffs[5]; float coeffs[5];
float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta; float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
}; };
...@@ -1869,11 +1908,12 @@ struct RGB2YCrCb_f<float> ...@@ -1869,11 +1908,12 @@ struct RGB2YCrCb_f<float>
{ {
typedef float channel_type; typedef float channel_type;
RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : RGB2YCrCb_f(int _srccn, int _blueIdx, bool _isCrCb) :
srccn(_srccn), blueIdx(_blueIdx) srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; static const float coeffs_crb[] = { R2YF, G2YF, B2YF, YCRF, YCBF };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const float coeffs_yuv[] = { R2YF, G2YF, B2YF, R2VF, B2UF };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0) if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -1901,6 +1941,7 @@ struct RGB2YCrCb_f<float> ...@@ -1901,6 +1941,7 @@ struct RGB2YCrCb_f<float>
void operator()(const float * src, float * dst, int n) const void operator()(const float * src, float * dst, int n) const
{ {
int scn = srccn, bidx = blueIdx, i = 0; int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half(); const float delta = ColorChannel<float>::half();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
n *= 3; n *= 3;
...@@ -1934,14 +1975,19 @@ struct RGB2YCrCb_f<float> ...@@ -1934,14 +1975,19 @@ struct RGB2YCrCb_f<float>
process(v_r1, v_g1, v_b1, process(v_r1, v_g1, v_b1,
v_y1, v_cr1, v_cb1); v_y1, v_cr1, v_cb1);
_mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1); if(isCrCb)
_mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);
else //YUV
{
_mm_interleave_ps(v_y0, v_y1, v_cb0, v_cb1, v_cr0, v_cr1);
}
_mm_storeu_ps(dst + i, v_y0); _mm_storeu_ps(dst + i, v_y0);
_mm_storeu_ps(dst + i + 4, v_y1); _mm_storeu_ps(dst + i + 4, v_y1);
_mm_storeu_ps(dst + i + 8, v_cr0); _mm_storeu_ps(dst + i + 8 + yuvOrder*8, v_cr0);
_mm_storeu_ps(dst + i + 12, v_cr1); _mm_storeu_ps(dst + i + 12 + yuvOrder*8, v_cr1);
_mm_storeu_ps(dst + i + 16, v_cb0); _mm_storeu_ps(dst + i + 16 - yuvOrder*8, v_cb0);
_mm_storeu_ps(dst + i + 20, v_cb1); _mm_storeu_ps(dst + i + 20 - yuvOrder*8, v_cb1);
} }
} }
...@@ -1950,10 +1996,11 @@ struct RGB2YCrCb_f<float> ...@@ -1950,10 +1996,11 @@ struct RGB2YCrCb_f<float>
float Y = src[0]*C0 + src[1]*C1 + src[2]*C2; float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
float Cr = (src[bidx^2] - Y)*C3 + delta; float Cr = (src[bidx^2] - Y)*C3 + delta;
float Cb = (src[bidx] - Y)*C4 + delta; float Cb = (src[bidx] - Y)*C4 + delta;
dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb; dst[i] = Y; dst[i+1+yuvOrder] = Cr; dst[i+2-yuvOrder] = Cb;
} }
} }
int srccn, blueIdx; int srccn, blueIdx;
bool isCrCb;
float coeffs[5]; float coeffs[5];
__m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_delta; __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
bool haveSIMD; bool haveSIMD;
...@@ -1965,16 +2012,18 @@ template<typename _Tp> struct RGB2YCrCb_i ...@@ -1965,16 +2012,18 @@ template<typename _Tp> struct RGB2YCrCb_i
{ {
typedef _Tp channel_type; typedef _Tp channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if(blueIdx==0) std::swap(coeffs[0], coeffs[2]); if(blueIdx==0) std::swap(coeffs[0], coeffs[2]);
} }
void operator()(const _Tp* src, _Tp* dst, int n) const void operator()(const _Tp* src, _Tp* dst, int n) const
{ {
int scn = srccn, bidx = blueIdx; int scn = srccn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift); int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift);
n *= 3; n *= 3;
...@@ -1984,11 +2033,12 @@ template<typename _Tp> struct RGB2YCrCb_i ...@@ -1984,11 +2033,12 @@ template<typename _Tp> struct RGB2YCrCb_i
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<_Tp>(Y); dst[i] = saturate_cast<_Tp>(Y);
dst[i+1] = saturate_cast<_Tp>(Cr); dst[i+1+yuvOrder] = saturate_cast<_Tp>(Cr);
dst[i+2] = saturate_cast<_Tp>(Cb); dst[i+2-yuvOrder] = saturate_cast<_Tp>(Cb);
} }
} }
int srccn, blueIdx; int srccn, blueIdx;
bool isCrCb;
int coeffs[5]; int coeffs[5];
}; };
...@@ -1999,11 +2049,12 @@ struct RGB2YCrCb_i<uchar> ...@@ -1999,11 +2049,12 @@ struct RGB2YCrCb_i<uchar>
{ {
typedef uchar channel_type; typedef uchar channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0) if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -2019,6 +2070,7 @@ struct RGB2YCrCb_i<uchar> ...@@ -2019,6 +2070,7 @@ struct RGB2YCrCb_i<uchar>
void operator()(const uchar * src, uchar * dst, int n) const void operator()(const uchar * src, uchar * dst, int n) const
{ {
int scn = srccn, bidx = blueIdx, i = 0; int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<uchar>::half()*(1 << yuv_shift); int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
n *= 3; n *= 3;
...@@ -2067,8 +2119,8 @@ struct RGB2YCrCb_i<uchar> ...@@ -2067,8 +2119,8 @@ struct RGB2YCrCb_i<uchar>
v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift); v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1))); v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1))); v_dst.val[1+yuvOrder] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1))); v_dst.val[2-yuvOrder] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));
vst3_u8(dst + i, v_dst); vst3_u8(dst + i, v_dst);
} }
...@@ -2079,11 +2131,12 @@ struct RGB2YCrCb_i<uchar> ...@@ -2079,11 +2131,12 @@ struct RGB2YCrCb_i<uchar>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<uchar>(Y); dst[i] = saturate_cast<uchar>(Y);
dst[i+1] = saturate_cast<uchar>(Cr); dst[i+1+yuvOrder] = saturate_cast<uchar>(Cr);
dst[i+2] = saturate_cast<uchar>(Cb); dst[i+2-yuvOrder] = saturate_cast<uchar>(Cb);
} }
} }
int srccn, blueIdx, coeffs[5]; int srccn, blueIdx, coeffs[5];
bool isCrCb;
int16x4_t v_c0, v_c1, v_c2; int16x4_t v_c0, v_c1, v_c2;
int32x4_t v_c3, v_c4, v_delta, v_delta2; int32x4_t v_c3, v_c4, v_delta, v_delta2;
}; };
...@@ -2093,11 +2146,12 @@ struct RGB2YCrCb_i<ushort> ...@@ -2093,11 +2146,12 @@ struct RGB2YCrCb_i<ushort>
{ {
typedef ushort channel_type; typedef ushort channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0) if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -2113,6 +2167,7 @@ struct RGB2YCrCb_i<ushort> ...@@ -2113,6 +2167,7 @@ struct RGB2YCrCb_i<ushort>
void operator()(const ushort * src, ushort * dst, int n) const void operator()(const ushort * src, ushort * dst, int n) const
{ {
int scn = srccn, bidx = blueIdx, i = 0; int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<ushort>::half()*(1 << yuv_shift); int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
n *= 3; n *= 3;
...@@ -2155,8 +2210,8 @@ struct RGB2YCrCb_i<ushort> ...@@ -2155,8 +2210,8 @@ struct RGB2YCrCb_i<ushort>
v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift); v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);
v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1)); v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1)); v_dst.val[1+yuvOrder] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1)); v_dst.val[2-yuvOrder] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));
vst3q_u16(dst + i, v_dst); vst3q_u16(dst + i, v_dst);
} }
...@@ -2189,8 +2244,8 @@ struct RGB2YCrCb_i<ushort> ...@@ -2189,8 +2244,8 @@ struct RGB2YCrCb_i<ushort>
v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift); v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);
v_dst.val[0] = vqmovun_s32(v_Y); v_dst.val[0] = vqmovun_s32(v_Y);
v_dst.val[1] = vqmovun_s32(v_Cr); v_dst.val[1+yuvOrder] = vqmovun_s32(v_Cr);
v_dst.val[2] = vqmovun_s32(v_Cb); v_dst.val[2-yuvOrder] = vqmovun_s32(v_Cb);
vst3_u16(dst + i, v_dst); vst3_u16(dst + i, v_dst);
} }
...@@ -2201,11 +2256,12 @@ struct RGB2YCrCb_i<ushort> ...@@ -2201,11 +2256,12 @@ struct RGB2YCrCb_i<ushort>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<ushort>(Y); dst[i] = saturate_cast<ushort>(Y);
dst[i+1] = saturate_cast<ushort>(Cr); dst[i+1+yuvOrder] = saturate_cast<ushort>(Cr);
dst[i+2] = saturate_cast<ushort>(Cb); dst[i+2-yuvOrder] = saturate_cast<ushort>(Cb);
} }
} }
int srccn, blueIdx, coeffs[5]; int srccn, blueIdx, coeffs[5];
bool isCrCb;
int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2; int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
}; };
...@@ -2216,11 +2272,12 @@ struct RGB2YCrCb_i<uchar> ...@@ -2216,11 +2272,12 @@ struct RGB2YCrCb_i<uchar>
{ {
typedef uchar channel_type; typedef uchar channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0) if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -2229,7 +2286,10 @@ struct RGB2YCrCb_i<uchar> ...@@ -2229,7 +2286,10 @@ struct RGB2YCrCb_i<uchar>
v_delta_32 = _mm_set1_epi32(delta); v_delta_32 = _mm_set1_epi32(delta);
short delta2 = 1 + ColorChannel<uchar>::half() * 2; short delta2 = 1 + ColorChannel<uchar>::half() * 2;
v_coeff = _mm_set_epi16(delta2, (short)coeffs[4], delta2, (short)coeffs[3], delta2, (short)coeffs[4], delta2, (short)coeffs[3]); v_coeff = _mm_set_epi16(delta2, (short)coeffs[4], delta2, (short)coeffs[3], delta2, (short)coeffs[4], delta2, (short)coeffs[3]);
v_shuffle2 = _mm_set_epi8(0x0, 0x0, 0x0, 0x0, 0xf, 0xe, 0xc, 0xb, 0xa, 0x8, 0x7, 0x6, 0x4, 0x3, 0x2, 0x0); if(isCrCb)
v_shuffle2 = _mm_set_epi8(0x0, 0x0, 0x0, 0x0, 0xf, 0xe, 0xc, 0xb, 0xa, 0x8, 0x7, 0x6, 0x4, 0x3, 0x2, 0x0);
else //if YUV
v_shuffle2 = _mm_set_epi8(0x0, 0x0, 0x0, 0x0, 0xe, 0xf, 0xc, 0xa, 0xb, 0x8, 0x6, 0x7, 0x4, 0x2, 0x3, 0x0);
haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1); haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
} }
...@@ -2283,6 +2343,7 @@ struct RGB2YCrCb_i<uchar> ...@@ -2283,6 +2343,7 @@ struct RGB2YCrCb_i<uchar>
void operator()(const uchar * src, uchar * dst, int n) const void operator()(const uchar * src, uchar * dst, int n) const
{ {
int scn = srccn, bidx = blueIdx, i = 0; int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<uchar>::half()*(1 << yuv_shift); int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
n *= 3; n *= 3;
...@@ -2359,8 +2420,8 @@ struct RGB2YCrCb_i<uchar> ...@@ -2359,8 +2420,8 @@ struct RGB2YCrCb_i<uchar>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<uchar>(Y); dst[i] = saturate_cast<uchar>(Y);
dst[i+1] = saturate_cast<uchar>(Cr); dst[i+1+yuvOrder] = saturate_cast<uchar>(Cr);
dst[i+2] = saturate_cast<uchar>(Cb); dst[i+2-yuvOrder] = saturate_cast<uchar>(Cb);
} }
} }
...@@ -2368,6 +2429,7 @@ struct RGB2YCrCb_i<uchar> ...@@ -2368,6 +2429,7 @@ struct RGB2YCrCb_i<uchar>
__m128i v_coeff; __m128i v_coeff;
__m128i v_shuffle2; __m128i v_shuffle2;
int srccn, blueIdx, coeffs[5]; int srccn, blueIdx, coeffs[5];
bool isCrCb;
bool haveSIMD; bool haveSIMD;
}; };
...@@ -2376,11 +2438,12 @@ struct RGB2YCrCb_i<ushort> ...@@ -2376,11 +2438,12 @@ struct RGB2YCrCb_i<ushort>
{ {
typedef ushort channel_type; typedef ushort channel_type;
RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) RGB2YCrCb_i(int _srccn, int _blueIdx, bool _isCrCb)
: srccn(_srccn), blueIdx(_blueIdx) : srccn(_srccn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; static const int coeffs_crb[] = { R2Y, G2Y, B2Y, YCRI, YCBI };
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); static const int coeffs_yuv[] = { R2Y, G2Y, B2Y, R2VI, B2UI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 5*sizeof(coeffs[0]));
if (blueIdx==0) if (blueIdx==0)
std::swap(coeffs[0], coeffs[2]); std::swap(coeffs[0], coeffs[2]);
...@@ -2437,6 +2500,7 @@ struct RGB2YCrCb_i<ushort> ...@@ -2437,6 +2500,7 @@ struct RGB2YCrCb_i<ushort>
void operator()(const ushort * src, ushort * dst, int n) const void operator()(const ushort * src, ushort * dst, int n) const
{ {
int scn = srccn, bidx = blueIdx, i = 0; int scn = srccn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
int delta = ColorChannel<ushort>::half()*(1 << yuv_shift); int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
n *= 3; n *= 3;
...@@ -2471,14 +2535,17 @@ struct RGB2YCrCb_i<ushort> ...@@ -2471,14 +2535,17 @@ struct RGB2YCrCb_i<ushort>
process(v_r1, v_g1, v_b1, process(v_r1, v_g1, v_b1,
v_y1, v_cr1, v_cb1); v_y1, v_cr1, v_cb1);
_mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1); if(isCrCb)
_mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);
else //YUV
_mm_interleave_epi16(v_y0, v_y1, v_cb0, v_cb1, v_cr0, v_cr1);
_mm_storeu_si128((__m128i *)(dst + i), v_y0); _mm_storeu_si128((__m128i *)(dst + i), v_y0);
_mm_storeu_si128((__m128i *)(dst + i + 8), v_y1); _mm_storeu_si128((__m128i *)(dst + i + 8), v_y1);
_mm_storeu_si128((__m128i *)(dst + i + 16), v_cr0); _mm_storeu_si128((__m128i *)(dst + i + 16 + yuvOrder*16), v_cr0);
_mm_storeu_si128((__m128i *)(dst + i + 24), v_cr1); _mm_storeu_si128((__m128i *)(dst + i + 24 + yuvOrder*16), v_cr1);
_mm_storeu_si128((__m128i *)(dst + i + 32), v_cb0); _mm_storeu_si128((__m128i *)(dst + i + 32 - yuvOrder*16), v_cb0);
_mm_storeu_si128((__m128i *)(dst + i + 40), v_cb1); _mm_storeu_si128((__m128i *)(dst + i + 40 - yuvOrder*16), v_cb1);
} }
} }
...@@ -2488,12 +2555,13 @@ struct RGB2YCrCb_i<ushort> ...@@ -2488,12 +2555,13 @@ struct RGB2YCrCb_i<ushort>
int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
dst[i] = saturate_cast<ushort>(Y); dst[i] = saturate_cast<ushort>(Y);
dst[i+1] = saturate_cast<ushort>(Cr); dst[i+1+yuvOrder] = saturate_cast<ushort>(Cr);
dst[i+2] = saturate_cast<ushort>(Cb); dst[i+2-yuvOrder] = saturate_cast<ushort>(Cb);
} }
} }
int srccn, blueIdx, coeffs[5]; int srccn, blueIdx, coeffs[5];
bool isCrCb;
__m128i v_c0, v_c1, v_c2; __m128i v_c0, v_c1, v_c2;
__m128i v_c3, v_c4, v_delta, v_delta2; __m128i v_c3, v_c4, v_delta, v_delta2;
__m128i v_zero; __m128i v_zero;
...@@ -2506,23 +2574,25 @@ template<typename _Tp> struct YCrCb2RGB_f ...@@ -2506,23 +2574,25 @@ template<typename _Tp> struct YCrCb2RGB_f
{ {
typedef _Tp channel_type; typedef _Tp channel_type;
YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) YCrCb2RGB_f(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f}; static const float coeffs_cbr[] = {CR2RF, CR2GF, CB2GF, CB2BF};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const float coeffs_yuv[] = { V2RF, V2GF, U2GF, U2BF};
memcpy(coeffs, isCrCb ? coeffs_cbr : coeffs_yuv, 4*sizeof(coeffs[0]));
} }
void operator()(const _Tp* src, _Tp* dst, int n) const void operator()(const _Tp* src, _Tp* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx; int dcn = dstcn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max(); const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
for(int i = 0; i < n; i += 3, dst += dcn) for(int i = 0; i < n; i += 3, dst += dcn)
{ {
_Tp Y = src[i]; _Tp Y = src[i];
_Tp Cr = src[i+1]; _Tp Cr = src[i+1+yuvOrder];
_Tp Cb = src[i+2]; _Tp Cb = src[i+2-yuvOrder];
_Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3); _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
_Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1); _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
...@@ -2534,6 +2604,7 @@ template<typename _Tp> struct YCrCb2RGB_f ...@@ -2534,6 +2604,7 @@ template<typename _Tp> struct YCrCb2RGB_f
} }
} }
int dstcn, blueIdx; int dstcn, blueIdx;
bool isCrCb;
float coeffs[4]; float coeffs[4];
}; };
...@@ -2544,11 +2615,12 @@ struct YCrCb2RGB_f<float> ...@@ -2544,11 +2615,12 @@ struct YCrCb2RGB_f<float>
{ {
typedef float channel_type; typedef float channel_type;
YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) YCrCb2RGB_f(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f}; static const float coeffs_cbr[] = {CR2RF, CR2GF, CB2GF, CB2BF};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const float coeffs_yuv[] = { V2RF, V2GF, U2GF, U2BF};
memcpy(coeffs, isCrCb ? coeffs_cbr : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = vdupq_n_f32(coeffs[0]); v_c0 = vdupq_n_f32(coeffs[0]);
v_c1 = vdupq_n_f32(coeffs[1]); v_c1 = vdupq_n_f32(coeffs[1]);
...@@ -2561,6 +2633,7 @@ struct YCrCb2RGB_f<float> ...@@ -2561,6 +2633,7 @@ struct YCrCb2RGB_f<float>
void operator()(const float* src, float* dst, int n) const void operator()(const float* src, float* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx, i = 0; int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max(); const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
...@@ -2569,7 +2642,7 @@ struct YCrCb2RGB_f<float> ...@@ -2569,7 +2642,7 @@ struct YCrCb2RGB_f<float>
for ( ; i <= n - 12; i += 12, dst += 12) for ( ; i <= n - 12; i += 12, dst += 12)
{ {
float32x4x3_t v_src = vld3q_f32(src + i), v_dst; float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2]; float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1+yuvOrder], v_Cb = v_src.val[2-yuvOrder];
v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3); v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y); v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
...@@ -2582,7 +2655,7 @@ struct YCrCb2RGB_f<float> ...@@ -2582,7 +2655,7 @@ struct YCrCb2RGB_f<float>
{ {
float32x4x3_t v_src = vld3q_f32(src + i); float32x4x3_t v_src = vld3q_f32(src + i);
float32x4x4_t v_dst; float32x4x4_t v_dst;
float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2]; float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1+yuvOrder], v_Cb = v_src.val[2-yuvOrder];
v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3); v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y); v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
...@@ -2594,7 +2667,7 @@ struct YCrCb2RGB_f<float> ...@@ -2594,7 +2667,7 @@ struct YCrCb2RGB_f<float>
for ( ; i < n; i += 3, dst += dcn) for ( ; i < n; i += 3, dst += dcn)
{ {
float Y = src[i], Cr = src[i+1], Cb = src[i+2]; float Y = src[i], Cr = src[i+1+yuvOrder], Cb = src[i+2-yuvOrder];
float b = Y + (Cb - delta)*C3; float b = Y + (Cb - delta)*C3;
float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1; float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
...@@ -2606,6 +2679,7 @@ struct YCrCb2RGB_f<float> ...@@ -2606,6 +2679,7 @@ struct YCrCb2RGB_f<float>
} }
} }
int dstcn, blueIdx; int dstcn, blueIdx;
bool isCrCb;
float coeffs[4]; float coeffs[4];
float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta; float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
}; };
...@@ -2617,11 +2691,12 @@ struct YCrCb2RGB_f<float> ...@@ -2617,11 +2691,12 @@ struct YCrCb2RGB_f<float>
{ {
typedef float channel_type; typedef float channel_type;
YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) YCrCb2RGB_f(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f}; static const float coeffs_cbr[] = {CR2RF, CR2GF, CB2GF, CB2BF};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const float coeffs_yuv[] = { V2RF, V2GF, U2GF, U2BF};
memcpy(coeffs, isCrCb ? coeffs_cbr : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = _mm_set1_ps(coeffs[0]); v_c0 = _mm_set1_ps(coeffs[0]);
v_c1 = _mm_set1_ps(coeffs[1]); v_c1 = _mm_set1_ps(coeffs[1]);
...@@ -2639,6 +2714,9 @@ struct YCrCb2RGB_f<float> ...@@ -2639,6 +2714,9 @@ struct YCrCb2RGB_f<float>
v_cb = _mm_sub_ps(v_cb, v_delta); v_cb = _mm_sub_ps(v_cb, v_delta);
v_cr = _mm_sub_ps(v_cr, v_delta); v_cr = _mm_sub_ps(v_cr, v_delta);
if (!isCrCb)
std::swap(v_cb, v_cr);
v_b = _mm_mul_ps(v_cb, v_c3); v_b = _mm_mul_ps(v_cb, v_c3);
v_g = _mm_add_ps(_mm_mul_ps(v_cb, v_c2), _mm_mul_ps(v_cr, v_c1)); v_g = _mm_add_ps(_mm_mul_ps(v_cb, v_c2), _mm_mul_ps(v_cr, v_c1));
v_r = _mm_mul_ps(v_cr, v_c0); v_r = _mm_mul_ps(v_cr, v_c0);
...@@ -2654,6 +2732,7 @@ struct YCrCb2RGB_f<float> ...@@ -2654,6 +2732,7 @@ struct YCrCb2RGB_f<float>
void operator()(const float* src, float* dst, int n) const void operator()(const float* src, float* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx, i = 0; int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max(); const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
...@@ -2704,7 +2783,7 @@ struct YCrCb2RGB_f<float> ...@@ -2704,7 +2783,7 @@ struct YCrCb2RGB_f<float>
for ( ; i < n; i += 3, dst += dcn) for ( ; i < n; i += 3, dst += dcn)
{ {
float Y = src[i], Cr = src[i+1], Cb = src[i+2]; float Y = src[i], Cr = src[i+1+yuvOrder], Cb = src[i+2-yuvOrder];
float b = Y + (Cb - delta)*C3; float b = Y + (Cb - delta)*C3;
float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1; float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
...@@ -2716,6 +2795,7 @@ struct YCrCb2RGB_f<float> ...@@ -2716,6 +2795,7 @@ struct YCrCb2RGB_f<float>
} }
} }
int dstcn, blueIdx; int dstcn, blueIdx;
bool isCrCb;
float coeffs[4]; float coeffs[4];
__m128 v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta; __m128 v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
...@@ -2728,24 +2808,26 @@ template<typename _Tp> struct YCrCb2RGB_i ...@@ -2728,24 +2808,26 @@ template<typename _Tp> struct YCrCb2RGB_i
{ {
typedef _Tp channel_type; typedef _Tp channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {22987, -11698, -5636, 29049}; static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
} }
void operator()(const _Tp* src, _Tp* dst, int n) const void operator()(const _Tp* src, _Tp* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx; int dcn = dstcn, bidx = blueIdx;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max(); const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
for(int i = 0; i < n; i += 3, dst += dcn) for(int i = 0; i < n; i += 3, dst += dcn)
{ {
_Tp Y = src[i]; _Tp Y = src[i];
_Tp Cr = src[i+1]; _Tp Cr = src[i+1+yuvOrder];
_Tp Cb = src[i+2]; _Tp Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift); int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift); int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
...@@ -2759,6 +2841,7 @@ template<typename _Tp> struct YCrCb2RGB_i ...@@ -2759,6 +2841,7 @@ template<typename _Tp> struct YCrCb2RGB_i
} }
} }
int dstcn, blueIdx; int dstcn, blueIdx;
bool isCrCb;
int coeffs[4]; int coeffs[4];
}; };
...@@ -2769,11 +2852,12 @@ struct YCrCb2RGB_i<uchar> ...@@ -2769,11 +2852,12 @@ struct YCrCb2RGB_i<uchar>
{ {
typedef uchar channel_type; typedef uchar channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {22987, -11698, -5636, 29049}; static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = vdupq_n_s32(coeffs[0]); v_c0 = vdupq_n_s32(coeffs[0]);
v_c1 = vdupq_n_s32(coeffs[1]); v_c1 = vdupq_n_s32(coeffs[1]);
...@@ -2787,6 +2871,7 @@ struct YCrCb2RGB_i<uchar> ...@@ -2787,6 +2871,7 @@ struct YCrCb2RGB_i<uchar>
void operator()(const uchar* src, uchar* dst, int n) const void operator()(const uchar* src, uchar* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx, i = 0; int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max(); const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
...@@ -2800,8 +2885,8 @@ struct YCrCb2RGB_i<uchar> ...@@ -2800,8 +2885,8 @@ struct YCrCb2RGB_i<uchar>
v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2])); v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
int16x4_t v_Y = vget_low_s16(v_src16.val[0]), int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
v_Cr = vget_low_s16(v_src16.val[1]), v_Cr = vget_low_s16(v_src16.val[1+yuvOrder]),
v_Cb = vget_low_s16(v_src16.val[2]); v_Cb = vget_low_s16(v_src16.val[2-yuvOrder]);
int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta)); int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y); v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
...@@ -2811,8 +2896,8 @@ struct YCrCb2RGB_i<uchar> ...@@ -2811,8 +2896,8 @@ struct YCrCb2RGB_i<uchar>
v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y); v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
v_Y = vget_high_s16(v_src16.val[0]); v_Y = vget_high_s16(v_src16.val[0]);
v_Cr = vget_high_s16(v_src16.val[1]); v_Cr = vget_high_s16(v_src16.val[1+yuvOrder]);
v_Cb = vget_high_s16(v_src16.val[2]); v_Cb = vget_high_s16(v_src16.val[2-yuvOrder]);
int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta)); int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y); v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
...@@ -2847,8 +2932,8 @@ struct YCrCb2RGB_i<uchar> ...@@ -2847,8 +2932,8 @@ struct YCrCb2RGB_i<uchar>
for ( ; i < n; i += 3, dst += dcn) for ( ; i < n; i += 3, dst += dcn)
{ {
uchar Y = src[i]; uchar Y = src[i];
uchar Cr = src[i+1]; uchar Cr = src[i+1+yuvOrder];
uchar Cb = src[i+2]; uchar Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift); int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift); int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
...@@ -2862,6 +2947,7 @@ struct YCrCb2RGB_i<uchar> ...@@ -2862,6 +2947,7 @@ struct YCrCb2RGB_i<uchar>
} }
} }
int dstcn, blueIdx; int dstcn, blueIdx;
bool isCrCb;
int coeffs[4]; int coeffs[4];
int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2; int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
...@@ -2874,11 +2960,12 @@ struct YCrCb2RGB_i<ushort> ...@@ -2874,11 +2960,12 @@ struct YCrCb2RGB_i<ushort>
{ {
typedef ushort channel_type; typedef ushort channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {22987, -11698, -5636, 29049}; static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = vdupq_n_s32(coeffs[0]); v_c0 = vdupq_n_s32(coeffs[0]);
v_c1 = vdupq_n_s32(coeffs[1]); v_c1 = vdupq_n_s32(coeffs[1]);
...@@ -2893,6 +2980,7 @@ struct YCrCb2RGB_i<ushort> ...@@ -2893,6 +2980,7 @@ struct YCrCb2RGB_i<ushort>
void operator()(const ushort* src, ushort* dst, int n) const void operator()(const ushort* src, ushort* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx, i = 0; int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max(); const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
...@@ -2902,8 +2990,8 @@ struct YCrCb2RGB_i<ushort> ...@@ -2902,8 +2990,8 @@ struct YCrCb2RGB_i<ushort>
uint16x8x3_t v_src = vld3q_u16(src + i); uint16x8x3_t v_src = vld3q_u16(src + i);
int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))), int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))), v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1+yuvOrder]))),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2]))); v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2-yuvOrder])));
int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta)); int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y); v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
...@@ -2913,8 +3001,8 @@ struct YCrCb2RGB_i<ushort> ...@@ -2913,8 +3001,8 @@ struct YCrCb2RGB_i<ushort>
v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y); v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);
v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))), v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))), v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1+yuvOrder]))),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2]))); v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2-yuvOrder])));
int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta)); int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y); v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
...@@ -2951,8 +3039,8 @@ struct YCrCb2RGB_i<ushort> ...@@ -2951,8 +3039,8 @@ struct YCrCb2RGB_i<ushort>
uint16x4x3_t v_src = vld3_u16(src + i); uint16x4x3_t v_src = vld3_u16(src + i);
int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])), int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])), v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1+yuvOrder])),
v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2])); v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2-yuvOrder]));
int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta)); int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y); v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
...@@ -2987,8 +3075,8 @@ struct YCrCb2RGB_i<ushort> ...@@ -2987,8 +3075,8 @@ struct YCrCb2RGB_i<ushort>
for ( ; i < n; i += 3, dst += dcn) for ( ; i < n; i += 3, dst += dcn)
{ {
ushort Y = src[i]; ushort Y = src[i];
ushort Cr = src[i+1]; ushort Cr = src[i+1+yuvOrder];
ushort Cb = src[i+2]; ushort Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift); int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift); int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
...@@ -3002,6 +3090,7 @@ struct YCrCb2RGB_i<ushort> ...@@ -3002,6 +3090,7 @@ struct YCrCb2RGB_i<ushort>
} }
} }
int dstcn, blueIdx; int dstcn, blueIdx;
bool isCrCb;
int coeffs[4]; int coeffs[4];
int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta; int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
...@@ -3016,11 +3105,12 @@ struct YCrCb2RGB_i<uchar> ...@@ -3016,11 +3105,12 @@ struct YCrCb2RGB_i<uchar>
{ {
typedef uchar channel_type; typedef uchar channel_type;
YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) YCrCb2RGB_i(int _dstcn, int _blueIdx, bool _isCrCb)
: dstcn(_dstcn), blueIdx(_blueIdx) : dstcn(_dstcn), blueIdx(_blueIdx), isCrCb(_isCrCb)
{ {
static const int coeffs0[] = {22987, -11698, -5636, 29049}; static const int coeffs_crb[] = { CR2RI, CR2GI, CB2GI, CB2BI};
memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0])); static const int coeffs_yuv[] = { V2RI, V2GI, U2GI, U2BI };
memcpy(coeffs, isCrCb ? coeffs_crb : coeffs_yuv, 4*sizeof(coeffs[0]));
v_c0 = _mm_set1_epi16((short)coeffs[0]); v_c0 = _mm_set1_epi16((short)coeffs[0]);
v_c1 = _mm_set1_epi16((short)coeffs[1]); v_c1 = _mm_set1_epi16((short)coeffs[1]);
...@@ -3033,7 +3123,9 @@ struct YCrCb2RGB_i<uchar> ...@@ -3033,7 +3123,9 @@ struct YCrCb2RGB_i<uchar>
uchar alpha = ColorChannel<uchar>::max(); uchar alpha = ColorChannel<uchar>::max();
v_alpha = _mm_set1_epi8(*(char *)&alpha); v_alpha = _mm_set1_epi8(*(char *)&alpha);
useSSE = coeffs[0] <= std::numeric_limits<short>::max(); // when using YUV, one of coefficients is bigger than std::numeric_limits<short>::max(),
//which is not appropriate for SSE
useSSE = isCrCb;
haveSIMD = checkHardwareSupport(CV_CPU_SSE2); haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
} }
...@@ -3152,6 +3244,7 @@ struct YCrCb2RGB_i<uchar> ...@@ -3152,6 +3244,7 @@ struct YCrCb2RGB_i<uchar>
void operator()(const uchar* src, uchar* dst, int n) const void operator()(const uchar* src, uchar* dst, int n) const
{ {
int dcn = dstcn, bidx = blueIdx, i = 0; int dcn = dstcn, bidx = blueIdx, i = 0;
int yuvOrder = !isCrCb; //1 if YUV, 0 if YCrCb
const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max(); const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
n *= 3; n *= 3;
...@@ -3316,8 +3409,8 @@ struct YCrCb2RGB_i<uchar> ...@@ -3316,8 +3409,8 @@ struct YCrCb2RGB_i<uchar>
for ( ; i < n; i += 3, dst += dcn) for ( ; i < n; i += 3, dst += dcn)
{ {
uchar Y = src[i]; uchar Y = src[i];
uchar Cr = src[i+1]; uchar Cr = src[i+1+yuvOrder];
uchar Cb = src[i+2]; uchar Cb = src[i+2-yuvOrder];
int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift); int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift); int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
...@@ -3332,6 +3425,7 @@ struct YCrCb2RGB_i<uchar> ...@@ -3332,6 +3425,7 @@ struct YCrCb2RGB_i<uchar>
} }
int dstcn, blueIdx; int dstcn, blueIdx;
int coeffs[4]; int coeffs[4];
bool isCrCb;
bool useSSE, haveSIMD; bool useSSE, haveSIMD;
__m128i v_c0, v_c1, v_c2, v_c3, v_delta2; __m128i v_c0, v_c1, v_c2, v_c3, v_delta2;
...@@ -7992,7 +8086,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) ...@@ -7992,7 +8086,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_RGB2YUV: case COLOR_RGB2YUV:
{ {
CV_Assert(scn == 3 || scn == 4); CV_Assert(scn == 3 || scn == 4);
bidx = code == COLOR_RGB2YUV ? 0 : 2; bidx = code == COLOR_RGB2YUV ? 2 : 0;
dcn = 3; dcn = 3;
k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc, k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc,
opts + format("-D dcn=3 -D bidx=%d", bidx)); opts + format("-D dcn=3 -D bidx=%d", bidx));
...@@ -8001,9 +8095,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) ...@@ -8001,9 +8095,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
case COLOR_YUV2BGR: case COLOR_YUV2BGR:
case COLOR_YUV2RGB: case COLOR_YUV2RGB:
{ {
if(dcn < 0) dcn = 3; if(dcn <= 0) dcn = 3;
CV_Assert(dcn == 3 || dcn == 4); CV_Assert(dcn == 3 || dcn == 4);
bidx = code == COLOR_YUV2RGB ? 0 : 2; bidx = code == COLOR_YUV2RGB ? 2 : 0;
k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc, k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc,
opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); opts + format("-D dcn=%d -D bidx=%d", dcn, bidx));
break; break;
...@@ -8858,17 +8952,13 @@ void cvtBGRtoYUV(const uchar * src_data, size_t src_step, ...@@ -8858,17 +8952,13 @@ void cvtBGRtoYUV(const uchar * src_data, size_t src_step,
} }
#endif #endif
static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
const float* coeffs_f = isCbCr ? 0 : yuv_f;
const int* coeffs_i = isCbCr ? 0 : yuv_i;
int blueIdx = swapBlue ? 2 : 0; int blueIdx = swapBlue ? 2 : 0;
if( depth == CV_8U ) if( depth == CV_8U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<uchar>(scn, blueIdx, coeffs_i)); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<uchar>(scn, blueIdx, isCbCr));
else if( depth == CV_16U ) else if( depth == CV_16U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<ushort>(scn, blueIdx, coeffs_i)); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_i<ushort>(scn, blueIdx, isCbCr));
else else
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_f<float>(scn, blueIdx, coeffs_f)); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, RGB2YCrCb_f<float>(scn, blueIdx, isCbCr));
} }
void cvtYUVtoBGR(const uchar * src_data, size_t src_step, void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
...@@ -8914,17 +9004,13 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step, ...@@ -8914,17 +9004,13 @@ void cvtYUVtoBGR(const uchar * src_data, size_t src_step,
} }
#endif #endif
static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
const float* coeffs_f = isCbCr ? 0 : yuv_f;
const int* coeffs_i = isCbCr ? 0 : yuv_i;
int blueIdx = swapBlue ? 2 : 0; int blueIdx = swapBlue ? 2 : 0;
if( depth == CV_8U ) if( depth == CV_8U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<uchar>(dcn, blueIdx, coeffs_i)); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<uchar>(dcn, blueIdx, isCbCr));
else if( depth == CV_16U ) else if( depth == CV_16U )
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<ushort>(dcn, blueIdx, coeffs_i)); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_i<ushort>(dcn, blueIdx, isCbCr));
else else
CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_f<float>(dcn, blueIdx, coeffs_f)); CvtColorLoop(src_data, src_step, dst_data, dst_step, width, height, YCrCb2RGB_f<float>(dcn, blueIdx, isCbCr));
} }
void cvtBGRtoXYZ(const uchar * src_data, size_t src_step, void cvtBGRtoXYZ(const uchar * src_data, size_t src_step,
......
...@@ -84,6 +84,39 @@ enum ...@@ -84,6 +84,39 @@ enum
BLOCK_SIZE = 256 BLOCK_SIZE = 256
}; };
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
#define B2YF 0.114f
#define G2YF 0.587f
#define R2YF 0.299f
//to YCbCr
#define YCBF 0.564f
#define YCRF 0.713f
#define YCBI 9241
#define YCRI 11682
//to YUV
#define B2UF 0.492f
#define R2VF 0.877f
#define B2UI 8061
#define R2VI 14369
//from YUV
#define U2BF 2.032f
#define U2GF -0.395f
#define V2GF -0.581f
#define V2RF 1.140f
#define U2BI 33292
#define U2GI -6472
#define V2GI -9519
#define V2RI 18678
//from YCrCb
#define CR2RF 1.403f
#define CB2GF -0.344f
#define CR2GF -0.714f
#define CB2BF 1.773f
#define CR2RI 22987
#define CB2GI -5636
#define CR2GI -11698
#define CB2BI 29049
#define scnbytes ((int)sizeof(DATA_TYPE)*scn) #define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn) #define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
...@@ -151,7 +184,7 @@ __kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offs ...@@ -151,7 +184,7 @@ __kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offs
__global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index); __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index);
DATA_TYPE_4 src_pix = vload4(0, src); DATA_TYPE_4 src_pix = vload4(0, src);
#ifdef DEPTH_5 #ifdef DEPTH_5
dst[0] = fma(src_pix.B_COMP, 0.114f, fma(src_pix.G_COMP, 0.587f, src_pix.R_COMP * 0.299f)); dst[0] = fma(src_pix.B_COMP, B2YF, fma(src_pix.G_COMP, G2YF, src_pix.R_COMP * R2YF));
#else #else
dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, mul24(src_pix.R_COMP, R2Y))), yuv_shift); dst[0] = (DATA_TYPE)CV_DESCALE(mad24(src_pix.B_COMP, B2Y, mad24(src_pix.G_COMP, G2Y, mul24(src_pix.R_COMP, R2Y))), yuv_shift);
#endif #endif
...@@ -201,8 +234,8 @@ __kernel void Gray2RGB(__global const uchar * srcptr, int src_step, int src_offs ...@@ -201,8 +234,8 @@ __kernel void Gray2RGB(__global const uchar * srcptr, int src_step, int src_offs
///////////////////////////////////// RGB <-> YUV ////////////////////////////////////// ///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
__constant float c_RGB2YUVCoeffs_f[5] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; __constant float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, 8061, 14369 }; __constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI };
__kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset, __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset, __global uchar* dstptr, int dst_step, int dt_offset,
...@@ -251,8 +284,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset ...@@ -251,8 +284,8 @@ __kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset
} }
} }
__constant float c_YUV2RGBCoeffs_f[4] = { 2.032f, -0.395f, -0.581f, 1.140f }; __constant float c_YUV2RGBCoeffs_f[4] = { U2BF, U2GF, V2GF, V2RF };
__constant int c_YUV2RGBCoeffs_i[4] = { 33292, -6472, -9519, 18678 }; __constant int c_YUV2RGBCoeffs_i[4] = { U2BI, U2GI, V2GI, V2RI };
__kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset, __kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset, __global uchar* dstptr, int dst_step, int dt_offset,
...@@ -624,8 +657,8 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of ...@@ -624,8 +657,8 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
__constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; __constant float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241}; __constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI};
__kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offset, __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset, __global uchar* dstptr, int dst_step, int dt_offset,
...@@ -674,8 +707,8 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs ...@@ -674,8 +707,8 @@ __kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offs
} }
} }
__constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f }; __constant float c_YCrCb2RGBCoeffs_f[4] = { CR2RF, CR2GF, CB2GF, CB2BF };
__constant int c_YCrCb2RGBCoeffs_i[4] = { 22987, -11698, -5636, 29049 }; __constant int c_YCrCb2RGBCoeffs_i[4] = { CR2RI, CR2GI, CB2GI, CB2BI };
__kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset, __kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
__global uchar* dst, int dst_step, int dst_offset, __global uchar* dst, int dst_step, int dst_offset,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment