Commit fa91d621 authored by Rostislav Vasilikhin, committed by Alexander Alekhin

Merge pull request #12876 from savuor:color_rgb2rgb_wide

* RGB2RGB initially rewritten

* NEON impl removed

* templated version added for ushort, float

* data copying allowed for RGB2RGB

* inplace processing fixed

* fields to local vars

* no zeroupper until it's fixed

* vx_cleanup() added back
parent 2268ed1b
...@@ -12,183 +12,118 @@ namespace cv ...@@ -12,183 +12,118 @@ namespace cv
////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// ////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
template<typename _Tp> struct RGB2RGB template<typename _Tp> struct v_type;
{
typedef _Tp channel_type;
RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {} template<>
void operator()(const _Tp* src, _Tp* dst, int n) const struct v_type<uchar>{
typedef v_uint8 t;
};
// Explicit specialization of the v_type trait for ushort channels:
// the nested typedef `t` names v_uint16, so v_type<ushort>::t is v_uint16.
template<>
struct v_type<ushort>{
typedef v_uint16 t;
};
// Explicit specialization of the v_type trait for float channels:
// the nested typedef `t` names v_float32, so v_type<float>::t is v_float32.
template<>
struct v_type<float>{
typedef v_float32 t;
};
template<typename _Tp> struct v_set;
template<>
struct v_set<uchar>
{
static inline v_type<uchar>::t set(uchar x)
{ {
int scn = srccn, dcn = dstcn, bidx = blueIdx; return vx_setall_u8(x);
if( dcn == 3 )
{
n *= 3;
for( int i = 0; i < n; i += 3, src += scn )
{
_Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
}
}
else if( scn == 3 )
{
n *= 3;
_Tp alpha = ColorChannel<_Tp>::max();
for( int i = 0; i < n; i += 3, dst += 4 )
{
_Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
}
}
else
{
n *= 4;
for( int i = 0; i < n; i += 4 )
{
_Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
dst[i+bidx] = t0; dst[i+1] = t1; dst[i+(bidx^2)] = t2; dst[i+3] = t3;
}
}
} }
};
int srccn, dstcn, blueIdx; template<>
struct v_set<ushort>
{
static inline v_type<ushort>::t set(ushort x)
{
return vx_setall_u16(x);
}
}; };
#if CV_NEON template<>
struct v_set<float>
{
static inline v_type<float>::t set(float x)
{
return vx_setall_f32(x);
}
};
template<> struct RGB2RGB<uchar> template<typename _Tp>
struct RGB2RGB
{ {
typedef uchar channel_type; typedef _Tp channel_type;
typedef typename v_type<_Tp>::t vt;
RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
{ {
v_alpha = vdupq_n_u8(ColorChannel<uchar>::max()); CV_Assert(srccn == 3 || srccn == 4);
v_alpha2 = vget_low_u8(v_alpha); CV_Assert(dstcn == 3 || dstcn == 4);
} }
void operator()(const uchar * src, uchar * dst, int n) const void operator()(const _Tp* src, _Tp* dst, int n) const
{ {
int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0; int scn = srccn, dcn = dstcn, bi = blueIdx;
if (dcn == 3) int i = 0;
_Tp alphav = ColorChannel<_Tp>::max();
#if CV_SIMD
const int vsize = vt::nlanes;
for(; i < n-vsize+1;
i += vsize, src += vsize*scn, dst += vsize*dcn)
{ {
n *= 3; vt a, b, c, d;
if (scn == 3) if(scn == 4)
{ {
for ( ; i <= n - 48; i += 48, src += 48 ) v_load_deinterleave(src, a, b, c, d);
{
uint8x16x3_t v_src = vld3q_u8(src), v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3q_u8(dst + i, v_dst);
}
for ( ; i <= n - 24; i += 24, src += 24 )
{
uint8x8x3_t v_src = vld3_u8(src), v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3_u8(dst + i, v_dst);
}
for ( ; i < n; i += 3, src += 3 )
{
uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
}
} }
else else
{ {
for ( ; i <= n - 48; i += 48, src += 64 ) v_load_deinterleave(src, a, b, c);
{ d = v_set<_Tp>::set(alphav);
uint8x16x4_t v_src = vld4q_u8(src);
uint8x16x3_t v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3q_u8(dst + i, v_dst);
}
for ( ; i <= n - 24; i += 24, src += 32 )
{
uint8x8x4_t v_src = vld4_u8(src);
uint8x8x3_t v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3_u8(dst + i, v_dst);
}
for ( ; i < n; i += 3, src += 4 )
{
uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
}
}
}
else if (scn == 3)
{
n *= 3;
for ( ; i <= n - 48; i += 48, dst += 64 )
{
uint8x16x3_t v_src = vld3q_u8(src + i);
uint8x16x4_t v_dst;
v_dst.val[bidx] = v_src.val[0];
v_dst.val[1] = v_src.val[1];
v_dst.val[bidx ^ 2] = v_src.val[2];
v_dst.val[3] = v_alpha;
vst4q_u8(dst, v_dst);
} }
for ( ; i <= n - 24; i += 24, dst += 32 ) if(bi == 2)
swap(a, c);
if(dcn == 4)
{ {
uint8x8x3_t v_src = vld3_u8(src + i); v_store_interleave(dst, a, b, c, d);
uint8x8x4_t v_dst;
v_dst.val[bidx] = v_src.val[0];
v_dst.val[1] = v_src.val[1];
v_dst.val[bidx ^ 2] = v_src.val[2];
v_dst.val[3] = v_alpha2;
vst4_u8(dst, v_dst);
} }
uchar alpha = ColorChannel<uchar>::max(); else
for (; i < n; i += 3, dst += 4 )
{ {
uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2]; v_store_interleave(dst, a, b, c);
dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
} }
} }
else vx_cleanup();
#endif
for ( ; i < n; i++, src += scn, dst += dcn )
{ {
n *= 4; _Tp t0 = src[0], t1 = src[1], t2 = src[2];
for ( ; i <= n - 64; i += 64 ) dst[bi ] = t0;
{ dst[1] = t1;
uint8x16x4_t v_src = vld4q_u8(src + i), v_dst; dst[bi^2] = t2;
v_dst.val[0] = v_src.val[bidx]; if(dcn == 4)
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx^2];
v_dst.val[3] = v_src.val[3];
vst4q_u8(dst + i, v_dst);
}
for ( ; i <= n - 32; i += 32 )
{ {
uint8x8x4_t v_src = vld4_u8(src + i), v_dst; _Tp d = scn == 4 ? src[3] : alphav;
v_dst.val[0] = v_src.val[bidx]; dst[3] = d;
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx^2];
v_dst.val[3] = v_src.val[3];
vst4_u8(dst + i, v_dst);
}
for ( ; i < n; i += 4)
{
uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
dst[i+bidx] = t0; dst[i+1] = t1; dst[i+(bidx^2)] = t2; dst[i+3] = t3;
} }
} }
} }
int srccn, dstcn, blueIdx; int srccn, dstcn, blueIdx;
uint8x16_t v_alpha;
uint8x8_t v_alpha2;
}; };
#endif
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB ////////// /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment