Commit fa91d621 authored by Rostislav Vasilikhin, committed by Alexander Alekhin

Merge pull request #12876 from savuor:color_rgb2rgb_wide

* RGB2RGB initially rewritten

* NEON impl removed

* templated version added for ushort, float

* data copying allowed for RGB2RGB

* inplace processing fixed

* fields to local vars

* no zeroupper until it's fixed

* vx_cleanup() added back
parent 2268ed1b
......@@ -12,183 +12,118 @@ namespace cv
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
template<typename _Tp> struct RGB2RGB
{
typedef _Tp channel_type;
template<typename _Tp> struct v_type;
RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {}
void operator()(const _Tp* src, _Tp* dst, int n) const
// Element-type -> vector-type trait, uchar case: the nested type `t` is v_uint8.
// RGB2RGB reads it as `typename v_type<_Tp>::t`.
template<>
struct v_type<uchar>
{
    typedef v_uint8 t;
};
// Element-type -> vector-type trait, ushort case: the nested type `t` is v_uint16.
// RGB2RGB reads it as `typename v_type<_Tp>::t`.
template<>
struct v_type<ushort>
{
    typedef v_uint16 t;
};
// Element-type -> vector-type trait, float case: the nested type `t` is v_float32.
// RGB2RGB reads it as `typename v_type<_Tp>::t`.
template<>
struct v_type<float>
{
    typedef v_float32 t;
};
template<typename _Tp> struct v_set;
template<>
struct v_set<uchar>
{
static inline v_type<uchar>::t set(uchar x)
{
int scn = srccn, dcn = dstcn, bidx = blueIdx;
if( dcn == 3 )
{
n *= 3;
for( int i = 0; i < n; i += 3, src += scn )
{
_Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
}
}
else if( scn == 3 )
{
n *= 3;
_Tp alpha = ColorChannel<_Tp>::max();
for( int i = 0; i < n; i += 3, dst += 4 )
{
_Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2];
dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
}
}
else
{
n *= 4;
for( int i = 0; i < n; i += 4 )
{
_Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
dst[i+bidx] = t0; dst[i+1] = t1; dst[i+(bidx^2)] = t2; dst[i+3] = t3;
}
}
return vx_setall_u8(x);
}
};
int srccn, dstcn, blueIdx;
// Scalar -> vector helper for ushort data.
// set(x) forwards to vx_setall_u16(x) and returns the resulting v_type<ushort>::t
// (presumably x replicated into every lane -- confirm against the intrinsics header).
// RGB2RGB calls v_set<_Tp>::set() to build the alpha vector for 3 -> 4 channel output.
template<>
struct v_set<ushort>
{
    static inline v_type<ushort>::t set(ushort x)
    {
        v_type<ushort>::t filled = vx_setall_u16(x);
        return filled;
    }
};
#if CV_NEON
// Scalar -> vector helper for float data.
// set(x) forwards to vx_setall_f32(x) and returns the resulting v_type<float>::t
// (presumably x replicated into every lane -- confirm against the intrinsics header).
// RGB2RGB calls v_set<_Tp>::set() to build the alpha vector for 3 -> 4 channel output.
template<>
struct v_set<float>
{
    static inline v_type<float>::t set(float x)
    {
        v_type<float>::t filled = vx_setall_f32(x);
        return filled;
    }
};
template<> struct RGB2RGB<uchar>
template<typename _Tp>
struct RGB2RGB
{
typedef uchar channel_type;
typedef _Tp channel_type;
typedef typename v_type<_Tp>::t vt;
RGB2RGB(int _srccn, int _dstcn, int _blueIdx) :
srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx)
{
v_alpha = vdupq_n_u8(ColorChannel<uchar>::max());
v_alpha2 = vget_low_u8(v_alpha);
CV_Assert(srccn == 3 || srccn == 4);
CV_Assert(dstcn == 3 || dstcn == 4);
}
void operator()(const uchar * src, uchar * dst, int n) const
void operator()(const _Tp* src, _Tp* dst, int n) const
{
int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0;
if (dcn == 3)
int scn = srccn, dcn = dstcn, bi = blueIdx;
int i = 0;
_Tp alphav = ColorChannel<_Tp>::max();
#if CV_SIMD
const int vsize = vt::nlanes;
for(; i < n-vsize+1;
i += vsize, src += vsize*scn, dst += vsize*dcn)
{
n *= 3;
if (scn == 3)
vt a, b, c, d;
if(scn == 4)
{
for ( ; i <= n - 48; i += 48, src += 48 )
{
uint8x16x3_t v_src = vld3q_u8(src), v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3q_u8(dst + i, v_dst);
}
for ( ; i <= n - 24; i += 24, src += 24 )
{
uint8x8x3_t v_src = vld3_u8(src), v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3_u8(dst + i, v_dst);
}
for ( ; i < n; i += 3, src += 3 )
{
uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
}
v_load_deinterleave(src, a, b, c, d);
}
else
{
for ( ; i <= n - 48; i += 48, src += 64 )
{
uint8x16x4_t v_src = vld4q_u8(src);
uint8x16x3_t v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3q_u8(dst + i, v_dst);
}
for ( ; i <= n - 24; i += 24, src += 32 )
{
uint8x8x4_t v_src = vld4_u8(src);
uint8x8x3_t v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx ^ 2];
vst3_u8(dst + i, v_dst);
}
for ( ; i < n; i += 3, src += 4 )
{
uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2;
}
}
}
else if (scn == 3)
{
n *= 3;
for ( ; i <= n - 48; i += 48, dst += 64 )
{
uint8x16x3_t v_src = vld3q_u8(src + i);
uint8x16x4_t v_dst;
v_dst.val[bidx] = v_src.val[0];
v_dst.val[1] = v_src.val[1];
v_dst.val[bidx ^ 2] = v_src.val[2];
v_dst.val[3] = v_alpha;
vst4q_u8(dst, v_dst);
v_load_deinterleave(src, a, b, c);
d = v_set<_Tp>::set(alphav);
}
for ( ; i <= n - 24; i += 24, dst += 32 )
if(bi == 2)
swap(a, c);
if(dcn == 4)
{
uint8x8x3_t v_src = vld3_u8(src + i);
uint8x8x4_t v_dst;
v_dst.val[bidx] = v_src.val[0];
v_dst.val[1] = v_src.val[1];
v_dst.val[bidx ^ 2] = v_src.val[2];
v_dst.val[3] = v_alpha2;
vst4_u8(dst, v_dst);
v_store_interleave(dst, a, b, c, d);
}
uchar alpha = ColorChannel<uchar>::max();
for (; i < n; i += 3, dst += 4 )
else
{
uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2];
dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha;
v_store_interleave(dst, a, b, c);
}
}
else
vx_cleanup();
#endif
for ( ; i < n; i++, src += scn, dst += dcn )
{
n *= 4;
for ( ; i <= n - 64; i += 64 )
{
uint8x16x4_t v_src = vld4q_u8(src + i), v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx^2];
v_dst.val[3] = v_src.val[3];
vst4q_u8(dst + i, v_dst);
}
for ( ; i <= n - 32; i += 32 )
_Tp t0 = src[0], t1 = src[1], t2 = src[2];
dst[bi ] = t0;
dst[1] = t1;
dst[bi^2] = t2;
if(dcn == 4)
{
uint8x8x4_t v_src = vld4_u8(src + i), v_dst;
v_dst.val[0] = v_src.val[bidx];
v_dst.val[1] = v_src.val[1];
v_dst.val[2] = v_src.val[bidx^2];
v_dst.val[3] = v_src.val[3];
vst4_u8(dst + i, v_dst);
}
for ( ; i < n; i += 4)
{
uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3];
dst[i+bidx] = t0; dst[i+1] = t1; dst[i+(bidx^2)] = t2; dst[i+3] = t3;
_Tp d = scn == 4 ? src[3] : alphav;
dst[3] = d;
}
}
}
int srccn, dstcn, blueIdx;
uint8x16_t v_alpha;
uint8x8_t v_alpha2;
};
#endif
/////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB //////////
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment