// Weighted sum of the four samples a, b, c, d using the weights ud*vd, u*vd,
// ud*v and u*v. The arithmetic is done in float; the _sat_rte conversion then
// clamps to the uchar range and rounds to nearest even.
// NOTE(review): this has the shape of a bilinear interpolation, which would
// require ud == 1 - u and vd == 1 - v; their definitions are not visible
// here -- confirm.
uchar4 dst_data = convert_uchar4_sat_rte((convert_float4(a))* ud * vd +(convert_float4(b))* u * vd + (convert_float4(c))* ud * v + (convert_float4(d)) * u * v );
// 8-lane weighted sum of the samples a, b, c, d with weights ud*vd, u*vd,
// ud*v and u*v, evaluated in float and converted back to uchar8 with
// saturation and round-to-nearest-even.
// NOTE(review): the types of u, v, ud, vd are not visible here; they must be
// scalars or 8-lane float vectors for this expression to be valid -- confirm.
uchar8 dst_data = convert_uchar8_sat_rte((convert_float8(a))* ud * vd +(convert_float8(b))* u * vd + (convert_float8(c))* ud * v + (convert_float8(d)) * u * v );
// Fill all 16 lanes by repeating nval four times (nval is presumably a
// 4-component uchar vector; its declaration is not visible here -- confirm).
uchar16 val = (uchar16)(nval, nval, nval, nval);
// Per-lane select for each sample vector: a lane keeps its current value where
// the matching lane of acc/bcc/ccc/dcc converts to 0, and is replaced by the
// corresponding lane of val otherwise.
// NOTE(review): acc..dcc look like per-lane "out of range" flags and val like
// a border fill value -- confirm against the code that sets them.
a = (convert_uchar16(acc) == (uchar16)0)? a : val;
b = (convert_uchar16(bcc) == (uchar16)0)? b : val;
c = (convert_uchar16(ccc) == (uchar16)0)? c : val;
d = (convert_uchar16(dcc) == (uchar16)0)? d : val;
// View the global-memory address dst + dstStart as a pointer to an 8-lane
// uchar vector.
// NOTE(review): dereferencing D requires this address to be suitably aligned
// for uchar8; the computation of dstStart is not visible here -- confirm.
__global uchar8* D = (__global uchar8 *)(dst + dstStart);
// Widen the 4-component weights to 16 lanes: each component is repeated into
// four consecutive lanes (x,x,x,x, y,y,y,y, z,z,z,z, w,w,w,w).
// NOTE(review): presumably one weight per pixel applied to 4 channels --
// confirm against the pixel layout used by the caller.
float16 U = (float16)((float4)(u.x), (float4)(u.y), (float4)(u.z), (float4)(u.w));
float16 V = (float16)((float4)(v.x), (float4)(v.y), (float4)(v.z), (float4)(v.w));
float16 Ud = (float16)((float4)(ud.x), (float4)(ud.y), (float4)(ud.z), (float4)(ud.w));
// 16-lane weighted sum of the samples a, b, c, d with weights Ud*Vd, U*Vd,
// Ud*V and U*V, converted back to uchar16 with saturation and
// round-to-nearest-even. Vd is not defined in the lines shown here.
uchar16 dst_data = convert_uchar16_sat_rte((convert_float16(a))* Ud * Vd +(convert_float16(b))* U * Vd + (convert_float16(c))* Ud * V + (convert_float16(d)) * U * V );
// View the global-memory address dst + dstStart as a pointer to a 16-lane
// uchar vector.
// NOTE(review): dereferencing D requires this address to be suitably aligned
// for uchar16; the computation of dstStart is not visible here -- confirm.
__global uchar16* D = (__global uchar16 *)(dst + dstStart);
// Read the 16 bytes currently stored at D.
uchar16 dVal = *D;
// Bounds test: Gx within [0, dst_cols * 4) and y within [0, dst_rows).
// NOTE(review): Gx is presumably an int16 of per-lane x offsets (making con a
// per-lane mask), and the << 2 presumably scales columns by 4 bytes per
// pixel; neither declaration is visible here -- confirm.
int16 con = (Gx >= 0 && Gx < (dst_cols<<2) && y >= 0 && y < dst_rows);