Commit 704c6882 authored by Rostislav Vasilikhin

OCL code fixed, fix for NEON added

parent 6c71988c
......@@ -6608,6 +6608,14 @@ struct RGB2Luv_f
{
float32x4x3_t v_src = vld3q_f32(src);
v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0));
v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0));
v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0));
v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1));
v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1));
v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1));
if( gammaTab )
{
v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale));
......@@ -8574,7 +8582,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
}
float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
un = 13*4*_whitept[0]*d;
vn = 13*9*_whitept[1]*d;
......@@ -8641,9 +8649,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
}
float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
un = 4*_whitept[0]*d;
vn = 9*_whitept[1]*d;
float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
un = 4*13*_whitept[0]*d;
vn = 9*13*_whitept[1]*d;
Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
}
......
......@@ -1963,6 +1963,10 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
float R = src[0], G = src[1], B = src[2];
R = clamp(R, 0.f, 1.f);
G = clamp(G, 0.f, 1.f);
B = clamp(B, 0.f, 1.f);
#ifdef SRGB
R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
......@@ -2067,15 +2071,21 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
__global const float * src = (__global const float *)(srcptr + src_index);
__global float * dst = (__global float *)(dstptr + dst_index);
float L = src[0], u = src[1], v = src[2], d, X, Y, Z;
Y = (L + 16.f) * (1.f/116.f);
Y = Y*Y*Y;
d = (1.f/13.f)/L;
u = fma(u, d, _un);
v = fma(v, d, _vn);
float iv = 1.f/v;
X = 2.25f * u * Y * iv;
Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
float L = src[0], u = src[1], v = src[2], X, Y, Z;
if(L >= 8)
{
Y = fma(L, 1.f/116.f, 16.f/116.f);
Y = Y*Y*Y;
}
else
{
Y = L * (1.0f/903.3f); // L*(3./29.)^3
}
float up = 3.f*fma(L, _un, u);
float vp = 0.25f/fma(L, _vn, v);
vp = clamp(vp, -0.25f, 0.25f);
X = 3.f*Y*up*vp;
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
......@@ -2129,14 +2139,20 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
float L = src[0]*(100.f/255.f);
float u = fma(convert_float(src[1]), 1.388235294117647f, -134.f);
float v = fma(convert_float(src[2]), 1.027450980392157f, - 140.f);
Y = (L + 16.f) * (1.f/116.f);
Y = Y*Y*Y;
d = (1.f/13.f)/L;
u = fma(u, d, _un);
v = fma(v, d, _vn);
float iv = 1.f/v;
X = 2.25f * u * Y * iv ;
Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
if(L >= 8)
{
Y = fma(L, 1.f/116.f, 16.f/116.f);
Y = Y*Y*Y;
}
else
{
Y = L * (1.0f/903.3f); // L*(3./29.)^3
}
float up = 3.f*fma(L, _un, u);
float vp = 0.25f/fma(L, _vn, v);
vp = clamp(vp, -0.25f, 0.25f);
X = 3.f*Y*up*vp;
Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment