Bit-exact version of RGB2Luv_b (#9226)

* Imgproc_ColorLab_Full.accuracy test fixed
* Lab and Luv tests: rewritten, constants explained
* CV_ColorCvtBaseTest: added methods for 8u implementations
* Lab2RGB_b: bit-exactness enabled for all modes; non-vectorized code fixed to comply with vectorized
* srgb support added
* XYZ constants made softdouble
* bit-exact tests written for Lab
* ColorLab_full test fixed
* reverted: no 8u convertors for CV_ColorCvtBaseTest
* added checksum-based test for Lab bit-exactness
* extra declarations removed
* Lab test fix: stop at first mismatch
* test info output improved
* error message fixed
* lab_tetra squashed
* initial version is almost written
* unfinished work
* compilation fixed, to be debugged
* Lab test removed
* more fixes
* Luv2RGBinteger: channels order fixed
* Lab structs removed
* good trilinear interpolation added
* several fixes
* removed Luv2RGB interpolations, XYZ tables; 8-cell LUT added
* no_interpolate made 8-cell
* interpolations rewritten to 8-cell, minor fixes
* packed interpolation added for RGB2Luv
* tetra implemented
* removing unnecessary code
* LUT building merged
* changes ported to color.cpp
* minor fixes; try to suppress warnings
* fixed v range of Luv
* fixed incorrect src channel number
* minor fixes
* preliminary version of Luv2RGBinteger is done
* Luv2RGB_b is in progress
* XYZ color constants converted to softfloat
* Luv test: precision fixed
* Luv bit-exactness test added
* warnings fixed
* compilation fixed, error message fixed
* test_lab.cpp removed

Bit-exact version of RGB2Luv_b (#9226)
* Imgproc_ColorLab_Full.accuracy test fixed
* Lab and Luv tests: rewritten, constants explained
* CV_ColorCvtBaseTest: added methods for 8u implementations
* Lab2RGB_b: bit-exactness enabled for all modes; non-vectorized code fixed to comply with vectorized
* srgb support added
* XYZ constants made softdouble
* bit-exact tests written for Lab
* ColorLab_full test fixed
* reverted: no 8u convertors for CV_ColorCvtBaseTest
* added checksum-based test for Lab bit-exactness
* extra declarations removed
* Lab test fix: stop at first mismatch
* test info output improved
* error message fixed
* lab_tetra squashed
* initial version is almost written
* unfinished work
* compilation fixed, to be debugged
* Lab test removed
* more fixes
* Luv2RGBinteger: channels order fixed
* Lab structs removed
* good trilinear interpolation added
* several fixes
* removed Luv2RGB interpolations, XYZ tables; 8-cell LUT added
* no_interpolate made 8-cell
* interpolations rewritten to 8-cell, minor fixes
* packed interpolation added for RGB2Luv
* tetra implemented
* removing unnecessary code
* LUT building merged
* changes ported to color.cpp
* minor fixes; try to suppress warnings
* fixed v range of Luv
* fixed incorrect src channel number
* minor fixes
* preliminary version of Luv2RGBinteger is done
* Luv2RGB_b is in progress
* XYZ color constants converted to softfloat
* Luv test: precision fixed
* Luv bit-exactness test added
* warnings fixed
* compilation fixed, error message fixed
* test_lab.cpp removed
4435ec5f · Rostislav Vasilikhin · Vadim Pisarevsky · 9640bbe7 · 4435ec5f · 4435ec5f
Commit 4435ec5f authored Sep 14, 2017 by Rostislav Vasilikhin Committed by Vadim Pisarevsky Sep 14, 2017
Show whitespace changes
Inline Side-by-side

Showing with 651 additions and 146 deletions

color.cpp modules/imgproc/src/color.cpp +399 -145

test_color.cpp modules/imgproc/test/test_color.cpp +252 -1

No files found.
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -3475,27 +3475,47 @@ struct YCrCb2RGB_i<uchar>
 ////////////////////////////////////// RGB <-> XYZ ///////////////////////////////////////
-static const float sRGB2XYZ_D65[] =
+// 0.412453, 0.357580, 0.180423,
-{
+// 0.212671, 0.715160, 0.072169,
-    0.412453f, 0.357580f, 0.180423f,
+// 0.019334, 0.119193, 0.950227
-    0.212671f, 0.715160f, 0.072169f,
+static const softdouble sRGB2XYZ_D65[] =
-    0.019334f, 0.119193f, 0.950227f
+{
+    softdouble::fromRaw(0x3fda65a14488c60d),
+    softdouble::fromRaw(0x3fd6e297396d0918),
+    softdouble::fromRaw(0x3fc71819d2391d58),
+    softdouble::fromRaw(0x3fcb38cda6e75ff6),
+    softdouble::fromRaw(0x3fe6e297396d0918),
+    softdouble::fromRaw(0x3fb279aae6c8f755),
+    softdouble::fromRaw(0x3f93cc4ac6cdaf4b),
+    softdouble::fromRaw(0x3fbe836eb4e98138),
+    softdouble::fromRaw(0x3fee68427418d691)
 };
-static const float XYZ2sRGB_D65[] =
+//  3.240479, -1.53715, -0.498535,
-{
+// -0.969256, 1.875991, 0.041556,
-    3.240479f, -1.53715f, -0.498535f,
+//  0.055648, -0.204043, 1.057311
-   -0.969256f,  1.875991f, 0.041556f,
+static const softdouble XYZ2sRGB_D65[] =
-    0.055648f, -0.204043f, 1.057311f
+{
+    softdouble::fromRaw(0x4009ec804102ff8f),
+    softdouble::fromRaw(0xbff8982a9930be0e),
+    softdouble::fromRaw(0xbfdfe7ff583a53b9),
+    softdouble::fromRaw(0xbfef042528ae74f3),
+    softdouble::fromRaw(0x3ffe040f23897204),
+    softdouble::fromRaw(0x3fa546d3f9e7b80b),
+    softdouble::fromRaw(0x3fac7de5082cf52c),
+    softdouble::fromRaw(0xbfca1e14bdfd2631),
+    softdouble::fromRaw(0x3ff0eabef06b3786)
 };
 template<typename _Tp> struct RGB2XYZ_f
 {
    typedef _Tp channel_type;
    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
-        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
+        for(int i = 0; i < 9; i++)
+            coeffs[i] = _coeffs ? _coeffs[i] : (float)sRGB2XYZ_D65[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
@@ -3532,7 +3552,8 @@ struct RGB2XYZ_f<float>
    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
-        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
+        for(int i = 0; i < 9; i++)
+            coeffs[i] = _coeffs ? _coeffs[i] : (float)sRGB2XYZ_D65[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
@@ -3603,7 +3624,8 @@ struct RGB2XYZ_f<float>
    RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
-        memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0]));
+        for(int i = 0; i < 9; i++)
+            coeffs[i] = _coeffs ? _coeffs[i] : (float)sRGB2XYZ_D65[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[2]);
@@ -3999,7 +4021,8 @@ template<typename _Tp> struct XYZ2RGB_f
    XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
    : dstcn(_dstcn), blueIdx(_blueIdx)
    {
-        memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
+        for(int i = 0; i < 9; i++)
+            coeffs[i] = _coeffs ? _coeffs[i] : (float)XYZ2sRGB_D65[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[6]);
@@ -4040,7 +4063,8 @@ struct XYZ2RGB_f<float>
    XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
    : dstcn(_dstcn), blueIdx(_blueIdx)
    {
-        memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0]));
+        for(int i = 0; i < 9; i++)
+            coeffs[i] = _coeffs ? _coeffs[i] : XYZ2sRGB_D65[i];
        if(blueIdx == 0)
        {
            std::swap(coeffs[0], coeffs[6]);
@@ -5815,7 +5839,10 @@ struct HLS2RGB_b
 ///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////
-static const float D65[] = { 0.950456f, 1.f, 1.088754f };
+//0.950456, 1., 1.088754
+static const softdouble D65[] = {softdouble::fromRaw(0x3fee6a22b3892ee8),
+                                 softdouble::one(),
+                                 softdouble::fromRaw(0x3ff16b8950763a19)};
 enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
 static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
@@ -5851,6 +5878,12 @@ static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8];
 static ushort LabToYF_b[256*2];
 static const int minABvalue = -8145;
 static int abToXZ_b[LAB_BASE*9/4];
+// Luv constants
+static const bool enableRGB2LuvInterpolation = true;
+static const bool enablePackedRGB2Luv = true;
+static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8];
+static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow);
+static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow);
 #define clip(value) \
    value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value;
@@ -5980,40 +6013,55 @@ static void initLabTabs()
            abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
        }
-        if(enableRGB2LabInterpolation)
+        //try to suppress warning
+        static const bool calcLUT = enableRGB2LabInterpolation || enableRGB2LuvInterpolation;
+        if(calcLUT)
        {
-            const float* _whitept = D65;
+            softfloat scaledCoeffs[9], coeffs[9];
-            softfloat coeffs[9];
            //RGB2Lab coeffs
-            softfloat scaleWhite[] = { softfloat::one()/softfloat(_whitept[0]),
+            softdouble scaleWhite[] = { softdouble::one()/D65[0],
-                                       softfloat::one(),
+                                        softdouble::one(),
-                                       softfloat::one()/softfloat(_whitept[2]) };
+                                        softdouble::one()/D65[2] };
            for(i = 0; i < 3; i++ )
            {
-                int j = i * 3;
+                coeffs[i*3+2] = sRGB2XYZ_D65[i*3+0];
-                coeffs[j + 2] = scaleWhite[i] * softfloat(sRGB2XYZ_D65[j    ]);
+                coeffs[i*3+1] = sRGB2XYZ_D65[i*3+1];
-                coeffs[j + 1] = scaleWhite[i] * softfloat(sRGB2XYZ_D65[j + 1]);
+                coeffs[i*3+0] = sRGB2XYZ_D65[i*3+2];
-                coeffs[j + 0] = scaleWhite[i] * softfloat(sRGB2XYZ_D65[j + 2]);
+                scaledCoeffs[i*3+0] = sRGB2XYZ_D65[i*3+2] * scaleWhite[i];
+                scaledCoeffs[i*3+1] = sRGB2XYZ_D65[i*3+1] * scaleWhite[i];
+                scaledCoeffs[i*3+2] = sRGB2XYZ_D65[i*3+0] * scaleWhite[i];
            }
-            softfloat D0 = coeffs[0], D1 = coeffs[1], D2 = coeffs[2],
+            softfloat S0 = scaledCoeffs[0], S1 = scaledCoeffs[1], S2 = scaledCoeffs[2],
-                      D3 = coeffs[3], D4 = coeffs[4], D5 = coeffs[5],
+                      S3 = scaledCoeffs[3], S4 = scaledCoeffs[4], S5 = scaledCoeffs[5],
-                      D6 = coeffs[6], D7 = coeffs[7], D8 = coeffs[8];
+                      S6 = scaledCoeffs[6], S7 = scaledCoeffs[7], S8 = scaledCoeffs[8];
+            softfloat C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
+                      C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
+                      C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
-            //903.3f = (29/3)^3
+            softfloat dd = D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3);
+            dd = softfloat::one()/max(dd, softfloat(FLT_EPSILON));
+            softfloat un = dd*softfloat(13*4)*D65[0];
+            softfloat vn = dd*softfloat(13*9)*D65[1];
+            //u, v: [-134.0, 220.0], [-140.0, 122.0]
            static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16), f500(500), f200(200);
            static const softfloat f100(100), f128(128), f256(256), lbase((int)LAB_BASE);
+            //903.3f = (29/3)^3
            static const softfloat f9033 = softfloat(29*29*29)/softfloat(27);
+            static const softfloat f9of4 = softfloat(9)/softfloat(4);
+            static const softfloat f15(15), f3(3);
            AutoBuffer<int16_t> RGB2Labprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
+            AutoBuffer<int16_t> RGB2Luvprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
            for(int p = 0; p < LAB_LUT_DIM; p++)
            {
                for(int q = 0; q < LAB_LUT_DIM; q++)
                {
                    for(int r = 0; r < LAB_LUT_DIM; r++)
                    {
-                        //RGB 2 Lab LUT building
+                        int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3;
                        softfloat R = softfloat(p)/lld;
                        softfloat G = softfloat(q)/lld;
                        softfloat B = softfloat(r)/lld;
@@ -6022,9 +6070,11 @@ static void initLabTabs()
                        G = applyGamma(G);
                        B = applyGamma(B);
-                        softfloat X = R*D0 + G*D1 + B*D2;
+                        //RGB 2 Lab LUT building
-                        softfloat Y = R*D3 + G*D4 + B*D5;
+                        {
-                        softfloat Z = R*D6 + G*D7 + B*D8;
+                            softfloat X = R*S0 + G*S1 + B*S2;
+                            softfloat Y = R*S3 + G*S4 + B*S5;
+                            softfloat Z = R*S6 + G*S7 + B*S8;
                            softfloat FX = X > lthresh ? cbrt(X) : mulAdd(X, lscale, lbias);
                            softfloat FY = Y > lthresh ? cbrt(Y) : mulAdd(Y, lscale, lbias);
@@ -6034,11 +6084,29 @@ static void initLabTabs()
                            softfloat a = f500 * (FX - FY);
                            softfloat b = f200 * (FY - FZ);
-                        int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3;
                            RGB2Labprev[idx]   = (int16_t)(cvRound(lbase*L/f100));
                            RGB2Labprev[idx+1] = (int16_t)(cvRound(lbase*(a + f128)/f256));
                            RGB2Labprev[idx+2] = (int16_t)(cvRound(lbase*(b + f128)/f256));
                        }
+                        //RGB 2 Luv LUT building
+                        {
+                            softfloat X = R*C0 + G*C1 + B*C2;
+                            softfloat Y = R*C3 + G*C4 + B*C5;
+                            softfloat Z = R*C6 + G*C7 + B*C8;
+                            softfloat L = Y < lthresh ? mulAdd(Y, lscale, lbias) : cbrt(Y);
+                            L = L*f116 - f16;
+                            softfloat d = softfloat(4*13)/max(X + f15 * Y + f3 * Z, softfloat(FLT_EPSILON));
+                            softfloat u = L*(X*d - un);
+                            softfloat v = L*(f9of4*Y*d - vn);
+                            RGB2Luvprev[idx  ] = (int16_t)cvRound(lbase*L/f100);
+                            RGB2Luvprev[idx+1] = (int16_t)cvRound(lbase*(u-uLow)/uRange);
+                            RGB2Luvprev[idx+2] = (int16_t)cvRound(lbase*(v-vLow)/vRange);
+                        }
+                    }
                }
            }
            for(int p = 0; p < LAB_LUT_DIM; p++)
@@ -6057,6 +6125,9 @@ static void initLabTabs()
                                RGB2LabLUT_s16[idxnew]    = RGB2Labprev[idxold];\
                                RGB2LabLUT_s16[idxnew+8]  = RGB2Labprev[idxold+1];\
                                RGB2LabLUT_s16[idxnew+16] = RGB2Labprev[idxold+2];\
+                                RGB2LuvLUT_s16[idxnew]    = RGB2Luvprev[idxold];\
+                                RGB2LuvLUT_s16[idxnew+8]  = RGB2Luvprev[idxold+1];\
+                                RGB2LuvLUT_s16[idxnew+16] = RGB2Luvprev[idxold+2];\
                            } while(0)
                        FILL(0, 0, 0); FILL(0, 0, 1);
@@ -6228,17 +6299,25 @@ struct RGB2Lab_b
        static volatile int _3 = 3;
        initLabTabs();
-        if (!_coeffs)
+        softdouble whitePt[3];
-            _coeffs = sRGB2XYZ_D65;
+        for(int i = 0; i < 3; i++)
-        if (!_whitept)
+            if(_whitept)
-            _whitept = D65;
+                whitePt[i] = softdouble(_whitept[i]);
+            else
+                whitePt[i] = D65[i];
-        static const softfloat lshift(1 << lab_shift);
+        static const softdouble lshift(1 << lab_shift);
        for( int i = 0; i < _3; i++ )
        {
-            coeffs[i*3+(blueIdx^2)] = cvRound((lshift*softfloat(_coeffs[i*3  ]))/softfloat(_whitept[i]));
+            softdouble c[3];
-            coeffs[i*3+1]           = cvRound((lshift*softfloat(_coeffs[i*3+1]))/softfloat(_whitept[i]));
+            for(int j = 0; j < 3; j++)
-            coeffs[i*3+blueIdx]     = cvRound((lshift*softfloat(_coeffs[i*3+2]))/softfloat(_whitept[i]));
+                if(_coeffs)
+                    c[j] = softdouble(_coeffs[i*3+j]);
+                else
+                    c[j] = sRGB2XYZ_D65[i*3+j];
+            coeffs[i*3+(blueIdx^2)] = cvRound(lshift*c[0]/whitePt[i]);
+            coeffs[i*3+1]           = cvRound(lshift*c[1]/whitePt[i]);
+            coeffs[i*3+blueIdx]     = cvRound(lshift*c[2]/whitePt[i]);
            CV_Assert(coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
                      coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift));
@@ -6293,27 +6372,31 @@ struct RGB2Lab_f
        useInterpolation = (!_coeffs && !_whitept && srgb && enableRGB2LabInterpolation);
-        if (!_coeffs)
+        softdouble whitePt[3];
-            _coeffs = sRGB2XYZ_D65;
+        for(int i = 0; i < 3; i++)
-        if (!_whitept)
+            if(_whitept)
-            _whitept = D65;
+                whitePt[i] = softdouble((double)_whitept[i]);
+            else
+                whitePt[i] = D65[i];
-        softfloat scale[] = { softfloat::one() / softfloat(_whitept[0]),
+        softdouble scale[] = { softdouble::one() / whitePt[0],
-                              softfloat::one(),
+                               softdouble::one(),
-                              softfloat::one() / softfloat(_whitept[2]) };
+                               softdouble::one() / whitePt[2] };
        for( int i = 0; i < _3; i++ )
        {
-            int j = i * 3;
+            softfloat c[3];
-            softfloat c0 = scale[i] * softfloat(_coeffs[j    ]);
+            for(int k = 0; k < 3; k++)
-            softfloat c1 = scale[i] * softfloat(_coeffs[j + 1]);
+                if(_coeffs)
-            softfloat c2 = scale[i] * softfloat(_coeffs[j + 2]);
+                    c[k] = scale[i] * softdouble((double)_coeffs[i*3 + k]);
-            coeffs[j + (blueIdx ^ 2)] = c0;
+                else
-            coeffs[j + 1]             = c1;
+                    c[k] = scale[i] * sRGB2XYZ_D65[i*3 + k];
-            coeffs[j + blueIdx]       = c2;
+            coeffs[i*3 + (blueIdx ^ 2)] = c[0];
+            coeffs[i*3 + 1]             = c[1];
+            coeffs[i*3 + blueIdx]       = c[2];
-            CV_Assert( c0 >= 0 && c1 >= 0 && c2 >= 0 &&
+            CV_Assert( c[0] >= 0 && c[1] >= 0 && c[2] >= 0 &&
-                       c0 + c1 + c2 < softfloat((int)LAB_CBRT_TAB_SIZE) );
+                       c[0] + c[1] + c[2] < softfloat((int)LAB_CBRT_TAB_SIZE) );
        }
    }
@@ -6481,16 +6564,25 @@ struct Lab2RGBfloat
    {
        initLabTabs();
-        if(!_coeffs)
+        softdouble whitePt[3];
-            _coeffs = XYZ2sRGB_D65;
+        for(int i = 0; i < 3; i++)
-        if(!_whitept)
+            if(_whitept)
-            _whitept = D65;
+                whitePt[i] = softdouble((double)_whitept[i]);
+            else
+                whitePt[i] = D65[i];
        for( int i = 0; i < 3; i++ )
        {
-            coeffs[i+(blueIdx^2)*3] = (softfloat(_coeffs[i]  )*softfloat(_whitept[i]));
+            softdouble c[3];
-            coeffs[i+3]             = (softfloat(_coeffs[i+3])*softfloat(_whitept[i]));
+            for(int j = 0; j < 3; j++)
-            coeffs[i+blueIdx*3]     = (softfloat(_coeffs[i+6])*softfloat(_whitept[i]));
+                if(_coeffs)
+                    c[j] = softdouble(_coeffs[i+j*3]);
+                else
+                    c[j] = XYZ2sRGB_D65[i+j*3];
+            coeffs[i+(blueIdx^2)*3] = (float)(c[0]*whitePt[i]);
+            coeffs[i+3]             = (float)(c[1]*whitePt[i]);
+            coeffs[i+blueIdx*3]     = (float)(c[2]*whitePt[i]);
        }
        lThresh = softfloat(8); // 0.008856f * 903.3f  = (6/29)^3*(29/3)^3 = 8
@@ -6744,17 +6836,26 @@ struct Lab2RGBinteger
                    const float* _whitept, bool srgb )
    : dstcn(_dstcn)
    {
-        if(!_coeffs)
+        softdouble whitePt[3];
-            _coeffs = XYZ2sRGB_D65;
+        for(int i = 0; i < 3; i++)
-        if(!_whitept)
+            if(_whitept)
-            _whitept = D65;
+                whitePt[i] = softdouble(_whitept[i]);
+            else
+                whitePt[i] = D65[i];
-        static const softfloat lshift(1 << lab_shift);
+        static const softdouble lshift(1 << lab_shift);
        for(int i = 0; i < 3; i++)
        {
-            coeffs[i+(blueIdx)*3]   = cvRound(lshift*softfloat(_coeffs[i  ])*softfloat(_whitept[i]));
+            softdouble c[3];
-            coeffs[i+3]             = cvRound(lshift*softfloat(_coeffs[i+3])*softfloat(_whitept[i]));
+            for(int j = 0; j < 3; j++)
-            coeffs[i+(blueIdx^2)*3] = cvRound(lshift*softfloat(_coeffs[i+6])*softfloat(_whitept[i]));
+                if(_coeffs)
+                    c[j] = softdouble(_coeffs[i+j*3]);
+                else
+                    c[j] = XYZ2sRGB_D65[i+j*3];
+            coeffs[i+(blueIdx)*3]   = cvRound(lshift*c[0]*whitePt[i]);
+            coeffs[i+3]             = cvRound(lshift*c[1]*whitePt[i]);
+            coeffs[i+(blueIdx^2)*3] = cvRound(lshift*c[2]*whitePt[i]);
        }
        tab = srgb ? sRGBInvGammaTab_b : linearInvGammaTab_b;
@@ -7332,29 +7433,34 @@ struct Lab2RGB_b
    int dstcn;
 };
-#undef clip
 ///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
-struct RGB2Luv_f
+struct RGB2Luvfloat
 {
    typedef float channel_type;
-    RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
+    RGB2Luvfloat( int _srccn, int blueIdx, const float* _coeffs,
               const float* whitept, bool _srgb )
    : srccn(_srccn), srgb(_srgb)
    {
        volatile int i;
        initLabTabs();
-        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
+        softdouble whitePt[3];
-        if(!whitept) whitept = D65;
+        for( i = 0; i < 3; i++ )
+            if(whitept)
+                whitePt[i] = softdouble(whitept[i]);
+            else
+                whitePt[i] = D65[i];
        for( i = 0; i < 3; i++ )
        {
-            coeffs[i*3] = _coeffs[i*3];
+            for(int j = 0; j < 3; j++)
-            coeffs[i*3+1] = _coeffs[i*3+1];
+                if(_coeffs)
-            coeffs[i*3+2] = _coeffs[i*3+2];
+                    coeffs[i*3+j] = _coeffs[i*3+j];
+                else
+                    coeffs[i*3+j] = (float)(sRGB2XYZ_D65[i*3+j]);
            if( blueIdx == 0 )
                std::swap(coeffs[i*3], coeffs[i*3+2]);
            CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
@@ -7363,18 +7469,18 @@ struct RGB2Luv_f
                      softfloat(coeffs[i*3+2]) < softfloat(1.5f) );
        }
-        softfloat d = softfloat(whitept[0]) +
+        softfloat d = whitePt[0] +
-                      softfloat(whitept[1])*softfloat(15) +
+                      whitePt[1]*softdouble(15) +
-                      softfloat(whitept[2])*softfloat(3);
+                      whitePt[2]*softdouble(3);
        d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
-        un = d*softfloat(13*4)*softfloat(whitept[0]);
+        un = d*softfloat(13*4)*whitePt[0];
-        vn = d*softfloat(13*9)*softfloat(whitept[1]);
+        vn = d*softfloat(13*9)*whitePt[1];
        #if CV_SSE2
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        #endif
-        CV_Assert(whitept[1] == 1.f);
+        CV_Assert(whitePt[1] == softdouble::one());
    }
    #if CV_NEON
@@ -7640,6 +7746,23 @@ struct RGB2Luv_f
    #endif
 };
+struct RGB2Luv_f // float RGB -> Luv functor; a thin wrapper that forwards every call to an RGB2Luvfloat member
+{
+    typedef float channel_type;
+    RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
+               const float* whitept, bool _srgb )
+    : fcvt(_srccn, blueIdx, _coeffs, whitept, _srgb), srccn(_srccn) // all arguments are handed to fcvt unchanged
+    { }
+    void operator()(const float* src, float* dst, int n) const
+    {
+        fcvt(src, dst, n); // pure delegation: src, dst and n are passed through as-is
+    }
+    RGB2Luvfloat fcvt; // declared before srccn, matching the initializer-list order above
+    int srccn; // copy of _srccn; not read by any code inside this struct
+};
 struct Luv2RGB_f
 {
@@ -7651,27 +7774,38 @@ struct Luv2RGB_f
    {
        initLabTabs();
-        if(!_coeffs) _coeffs = XYZ2sRGB_D65;
+        softdouble whitePt[3];
-        if(!whitept) whitept = D65;
+        for(int i = 0; i < 3; i++)
+            if(whitept)
+                whitePt[i] = softdouble(whitept[i]);
+            else
+                whitePt[i] = D65[i];
        for( int i = 0; i < 3; i++ )
        {
-            coeffs[i+(blueIdx^2)*3] = _coeffs[i];
+            softfloat c[3];
-            coeffs[i+3] = _coeffs[i+3];
+            for(int j = 0; j < 3; j++)
-            coeffs[i+blueIdx*3] = _coeffs[i+6];
+                if(_coeffs)
+                    c[j] = softfloat(_coeffs[i+j*3]);
+                else
+                    c[j] = XYZ2sRGB_D65[i+j*3];
+            coeffs[i+(blueIdx^2)*3] = c[0];
+            coeffs[i+3]             = c[1];
+            coeffs[i+blueIdx*3]     = c[2];
        }
-        softfloat d = softfloat(whitept[0]) +
+        softfloat d = whitePt[0] +
-                      softfloat(whitept[1])*softfloat(15) +
+                      whitePt[1]*softdouble(15) +
-                      softfloat(whitept[2])*softfloat(3);
+                      whitePt[2]*softdouble(3);
        d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
-        un = softfloat(4*13)*d*softfloat(whitept[0]);
+        un = softfloat(4*13)*d*whitePt[0];
-        vn = softfloat(9*13)*d*softfloat(whitept[1]);
+        vn = softfloat(9*13)*d*whitePt[1];
        #if CV_SSE2
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        #endif
-        CV_Assert(whitept[1] == 1.f);
+        CV_Assert(whitePt[1] == softdouble::one());
    }
    #if CV_SSE2
@@ -7876,6 +8010,102 @@ struct Luv2RGB_f
    #endif
 };
+struct RGB2Luvinterpolate // 8-bit RGB -> Luv functor that looks results up in RGB2LuvLUT_s16 by trilinear interpolation
+{
+    typedef uchar channel_type;
+    RGB2Luvinterpolate( int _srccn, int _blueIdx, const float* /* _coeffs */, // coefficient, white point and srgb arguments are unnamed, hence unused here
+                        const float* /* _whitept */, bool /*_srgb*/ )
+    : srccn(_srccn), blueIdx(_blueIdx)
+    {
+        initLabTabs(); // builds the shared tables; the initLabTabs hunk of this patch is what fills RGB2LuvLUT_s16
+    }
+    void operator()(const uchar* src, uchar* dst, int n) const // reads scn channels per source pixel, writes 3 dst values per pixel
+    {
+        int i, scn = srccn, bIdx = blueIdx;
+        i = 0; n *= 3; // from here on n is the bound for i, which advances by 3 dst elements per pixel
+        if(enablePackedRGB2Luv) // file-level constant switch for the vectorized path
+        {
+            static const int nPixels = 8*2; // one v_uint8x16 per channel = two v_uint16x8 halves of 8 pixels
+            for(; i < n - 3*nPixels; i += 3*nPixels, src += scn*nPixels) // strict '<': a final block of exactly nPixels is left to the scalar loop below
+            {
+                /*
+                    int R = src[bIdx], G = src[1], B = src[bIdx^2];
+                    */
+                v_uint8x16 r16, g16, b16, dummy16;
+                if(scn == 3)
+                {
+                    v_load_deinterleave(src, r16, g16, b16);
+                }
+                else // scn == 4
+                {
+                    v_load_deinterleave(src, r16, g16, b16, dummy16); // 4th channel is loaded but its value is never used
+                }
+                if(bIdx) // channel 0 was loaded into r16; swap so that r16 holds src[bIdx] as in the scalar path
+                {
+                    dummy16 = r16; r16 = b16; b16 = dummy16; // dummy16 doubles as the swap temporary
+                }
+                /*
+                    static const int baseDiv = LAB_BASE/256;
+                    R = R*baseDiv, G = G*baseDiv, B = B*baseDiv;
+                    */
+                v_uint16x8 r80, r81, g80, g81, b80, b81;
+                v_expand(r16, r80, r81); // widen each channel to 16 bit: low 8 pixels in *80, high 8 pixels in *81
+                v_expand(g16, g80, g81);
+                v_expand(b16, b80, b81);
+                r80 = r80 << (lab_base_shift - 8); r81 = r81 << (lab_base_shift - 8); // NOTE(review): equals the scalar R*baseDiv only if LAB_BASE == 1 << lab_base_shift - confirm
+                g80 = g80 << (lab_base_shift - 8); g81 = g81 << (lab_base_shift - 8);
+                b80 = b80 << (lab_base_shift - 8); b81 = b81 << (lab_base_shift - 8);
+                /*
+                    int L, u, v;
+                    trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v);
+                    */
+                v_uint16x8 l80, u80, v80, l81, u81, v81;
+                trilinearPackedInterpolate(r80, g80, b80, RGB2LuvLUT_s16, l80, u80, v80); // first 8 pixels of the block
+                trilinearPackedInterpolate(r81, g81, b81, RGB2LuvLUT_s16, l81, u81, v81); // remaining 8 pixels of the block
+                /*
+                    dst[i] = saturate_cast<uchar>(L/baseDiv);
+                    dst[i+1] = saturate_cast<uchar>(u/baseDiv);
+                    dst[i+2] = saturate_cast<uchar>(v/baseDiv);
+                    */
+                l80 = l80 >> (lab_base_shift - 8); l81 = l81 >> (lab_base_shift - 8); // scale back down, the counterpart of the left shift above
+                u80 = u80 >> (lab_base_shift - 8); u81 = u81 >> (lab_base_shift - 8);
+                v80 = v80 >> (lab_base_shift - 8); v81 = v81 >> (lab_base_shift - 8);
+                v_uint8x16 l16 = v_pack(l80, l81); // NOTE(review): expected to saturate like saturate_cast<uchar> in the scalar path - confirm
+                v_uint8x16 u16 = v_pack(u80, u81);
+                v_uint8x16 v16 = v_pack(v80, v81);
+                v_store_interleave(dst + i, l16, u16, v16); // writes 16 interleaved (L, u, v) triples; dst itself is not advanced, i is the offset
+            }
+        }
+        for(; i < n; i += 3, src += scn) // scalar loop: handles the tail, or every pixel when the packed path is disabled
+        {
+            int R = src[bIdx], G = src[1], B = src[bIdx^2];
+            // (LAB_BASE/255) gives more accuracy but not very much
+            static const int baseDiv = LAB_BASE/256;
+            R = R*baseDiv, G = G*baseDiv, B = B*baseDiv;
+            int L, u, v;
+            trilinearInterpolate(R, G, B, RGB2LuvLUT_s16, L, u, v);
+            dst[i] = saturate_cast<uchar>(L/baseDiv); // results are scaled back by the same baseDiv before narrowing
+            dst[i+1] = saturate_cast<uchar>(u/baseDiv);
+            dst[i+2] = saturate_cast<uchar>(v/baseDiv);
+        }
+    }
+    int srccn; // number of channels per source pixel (3 or 4 according to the packed-path branches)
+    int blueIdx; // channel index used to pick R as src[blueIdx] and B as src[blueIdx^2]
+};
 struct RGB2Luv_b
 {
@@ -7883,21 +8113,26 @@ struct RGB2Luv_b
    RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
-    : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb)
+    : srccn(_srccn),
+      fcvt(3, blueIdx, _coeffs, _whitept, _srgb),
+      icvt(_srccn, blueIdx, _coeffs, _whitept, _srgb)
    {
-        //0.72033 = 255/(220+134), 96.525 = 134*255/(220+134)
+        useInterpolation = (!_coeffs && !_whitept && _srgb
-        //0.9732 = 255/(140+122), 136.259 = 140*255/(140+122)
+                            && enableBitExactness
+                            && enableRGB2LuvInterpolation);
+        static const softfloat f255(255);
        #if CV_NEON
-        v_scale_inv = vdupq_n_f32(1.f/255.f);
+        v_scale_inv = vdupq_n_f32(softfloat::one()/f255);
-        v_scale = vdupq_n_f32(2.55f);
+        v_scale = vdupq_n_f32(f255/softfloat(100));
-        v_coeff1 = vdupq_n_f32(0.72033898305084743f);
+        v_coeff1 = vdupq_n_f32(f255/uRange);
-        v_coeff2 = vdupq_n_f32(96.525423728813564f);
+        v_coeff2 = vdupq_n_f32(-uLow*f255/uRange);
-        v_coeff3 = vdupq_n_f32(0.9732824427480916f);
+        v_coeff3 = vdupq_n_f32(f255/vRange);
-        v_coeff4 = vdupq_n_f32(136.259541984732824f);
+        v_coeff4 = vdupq_n_f32(-vLow*f255/vRange);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
        #elif CV_SSE2
        v_zero = _mm_setzero_si128();
-        v_scale_inv = _mm_set1_ps(1.f/255.f);
+        v_scale_inv = _mm_set1_ps(softfloat::one()/f255);
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
        #endif
    }
@@ -7930,12 +8165,19 @@ struct RGB2Luv_b
    void operator()(const uchar* src, uchar* dst, int n) const
    {
+        if(useInterpolation)
+        {
+            icvt(src, dst, n);
+            return;
+        }
        int i, j, scn = srccn;
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];
+        static const softfloat f255(255);
        #if CV_SSE2
-        __m128 v_coeffs = _mm_set_ps(2.55f, 0.9732824427480916f, 0.72033898305084743f, 2.55f);
+        __m128 v_coeffs = _mm_set_ps(f255/softfloat(100), f255/vRange, f255/uRange, f255/softfloat(100));
-        __m128 v_res = _mm_set_ps(0.f, 136.259541984732824f, 96.525423728813564f, 0.f);
+        __m128 v_res = _mm_set_ps(0.f, -vLow*f255/vRange, -uLow*f255/uRange, 0.f);
        #endif
        for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 )
@@ -8015,13 +8257,14 @@ struct RGB2Luv_b
                    src -= jr, j -= jr;
            }
            #endif
+            static const softfloat f255inv = softfloat::one()/f255;
            for( ; j < dn*3; j += 3, src += scn )
            {
-                buf[j] = src[0]*(1.f/255.f);
+                buf[j  ] = (float)(src[0]*((float)f255inv));
-                buf[j+1] = (float)(src[1]*(1.f/255.f));
+                buf[j+1] = (float)(src[1]*((float)f255inv));
-                buf[j+2] = (float)(src[2]*(1.f/255.f));
+                buf[j+2] = (float)(src[2]*((float)f255inv));
            }
-            cvt(buf, buf, dn);
+            fcvt(buf, buf, dn);
            j = 0;
            #if CV_NEON
@@ -8056,17 +8299,23 @@ struct RGB2Luv_b
            }
            #endif
+            static const softfloat fL = f255/softfloat(100);
+            static const softfloat fu = f255/uRange;
+            static const softfloat fv = f255/vRange;
+            static const softfloat su = -uLow*f255/uRange;
+            static const softfloat sv = -vLow*f255/vRange;
            for( ; j < dn*3; j += 3 )
            {
-                dst[j] = saturate_cast<uchar>(buf[j]*2.55f);
+                dst[j] = saturate_cast<uchar>(buf[j]*(float)fL);
-                dst[j+1] = saturate_cast<uchar>(buf[j+1]*0.72033898305084743f + 96.525423728813564f);
+                dst[j+1] = saturate_cast<uchar>(buf[j+1]*(float)fu + (float)su);
-                dst[j+2] = saturate_cast<uchar>(buf[j+2]*0.9732824427480916f + 136.259541984732824f);
+                dst[j+2] = saturate_cast<uchar>(buf[j+2]*(float)fv + (float)sv);
            }
        }
    }
    int srccn;
-    RGB2Luv_f cvt;
+    RGB2Luvfloat fcvt;
+    RGB2Luvinterpolate icvt;
    #if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_coeff3, v_coeff4;
@@ -8076,6 +8325,7 @@ struct RGB2Luv_b
    __m128i v_zero;
    bool haveSIMD;
    #endif
+    bool useInterpolation;
 };
@@ -8316,6 +8566,7 @@ struct Luv2RGB_b
    #endif
 };
+#undef clip
 ///////////////////////////////////// YUV420 -> RGB /////////////////////////////////////
@@ -9407,13 +9658,12 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
            {
                int coeffs[9];
-                const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
                static const softfloat lshift(1 << lab_shift);
                for( int i = 0; i < 3; i++ )
                {
-                    coeffs[i*3+(bidx^2)] = cvRound(lshift*softfloat(_coeffs[i*3  ])/softfloat(_whitept[i]));
+                    coeffs[i*3+(bidx^2)] = cvRound(lshift*sRGB2XYZ_D65[i*3  ]/D65[i]);
-                    coeffs[i*3+1]        = cvRound(lshift*softfloat(_coeffs[i*3+1])/softfloat(_whitept[i]));
+                    coeffs[i*3+1]        = cvRound(lshift*sRGB2XYZ_D65[i*3+1]/D65[i]);
-                    coeffs[i*3+bidx]     = cvRound(lshift*softfloat(_coeffs[i*3+2])/softfloat(_whitept[i]));
+                    coeffs[i*3+bidx]     = cvRound(lshift*sRGB2XYZ_D65[i*3+2]/D65[i]);
                    CV_Assert(coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
                              coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift));
@@ -9440,19 +9690,21 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
            {
                float coeffs[9];
-                const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
+                softdouble whitePt[3];
+                for(int i = 0; i < 3; i++)
+                    whitePt[i] = D65[i];
-                softfloat scale[] = { softfloat::one() / softfloat(_whitept[0]),
+                softdouble scale[] = { softdouble::one() / whitePt[0],
-                                      softfloat::one(),
+                                       softdouble::one(),
-                                      softfloat::one() / softfloat(_whitept[2]) };
+                                       softdouble::one() / whitePt[2] };
                for (int i = 0; i < 3; i++)
                {
                    int j = i * 3;
-                    softfloat c0 = (lab ? scale[i] : softfloat::one()) * softfloat(_coeffs[j    ]);
+                    softfloat c0 = (lab ? scale[i] : softdouble::one()) * sRGB2XYZ_D65[j    ];
-                    softfloat c1 = (lab ? scale[i] : softfloat::one()) * softfloat(_coeffs[j + 1]);
+                    softfloat c1 = (lab ? scale[i] : softdouble::one()) * sRGB2XYZ_D65[j + 1];
-                    softfloat c2 = (lab ? scale[i] : softfloat::one()) * softfloat(_coeffs[j + 2]);
+                    softfloat c2 = (lab ? scale[i] : softdouble::one()) * sRGB2XYZ_D65[j + 2];
                    coeffs[j + (bidx ^ 2)] = c0;
                    coeffs[j + 1]          = c1;
@@ -9462,12 +9714,12 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
                               c0 + c1 + c2 < (lab ? softfloat((int)LAB_CBRT_TAB_SIZE) : softfloat(3)/softfloat(2)));
                }
-                softfloat d = softfloat(_whitept[0]) +
+                softfloat d = whitePt[0] +
-                              softfloat(_whitept[1])*softfloat(15) +
+                              whitePt[1]*softdouble(15) +
-                              softfloat(_whitept[2])*softfloat(3);
+                              whitePt[2]*softdouble(3);
                d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
-                un = d*softfloat(13*4)*softfloat(_whitept[0]);
+                un = d*softfloat(13*4)*whitePt[0];
-                vn = d*softfloat(13*9)*softfloat(_whitept[1]);
+                vn = d*softfloat(13*9)*whitePt[1];
                Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
            }
@@ -9524,21 +9776,23 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
        {
            float coeffs[9];
-            const float * const _coeffs = XYZ2sRGB_D65, * const _whitept = D65;
+            softdouble whitePt[3];
+            for(int i = 0; i < 3; i++)
+                whitePt[i] = D65[i];
            for( int i = 0; i < 3; i++ )
            {
-                coeffs[i+(bidx^2)*3] = softfloat(_coeffs[i]  )*softfloat(lab ? _whitept[i] : 1);
+                coeffs[i+(bidx^2)*3] = (float)(XYZ2sRGB_D65[i  ]*(lab ? whitePt[i] : softdouble::one()));
-                coeffs[i+3]          = softfloat(_coeffs[i+3])*softfloat(lab ? _whitept[i] : 1);
+                coeffs[i+3]          = (float)(XYZ2sRGB_D65[i+3]*(lab ? whitePt[i] : softdouble::one()));
-                coeffs[i+bidx*3]     = softfloat(_coeffs[i+6])*softfloat(lab ? _whitept[i] : 1);
+                coeffs[i+bidx*3]     = (float)(XYZ2sRGB_D65[i+6]*(lab ? whitePt[i] : softdouble::one()));
            }
-            softfloat d = softfloat(_whitept[0]) +
+            softfloat d = whitePt[0] +
-                          softfloat(_whitept[1])*softfloat(15) +
+                          whitePt[1]*softdouble(15) +
-                          softfloat(_whitept[2])*softfloat(3);
+                          whitePt[2]*softdouble(3);
            d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
-            un = softfloat(4*13)*d*softfloat(_whitept[0]);
+            un = softfloat(4*13)*d*whitePt[0];
-            vn = softfloat(9*13)*d*softfloat(_whitept[1]);
+            vn = softfloat(9*13)*d*whitePt[1];
            Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
        }

--- a/modules/imgproc/test/test_color.cpp
+++ b/modules/imgproc/test/test_color.cpp
@@ -1256,7 +1256,10 @@ void CV_ColorLuvTest::get_test_array_types_and_sizes( int test_case_idx, vector<
 double CV_ColorLuvTest::get_success_error_level( int /*test_case_idx*/, int i, int j )
 {
    int depth = test_mat[i][j].depth();
-    return depth == CV_8U ? 48 : depth == CV_16U ? 32 : 5e-2;
+    // j == 0 is for forward code, j == 1 is for inverse code; here j only selects the matrix whose depth is checked
+    return (depth ==  CV_8U) ? (srgb ? 36 : 8) :
+           //(depth == CV_16U) ? 32 : // 16u is disabled
+           5e-2;
 }
@@ -2140,10 +2143,19 @@ static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
 enum
 {
+    lab_lut_shift = 5, // log2 of the number of LUT cells per axis
+    LAB_LUT_DIM = (1 << lab_lut_shift)+1, // = 33 LUT nodes per axis
    lab_base_shift = 14,
    LAB_BASE = (1 << lab_base_shift),
+    trilinear_shift = 8 - lab_lut_shift + 1, // = 4 bits of in-cell position per axis
+    TRILINEAR_BASE = (1 << trilinear_shift) // = 16 interpolation steps per axis
 };
+static int16_t trilinearLUT[TRILINEAR_BASE*TRILINEAR_BASE*TRILINEAR_BASE*8]; // 8 corner weights per in-cell position (p, q, r); filled in initLabTabs()
+static int16_t RGB2LuvLUT_s16[LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3*8]; // per LUT node: 8 L, then 8 u, then 8 v values of the cell corners; filled in initLabTabs()
+static const softfloat uLow(-134), uHigh(220), uRange(uHigh-uLow); // u bounds used to rescale u as (u-uLow)/uRange
+static const softfloat vLow(-140), vHigh(122), vRange(vHigh-vLow); // v bounds used to rescale v as (v-vLow)/vRange
 #define  CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n))
 static ushort LabToYF_b[256*2];
@@ -2230,6 +2242,110 @@ static void initLabTabs()
            abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
        }
+        softdouble D65[] = { Xn, softdouble::one(), Zn };
+        softfloat coeffs[9];
+        for(int i = 0; i < 3; i++ )
+        {
+            coeffs[i*3+2] = RGB2XYZ[i*3  ];
+            coeffs[i*3+1] = RGB2XYZ[i*3+1];
+            coeffs[i*3  ] = RGB2XYZ[i*3+2];
+        }
+        softfloat C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
+                  C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
+                  C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
+        softfloat dd = (D65[0] + D65[1]*softdouble(15) + D65[2]*softdouble(3));
+        dd = softfloat::one()/max(dd, softfloat::eps());
+        softfloat un = dd*softfloat(13*4)*D65[0];
+        softfloat vn = dd*softfloat(13*9)*D65[1];
+        //u, v: [-134.0, 220.0], [-140.0, 122.0]
+        static const softfloat lld(LAB_LUT_DIM - 1), f116(116), f16(16);
+        static const softfloat f100(100), lbase((int)LAB_BASE);
+        static const softfloat f9of4 = softfloat(9)/softfloat(4);
+        static const softfloat f15(15), f3(3);
+        AutoBuffer<int16_t> RGB2Luvprev(LAB_LUT_DIM*LAB_LUT_DIM*LAB_LUT_DIM*3);
+        for(int p = 0; p < LAB_LUT_DIM; p++)
+        {
+            for(int q = 0; q < LAB_LUT_DIM; q++)
+            {
+                for(int r = 0; r < LAB_LUT_DIM; r++)
+                {
+                    int idx = p*3 + q*LAB_LUT_DIM*3 + r*LAB_LUT_DIM*LAB_LUT_DIM*3;
+                    softfloat R = softfloat(p)/lld;
+                    softfloat G = softfloat(q)/lld;
+                    softfloat B = softfloat(r)/lld;
+                    R = applyGamma(R);
+                    G = applyGamma(G);
+                    B = applyGamma(B);
+                    //RGB 2 Luv LUT building
+                    {
+                        softfloat X = R*C0 + G*C1 + B*C2;
+                        softfloat Y = R*C3 + G*C4 + B*C5;
+                        softfloat Z = R*C6 + G*C7 + B*C8;
+                        softfloat L = Y < lthresh ? mulAdd(Y, lscale, lbias) : cbrt(Y);
+                        L = L*f116 - f16;
+                        softfloat d = softfloat(4*13)/max(X + f15 * Y + f3 * Z, softfloat(FLT_EPSILON));
+                        softfloat u = L*(X*d - un);
+                        softfloat v = L*(f9of4*Y*d - vn);
+                        RGB2Luvprev[idx  ] = (int16_t)cvRound(lbase*L/f100);
+                        RGB2Luvprev[idx+1] = (int16_t)cvRound(lbase*(u-uLow)/uRange);
+                        RGB2Luvprev[idx+2] = (int16_t)cvRound(lbase*(v-vLow)/vRange);
+                    }
+                }
+            }
+        }
+        for(int p = 0; p < LAB_LUT_DIM; p++)
+        {
+            for(int q = 0; q < LAB_LUT_DIM; q++)
+            {
+                for(int r = 0; r < LAB_LUT_DIM; r++)
+                {
+                    #define FILL(_p, _q, _r) \
+                        do {\
+                        int idxold = 0;\
+                        idxold += min(p+(_p), (int)(LAB_LUT_DIM-1))*3;\
+                        idxold += min(q+(_q), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*3;\
+                        idxold += min(r+(_r), (int)(LAB_LUT_DIM-1))*LAB_LUT_DIM*LAB_LUT_DIM*3;\
+                        int idxnew = p*3*8 + q*LAB_LUT_DIM*3*8 + r*LAB_LUT_DIM*LAB_LUT_DIM*3*8+4*(_p)+2*(_q)+(_r);\
+                        RGB2LuvLUT_s16[idxnew]    = RGB2Luvprev[idxold];\
+                        RGB2LuvLUT_s16[idxnew+8]  = RGB2Luvprev[idxold+1];\
+                        RGB2LuvLUT_s16[idxnew+16] = RGB2Luvprev[idxold+2];\
+                        } while(0)
+                    FILL(0, 0, 0); FILL(0, 0, 1);
+                    FILL(0, 1, 0); FILL(0, 1, 1);
+                    FILL(1, 0, 0); FILL(1, 0, 1);
+                    FILL(1, 1, 0); FILL(1, 1, 1);
+                    #undef FILL
+                }
+            }
+        }
+        for(int16_t p = 0; p < TRILINEAR_BASE; p++)
+        {
+            int16_t pp = TRILINEAR_BASE - p;
+            for(int16_t q = 0; q < TRILINEAR_BASE; q++)
+            {
+                int16_t qq = TRILINEAR_BASE - q;
+                for(int16_t r = 0; r < TRILINEAR_BASE; r++)
+                {
+                    int16_t rr = TRILINEAR_BASE - r;
+                    int16_t* w = &trilinearLUT[8*p + 8*TRILINEAR_BASE*q + 8*TRILINEAR_BASE*TRILINEAR_BASE*r];
+                    w[0]  = pp * qq * rr; w[1]  = pp * qq * r ; w[2]  = pp * q  * rr; w[3]  = pp * q  * r ;
+                    w[4]  = p  * qq * rr; w[5]  = p  * qq * r ; w[6]  = p  * q  * rr; w[7]  = p  * q  * r ;
+                }
+            }
+        }
        initialized = true;
    }
 }
@@ -2338,6 +2454,62 @@ int row8uLab2RGB(const uchar* src_row, uchar *dst_row, int n, int cn, int blue_i
    return n;
 }
+// Reference 8-bit RGB -> Luv conversion of one row using integer-only arithmetic:
+// every pixel selects a cell of RGB2LuvLUT_s16 and the result is interpolated between
+// the 8 corners of that cell with the weights stored in trilinearLUT.
+//   src_row  - input pixels, cn channels each
+//   dst_row  - output pixels, 3 channels each (L, u, v)
+//   n        - number of pixels to convert
+//   blue_idx - offset of the channel fed to the first LUT axis (blue_idx^2 feeds the third)
+// Returns n.
+int row8uRGB2Luv(const uchar* src_row, uchar *dst_row, int n, int cn, int blue_idx)
+{
+    // (LAB_BASE/255) gives more accuracy but not very much
+    static const int baseDiv = LAB_BASE/256;
+    static const int bitMask = (1 << trilinear_shift) - 1;
+    // shift that turns a fixed-point channel into the index of its LUT cell
+    const int cellShift = lab_base_shift - lab_lut_shift;
+    // shift that exposes the in-cell position bits of a fixed-point channel
+    const int fracShift = lab_base_shift - 8 - 1;
+    for (int x = 0; x < n; x++)
+    {
+        const uchar* srcPx = src_row + x*cn;
+        // channels scaled to fixed point, each in [0; LAB_BASE]
+        // NOTE(review): axis order must match the order the LUT was built with in initLabTabs() - confirm
+        const int c0 = srcPx[blue_idx]*baseDiv;
+        const int c1 = srcPx[1]*baseDiv;
+        const int c2 = srcPx[blue_idx^2]*baseDiv;
+        // origin node of the LUT cell that contains the pixel
+        const int16_t* cell = &RGB2LuvLUT_s16[3*8*(c0 >> cellShift) +
+                                              (3*8*LAB_LUT_DIM)*(c1 >> cellShift) +
+                                              (3*8*LAB_LUT_DIM*LAB_LUT_DIM)*(c2 >> cellShift)];
+        // position inside the cell, each in [0; TRILINEAR_BASE)
+        const int fx = (c0 >> fracShift) & bitMask;
+        const int fy = (c1 >> fracShift) & bitMask;
+        const int fz = (c2 >> fracShift) & bitMask;
+        const int16_t* weights = &trilinearLUT[8*fx + 8*TRILINEAR_BASE*fy + 8*TRILINEAR_BASE*TRILINEAR_BASE*fz];
+        // weighted sums over the 8 cell corners: L at [0..7], u at [8..15], v at [16..23]
+        int L = 0, u = 0, v = 0;
+        for(int k = 0; k < 8; k++)
+        {
+            const int wk = weights[k];
+            L += cell[k     ]*wk;
+            u += cell[k +  8]*wk;
+            v += cell[k + 16]*wk;
+        }
+        // remove the weight scale (TRILINEAR_BASE^3) with rounding
+        L = CV_DESCALE(L, trilinear_shift*3);
+        u = CV_DESCALE(u, trilinear_shift*3);
+        v = CV_DESCALE(v, trilinear_shift*3);
+        uchar* dstPx = dst_row + x*3;
+        dstPx[0] = saturate_cast<uchar>(L/baseDiv);
+        dstPx[1] = saturate_cast<uchar>(u/baseDiv);
+        dstPx[2] = saturate_cast<uchar>(v/baseDiv);
+    }
+    return n;
+}
 int row8uLabChoose(const uchar* src_row, uchar *dst_row, int n, bool forward, int blue_idx, bool srgb)
 {
    if(forward)
@@ -2430,6 +2602,85 @@ TEST(Imgproc_ColorLab_Full, bitExactness)
    }
 }
+// Checks that 8-bit BGR/RGB -> Luv conversion is bit-exact: the Adler-32 checksum of every
+// converted random image must equal a pre-recorded hash. On a hash mismatch the result is
+// compared pixel by pixel against the reference row8uRGB2Luv() and the first differing
+// pixel is reported.
+TEST(Imgproc_ColorLuv_Full, bitExactness)
+{
+    /* to be expanded by more codes when bit-exactness is done for them */
+    int codes[] = { CV_BGR2Luv, CV_RGB2Luv };
+    string names[] = { "CV_BGR2Luv", "CV_RGB2Luv" };
+    size_t nCodes = sizeof(codes)/sizeof(codes[0]);
+    // need to be recalculated each time we change Luv algorithms, RNG or test system
+    const int nIterations = 8;
+    // one row of nIterations hashes per conversion code, in the order of codes[]
+    uint32_t hashes[] = {
+        0x9d4d983a, 0xd3d7b220, 0xd503b661, 0x73581d9b, 0x3beec8a6, 0xea6dfc16, 0xc867f4cd, 0x2c97f43a,
+        0x8152fbc9, 0xd7e764a6, 0x5e01f9a3, 0x53e8961e, 0x6a64f1f7, 0x4fa89a44, 0x67096871, 0x4f3bce87,
+    };
+    RNG rng(0);
+    // blueIdx x srgb x direction
+    bool next = true;
+    for(size_t c = 0; next && c < nCodes; c++)
+    {
+        size_t v = c;
+        int  blueIdx = (v % 2 != 0) ? 2 : 0; v /=2;
+        /* bool    srgb = (v % 2 == 0); v /= 2; */
+        /* bool forward = (v % 2 == 0); */
+        for(int iter = 0; next && iter < nIterations; iter++)
+        {
+            Mat probe(256, 256, CV_8UC3), result;
+            rng.fill(probe, RNG::UNIFORM, 0, 255, true);
+            cvtColor(probe, result, codes[c]);
+            uint32_t h = adler32(result);
+            uint32_t goodHash = hashes[c*nIterations + iter];
+            if(h != goodHash)
+            {
+                // the hash differs: locate the first pixel that disagrees with the reference code
+                initLabTabs();
+                vector<uchar> goldBuf(probe.cols*4);
+                uchar* goldRow = &goldBuf[0];
+                for(int y = 0; next && y < probe.rows; y++)
+                {
+                    uchar* probeRow = probe.ptr(y);
+                    uchar* resultRow = result.ptr(y);
+                    row8uRGB2Luv(probeRow, goldRow, probe.cols, 3, blueIdx);
+                    for(int x = 0; next && x < probe.cols; x++)
+                    {
+                        uchar* px = probeRow  + x*3;
+                        uchar* gx = goldRow   + x*3;
+                        uchar* rx = resultRow + x*3;
+                        if(gx[0] != rx[0] || gx[1] != rx[1] || gx[2] != rx[2])
+                        {
+                            next = false;
+                            // uchar values are cast to int so that they are printed as numbers, not as characters
+                            FAIL() << "Bad accuracy" << endl
+                                   << "Conversion code: " << names[c] << endl
+                                   << "Iteration: " << iter << endl
+                                   << "Hash vs Correct hash: " << h << ", " << goodHash << endl
+                                   << "Error in: (" << x << ", " << y << ")" << endl
+                                   << "Reference value: " << (int)gx[0] << " " << (int)gx[1] << " " << (int)gx[2] << endl
+                                   << "Actual value: "    << (int)rx[0] << " " << (int)rx[1] << " " << (int)rx[2] << endl
+                                   << "Src value: " << (int)px[0] << " " << (int)px[1] << " " << (int)px[2] << endl
+                                   << "Size: (" << probe.rows << ", " << probe.cols << ")" << endl;
+                            break;
+                        }
+                    }
+                }
+                if(next)
+                    // this place should never be reached
+                    throw std::runtime_error("Test system error: hash function mismatch when results are the same");
+            }
+        }
+    }
+}
 static void test_Bayer2RGB_EdgeAware_8u(const Mat& src, Mat& dst, int code)
 {
    if (dst.empty())