rand.cpp 27 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/* ////////////////////////////////////////////////////////////////////
//
//  Filling CvMat/IplImage instances with random numbers
//
// */

#include "precomp.hpp"

51 52 53 54 55 56 57 58
#if defined WIN32 || defined WINCE
    #include <windows.h>
    #undef small
    #undef min
    #undef max
    #undef abs
#endif

59
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
60
    #include "emmintrin.h"
61 62
#endif

63 64 65 66 67 68 69 70 71 72 73 74
namespace cv
{

///////////////////////////// Functions Declaration //////////////////////////////////////

/*
   Multiply-with-carry generator is used here:
   temp = ( A*X(n) + carry )
   X(n+1) = temp mod (2^32)
   carry = temp / (2^32)
*/

75
#define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
76 77 78 79 80 81

/***************************************************************************************\
*                           Pseudo-Random Number Generators (PRNGs)                     *
\***************************************************************************************/

template<typename T> static void
82
randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
83 84
{
    uint64 temp = *state;
85
    int i;
86

87
    if( !small_flag )
88
    {
89
        for( i = 0; i <= len - 4; i += 4 )
90
        {
91
            int t0, t1;
92

93 94 95 96 97 98
            temp = RNG_NEXT(temp);
            t0 = ((int)temp & p[i][0]) + p[i][1];
            temp = RNG_NEXT(temp);
            t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
            arr[i] = saturate_cast<T>(t0);
            arr[i+1] = saturate_cast<T>(t1);
99

100 101 102 103 104 105
            temp = RNG_NEXT(temp);
            t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
            temp = RNG_NEXT(temp);
            t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
            arr[i+2] = saturate_cast<T>(t0);
            arr[i+3] = saturate_cast<T>(t1);
106
        }
107 108 109 110
    }
    else
    {
        for( i = 0; i <= len - 4; i += 4 )
111
        {
112
            int t0, t1, t;
113
            temp = RNG_NEXT(temp);
114 115 116
            t = (int)temp;
            t0 = (t & p[i][0]) + p[i][1];
            t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
117
            arr[i] = saturate_cast<T>(t0);
118 119 120 121 122 123
            arr[i+1] = saturate_cast<T>(t1);

            t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
            t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
            arr[i+2] = saturate_cast<T>(t0);
            arr[i+3] = saturate_cast<T>(t1);
124 125 126
        }
    }

127 128 129 130 131 132 133 134 135
    for( ; i < len; i++ )
    {
        int t0;
        temp = RNG_NEXT(temp);

        t0 = ((int)temp & p[i][0]) + p[i][1];
        arr[i] = saturate_cast<T>(t0);
    }

136 137 138 139 140 141 142 143 144 145 146 147
    *state = temp;
}

struct DivStruct
{
    unsigned d;
    unsigned M;
    int sh1, sh2;
    int delta;
};

template<typename T> static void
148
randi_( T* arr, int len, uint64* state, const DivStruct* p )
149 150
{
    uint64 temp = *state;
151 152 153 154
    int i = 0;
    unsigned t0, t1, v0, v1;

    for( i = 0; i <= len - 4; i += 4 )
155
    {
156 157 158 159 160 161 162 163 164 165 166 167
        temp = RNG_NEXT(temp);
        t0 = (unsigned)temp;
        temp = RNG_NEXT(temp);
        t1 = (unsigned)temp;
        v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
        v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
        v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
        v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
        v0 = t0 - v0*p[i].d + p[i].delta;
        v1 = t1 - v1*p[i+1].d + p[i+1].delta;
        arr[i] = saturate_cast<T>((int)v0);
        arr[i+1] = saturate_cast<T>((int)v1);
168

169 170 171 172 173 174 175 176 177 178 179 180
        temp = RNG_NEXT(temp);
        t0 = (unsigned)temp;
        temp = RNG_NEXT(temp);
        t1 = (unsigned)temp;
        v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
        v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
        v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
        v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
        v0 = t0 - v0*p[i+2].d + p[i+2].delta;
        v1 = t1 - v1*p[i+3].d + p[i+3].delta;
        arr[i+2] = saturate_cast<T>((int)v0);
        arr[i+3] = saturate_cast<T>((int)v1);
181 182
    }

183
    for( ; i < len; i++ )
184
    {
185 186 187 188 189 190
        temp = RNG_NEXT(temp);
        t0 = (unsigned)temp;
        v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
        v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
        v0 = t0 - v0*p[i].d + p[i].delta;
        arr[i] = saturate_cast<T>((int)v0);
191 192 193 194 195
    }

    *state = temp;
}

196

197 198 199 200 201 202 203 204 205 206 207 208 209 210
#define DEF_RANDI_FUNC(suffix, type) \
static void randBits_##suffix(type* arr, int len, uint64* state, \
                              const Vec2i* p, bool small_flag) \
{ randBits_(arr, len, state, p, small_flag); } \
\
static void randi_##suffix(type* arr, int len, uint64* state, \
                           const DivStruct* p, bool ) \
{ randi_(arr, len, state, p); }

DEF_RANDI_FUNC(8u, uchar)
DEF_RANDI_FUNC(8s, schar)
DEF_RANDI_FUNC(16u, ushort)
DEF_RANDI_FUNC(16s, short)
DEF_RANDI_FUNC(32s, int)
211

212
static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
213 214
{
    uint64 temp = *state;
215
    int i = 0;
216

217
    for( ; i <= len - 4; i += 4 )
218
    {
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
        float f[4];
        f[0] = (float)(int)(temp = RNG_NEXT(temp));
        f[1] = (float)(int)(temp = RNG_NEXT(temp));
        f[2] = (float)(int)(temp = RNG_NEXT(temp));
        f[3] = (float)(int)(temp = RNG_NEXT(temp));

        // handwritten SSE is required not for performance but for numerical stability!
        // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
        // while 64-bit compilers generate single precision SIMD instructions
        // so manual vectorisation forces all compilers to the single precision
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
        __m128 q0 = _mm_loadu_ps((const float*)(p + i));
        __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));

        __m128 q01l = _mm_unpacklo_ps(q0, q1);
        __m128 q01h = _mm_unpackhi_ps(q0, q1);

        __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
        __m128 p1 = _mm_unpackhi_ps(q01l, q01h);

        _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
#else
        arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
        arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
        arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
        arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
#endif
246
    }
247

248 249 250
    for( ; i < len; i++ )
    {
        temp = RNG_NEXT(temp);
251 252 253 254 255 256
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
        _mm_store_ss(arr + i, _mm_add_ss(
                _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
                _mm_set_ss(p[i][1]))
                );
#else
257
        arr[i] = (int)temp*p[i][0] + p[i][1];
258
#endif
259 260 261 262 263 264 265
    }

    *state = temp;
}


static void
266
randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
267 268 269
{
    uint64 temp = *state;
    int64 v = 0;
270
    int i;
271

272
    for( i = 0; i <= len - 4; i += 4 )
273
    {
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
        double f0, f1;

        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f0 = v*p[i][0] + p[i][1];
        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f1 = v*p[i+1][0] + p[i+1][1];
        arr[i] = f0; arr[i+1] = f1;

        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f0 = v*p[i+2][0] + p[i+2][1];
        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f1 = v*p[i+3][0] + p[i+3][1];
        arr[i+2] = f0; arr[i+3] = f1;
    }
292

293 294 295 296 297
    for( ; i < len; i++ )
    {
        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        arr[i] = v*p[i][0] + p[i][1];
298 299 300 301 302
    }

    *state = temp;
}

303 304
typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);

305 306 307 308 309 310 311 312 313 314 315

static RandFunc randTab[][8] =
{
    {
        (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
        (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
    },
    {
        (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
        (RandFunc)randBits_32s, 0, 0, 0
    }
316 317
};

318 319 320 321 322 323
/*
   The code below implements the algorithm described in
   "The Ziggurat Method for Generating Random Variables"
   by Marsaglia and Tsang, Journal of Statistical Software.
*/
static void
324
randn_0_1_32f( float* arr, int len, uint64* state )
325 326 327 328 329 330 331 332
{
    const float r = 3.442620f; // The start of the right tail
    const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
    static unsigned kn[128];
    static float wn[128], fn[128];
    uint64 temp = *state;
    static bool initialized=false;
    int i;
333

334 335 336 337
    if( !initialized )
    {
        const double m1 = 2147483648.0;
        double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
338

339 340 341 342
        // Set up the tables
        double q = vn/std::exp(-.5*dn*dn);
        kn[0] = (unsigned)((dn/q)*m1);
        kn[1] = 0;
343

344 345
        wn[0] = (float)(q/m1);
        wn[127] = (float)(dn/m1);
346

347 348
        fn[0] = 1.f;
        fn[127] = (float)std::exp(-.5*dn*dn);
349

350 351 352 353 354 355 356 357 358 359
        for(i=126;i>=1;i--)
        {
            dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
            kn[i+1] = (unsigned)((dn/tn)*m1);
            tn = dn;
            fn[i] = (float)std::exp(-.5*dn*dn);
            wn[i] = (float)(dn/m1);
        }
        initialized = true;
    }
360

361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
    for( i = 0; i < len; i++ )
    {
        float x, y;
        for(;;)
        {
            int hz = (int)temp;
            temp = RNG_NEXT(temp);
            int iz = hz & 127;
            x = hz*wn[iz];
            if( (unsigned)std::abs(hz) < kn[iz] )
                break;
            if( iz == 0) // iz==0, handles the base strip
            {
                do
                {
                    x = (unsigned)temp*rng_flt;
                    temp = RNG_NEXT(temp);
                    y = (unsigned)temp*rng_flt;
                    temp = RNG_NEXT(temp);
                    x = (float)(-std::log(x+FLT_MIN)*0.2904764);
                    y = (float)-std::log(y+FLT_MIN);
                }	// .2904764 is 1/r
                while( y + y < x*x );
                x = hz > 0 ? r + x : -r - x;
                break;
            }
            // iz > 0, handle the wedges of other strips
            y = (unsigned)temp*rng_flt;
            temp = RNG_NEXT(temp);
            if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
                break;
        }
        arr[i] = x;
    }
    *state = temp;
}

398

399 400 401
double RNG::gaussian(double sigma)
{
    float temp;
402
    randn_0_1_32f( &temp, 1, &state );
403 404 405
    return temp*sigma;
}

406

407
template<typename T, typename PT> static void
408
randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
409
{
410 411
    int i, j, k;
    if( !stdmtx )
412
    {
413
        if( cn == 1 )
414
        {
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
            PT b = mean[0], a = stddev[0];
            for( i = 0; i < len; i++ )
                dst[i] = saturate_cast<T>(src[i]*a + b);
        }
        else
        {
            for( i = 0; i < len; i++, src += cn, dst += cn )
                for( k = 0; k < cn; k++ )
                    dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
        }
    }
    else
    {
        for( i = 0; i < len; i++, src += cn, dst += cn )
        {
            for( j = 0; j < cn; j++ )
            {
                PT s = mean[j];
                for( k = 0; k < cn; k++ )
                    s += src[k]*stddev[j*cn + k];
                dst[j] = saturate_cast<T>(s);
            }
437 438 439
        }
    }
}
440

441 442 443 444 445 446 447
static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
                            const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

static void randnScale_8s( const float* src, schar* dst, int len, int cn,
                            const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
448

449 450 451
static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
452

453 454 455
static void randnScale_16s( const float* src, short* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
456

457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
static void randnScale_32s( const float* src, int* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

static void randnScale_32f( const float* src, float* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

static void randnScale_64f( const float* src, double* dst, int len, int cn,
                             const double* mean, const double* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
                               const uchar*, const uchar*, bool);

static RandnScaleFunc randnScaleTab[] =
473
{
474 475
    (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
    (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
476
    (RandnScaleFunc)randnScale_64f, 0
477
};
478

479 480
void RNG::fill( InputOutputArray _mat, int disttype,
                InputArray _param1arg, InputArray _param2arg, bool saturateRange )
481 482 483 484 485
{
    Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
    int depth = mat.depth(), cn = mat.channels();
    AutoBuffer<double> _parambuf;
    int j, k, fast_int_mode = 0, smallFlag = 1;
486
    RandFunc func = 0;
487
    RandnScaleFunc scaleFunc = 0;
488

489
    CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
490
              (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
491 492
               (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
    CV_Assert( _param2.channels() == 1 &&
493
               (((_param2.rows == 1 || _param2.cols == 1) &&
494
                (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
495 496
                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
                (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
497

498 499 500 501 502 503 504
    Vec2i* ip = 0;
    Vec2d* dp = 0;
    Vec2f* fp = 0;
    DivStruct* ds = 0;
    uchar* mean = 0;
    uchar* stddev = 0;
    bool stdmtx = false;
505 506
    int n1 = (int)_param1.total();
    int n2 = (int)_param2.total();
507 508 509

    if( disttype == UNIFORM )
    {
510
        _parambuf.allocate(cn*8 + n1 + n2);
511
        double* parambuf = _parambuf;
512 513
        double* p1 = (double*)_param1.data;
        double* p2 = (double*)_param2.data;
514

515
        if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
516 517 518 519
        {
            Mat tmp(_param1.size(), CV_64F, parambuf);
            _param1.convertTo(tmp, CV_64F);
            p1 = parambuf;
520 521 522
            if( n1 < cn )
                for( j = n1; j < cn; j++ )
                    p1[j] = p1[j-n1];
523
        }
524

525
        if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
526 527 528 529
        {
            Mat tmp(_param2.size(), CV_64F, parambuf + cn);
            _param2.convertTo(tmp, CV_64F);
            p2 = parambuf + cn;
530 531 532
            if( n2 < cn )
                for( j = n2; j < cn; j++ )
                    p2[j] = p2[j-n2];
533
        }
534

535 536
        if( depth <= CV_32S )
        {
537 538
            ip = (Vec2i*)(parambuf + cn*2);
            for( j = 0, fast_int_mode = 1; j < cn; j++ )
539
            {
540 541
                double a = min(p1[j], p2[j]);
                double b = max(p1[j], p2[j]);
542 543 544 545 546 547 548
                if( saturateRange )
                {
                    a = max(a, depth == CV_8U || depth == CV_16U ? 0. :
                            depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
                    b = min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
                            depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
                }
549 550
                ip[j][1] = cvCeil(a);
                int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
551 552
                double diff = b - a;

553 554 555
                fast_int_mode &= diff <= 4294967296. && (idiff & (idiff+1)) == 0;
                if( fast_int_mode )
                    smallFlag &= idiff <= 255;
556 557 558 559 560 561 562
                else
                {
                    if( diff > INT_MAX )
                        ip[j][0] = INT_MAX;
                    if( a < INT_MIN/2 )
                        ip[j][1] = INT_MIN/2;
                }
563
            }
564

565
            if( !fast_int_mode )
566
            {
567 568 569 570 571 572 573 574 575 576 577
                ds = (DivStruct*)(ip + cn);
                for( j = 0; j < cn; j++ )
                {
                    ds[j].delta = ip[j][1];
                    unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
                    int l = 0;
                    while(((uint64)1 << l) < d)
                        l++;
                    ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
                    ds[j].sh1 = min(l, 1);
                    ds[j].sh2 = max(l - 1, 0);
578
                }
579
            }
580

581
            func = randTab[fast_int_mode][depth];
582 583 584 585 586 587
        }
        else
        {
            double scale = depth == CV_64F ?
                5.4210108624275221700372640043497e-20 : // 2**-64
                2.3283064365386962890625e-10;           // 2**-32
588
            double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
589 590 591 592 593

            // for each channel i compute such dparam[0][i] & dparam[1][i],
            // so that a signed 32/64-bit integer X is transformed to
            // the range [param1.val[i], param2.val[i]) using
            // dparam[1][i]*X + dparam[0][i]
594
            if( depth == CV_32F )
595
            {
596 597 598
                fp = (Vec2f*)(parambuf + cn*2);
                for( j = 0; j < cn; j++ )
                {
599
                    fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
600 601 602 603 604 605 606 607
                    fp[j][1] = (float)((p2[j] + p1[j])*0.5);
                }
            }
            else
            {
                dp = (Vec2d*)(parambuf + cn*2);
                for( j = 0; j < cn; j++ )
                {
608
                    dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
609 610
                    dp[j][1] = ((p2[j] + p1[j])*0.5);
                }
611
            }
612 613

            func = randTab[0][depth];
614
        }
615
        CV_Assert( func != 0 );
616 617 618
    }
    else if( disttype == CV_RAND_NORMAL )
    {
619
        _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
620
        double* parambuf = _parambuf;
621

622
        int ptype = depth == CV_64F ? CV_64F : CV_32F;
623
        int esz = (int)CV_ELEM_SIZE(ptype);
624

625 626 627
        if( _param1.isContinuous() && _param1.type() == ptype )
            mean = _param1.data;
        else
628
        {
629 630 631
            Mat tmp(_param1.size(), ptype, parambuf);
            _param1.convertTo(tmp, ptype);
            mean = (uchar*)parambuf;
632
        }
633

634 635 636
        if( n1 < cn )
            for( j = n1*esz; j < cn*esz; j++ )
                mean[j] = mean[j - n1*esz];
637

638 639 640 641 642 643 644 645
        if( _param2.isContinuous() && _param2.type() == ptype )
            stddev = _param2.data;
        else
        {
            Mat tmp(_param2.size(), ptype, parambuf + cn);
            _param2.convertTo(tmp, ptype);
            stddev = (uchar*)(parambuf + cn);
        }
646

647 648 649
        if( n1 < cn )
            for( j = n1*esz; j < cn*esz; j++ )
                stddev[j] = stddev[j - n1*esz];
650

651 652 653
        stdmtx = _param2.rows == cn && _param2.cols == cn;
        scaleFunc = randnScaleTab[depth];
        CV_Assert( scaleFunc != 0 );
654 655 656 657
    }
    else
        CV_Error( CV_StsBadArg, "Unknown distribution type" );

658 659 660 661 662 663 664 665
    const Mat* arrays[] = {&mat, 0};
    uchar* ptr;
    NAryMatIterator it(arrays, &ptr);
    int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
    size_t esz = mat.elemSize();
    AutoBuffer<double> buf;
    uchar* param = 0;
    float* nbuf = 0;
666

667
    if( disttype == UNIFORM )
668
    {
669 670
        buf.allocate(blockSize*cn*4);
        param = (uchar*)(double*)buf;
671

672
        if( ip )
673
        {
674 675 676 677 678 679 680 681
            if( ds )
            {
                DivStruct* p = (DivStruct*)param;
                for( j = 0; j < blockSize*cn; j += cn )
                    for( k = 0; k < cn; k++ )
                        p[j + k] = ds[k];
            }
            else
682
            {
683 684 685 686
                Vec2i* p = (Vec2i*)param;
                for( j = 0; j < blockSize*cn; j += cn )
                    for( k = 0; k < cn; k++ )
                        p[j + k] = ip[k];
687
            }
688 689 690 691 692 693 694 695 696 697 698 699 700 701
        }
        else if( fp )
        {
            Vec2f* p = (Vec2f*)param;
            for( j = 0; j < blockSize*cn; j += cn )
                for( k = 0; k < cn; k++ )
                    p[j + k] = fp[k];
        }
        else
        {
            Vec2d* p = (Vec2d*)param;
            for( j = 0; j < blockSize*cn; j += cn )
                for( k = 0; k < cn; k++ )
                    p[j + k] = dp[k];
702 703
        }
    }
704 705 706 707 708
    else
    {
        buf.allocate((blockSize*cn+1)/2);
        nbuf = (float*)(double*)buf;
    }
709

710
    for( size_t i = 0; i < it.nplanes; i++, ++it )
711
    {
712 713 714
        for( j = 0; j < total; j += blockSize )
        {
            int len = std::min(total - j, blockSize);
715

716 717 718 719 720 721 722 723 724
            if( disttype == CV_RAND_UNI )
                func( ptr, len*cn, &state, param, smallFlag != 0 );
            else
            {
                randn_0_1_32f(nbuf, len*cn, &state);
                scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
            }
            ptr += len*esz;
        }
725
    }
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758
}

#ifdef WIN32
#ifdef WINCE
#	define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
#endif
static DWORD tlsRNGKey = TLS_OUT_OF_INDEXES;

void deleteThreadRNGData()
{
    if( tlsRNGKey != TLS_OUT_OF_INDEXES )
        delete (RNG*)TlsGetValue( tlsRNGKey );
}

RNG& theRNG()
{
    if( tlsRNGKey == TLS_OUT_OF_INDEXES )
    {
        tlsRNGKey = TlsAlloc();
        CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES);
    }
    RNG* rng = (RNG*)TlsGetValue( tlsRNGKey );
    if( !rng )
    {
        rng = new RNG;
        TlsSetValue( tlsRNGKey, rng );
    }
    return *rng;
}

#else

static pthread_key_t tlsRNGKey = 0;
759
static pthread_once_t tlsRNGKeyOnce = PTHREAD_ONCE_INIT;
760 761 762 763 764 765

static void deleteRNG(void* data)
{
    delete (RNG*)data;
}

766 767
static void makeRNGKey()
{
768 769
    int errcode = pthread_key_create(&tlsRNGKey, deleteRNG);
    CV_Assert(errcode == 0);
770 771
}

772 773
RNG& theRNG()
{
774
    pthread_once(&tlsRNGKeyOnce, makeRNGKey);
775 776 777 778 779 780 781 782 783 784 785
    RNG* rng = (RNG*)pthread_getspecific(tlsRNGKey);
    if( !rng )
    {
        rng = new RNG;
        pthread_setspecific(tlsRNGKey, rng);
    }
    return *rng;
}

#endif

786
}
787

788
void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
789 790 791 792
{
    theRNG().fill(dst, RNG::UNIFORM, low, high);
}

793
void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
794 795
{
    theRNG().fill(dst, RNG::NORMAL, mean, stddev);
796 797
}

798 799 800
namespace cv
{

801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
template<typename T> static void
randShuffle_( Mat& _arr, RNG& rng, double iterFactor )
{
    int sz = _arr.rows*_arr.cols, iters = cvRound(iterFactor*sz);
    if( _arr.isContinuous() )
    {
        T* arr = (T*)_arr.data;
        for( int i = 0; i < iters; i++ )
        {
            int j = (unsigned)rng % sz, k = (unsigned)rng % sz;
            std::swap( arr[j], arr[k] );
        }
    }
    else
    {
        uchar* data = _arr.data;
        size_t step = _arr.step;
        int cols = _arr.cols;
        for( int i = 0; i < iters; i++ )
        {
            int j1 = (unsigned)rng % sz, k1 = (unsigned)rng % sz;
            int j0 = j1/cols, k0 = k1/cols;
            j1 -= j0*cols; k1 -= k0*cols;
            std::swap( ((T*)(data + step*j0))[j1], ((T*)(data + step*k0))[k1] );
        }
    }
}

typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );

831
}
832

833
void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
834 835 836 837 838 839 840 841 842 843 844
{
    RandShuffleFunc tab[] =
    {
        0,
        randShuffle_<uchar>, // 1
        randShuffle_<ushort>, // 2
        randShuffle_<Vec<uchar,3> >, // 3
        randShuffle_<int>, // 4
        0,
        randShuffle_<Vec<ushort,3> >, // 6
        0,
845
        randShuffle_<Vec<int,2> >, // 8
846 847 848
        0, 0, 0,
        randShuffle_<Vec<int,3> >, // 12
        0, 0, 0,
849
        randShuffle_<Vec<int,4> >, // 16
850
        0, 0, 0, 0, 0, 0, 0,
851
        randShuffle_<Vec<int,6> >, // 24
852
        0, 0, 0, 0, 0, 0, 0,
853
        randShuffle_<Vec<int,8> > // 32
854
    };
855

856
    Mat dst = _dst.getMat();
857 858 859 860 861 862 863
    RNG& rng = _rng ? *_rng : theRNG();
    CV_Assert( dst.elemSize() <= 32 );
    RandShuffleFunc func = tab[dst.elemSize()];
    CV_Assert( func != 0 );
    func( dst, rng, iterFactor );
}

864 865 866 867 868
void cv::randShuffle_( InputOutputArray _dst, double iterFactor )
{
    randShuffle(_dst, iterFactor);
}

869 870 871 872 873 874 875
CV_IMPL void
cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
{
    cv::Mat mat = cv::cvarrToMat(arr);
    // !!! this will only work for current 64-bit MWC RNG !!!
    cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
    rng.fill(mat, disttype == CV_RAND_NORMAL ?
876
        cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
877 878 879 880 881 882 883 884 885 886
}

CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
{
    cv::Mat dst = cv::cvarrToMat(arr);
    cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
    cv::randShuffle( dst, iter_factor, &rng );
}

/* End of file. */