rand.cpp 30.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/* ////////////////////////////////////////////////////////////////////
//
//  Filling CvMat/IplImage instances with random numbers
//
// */

#include "precomp.hpp"

51 52 53 54 55 56
#if defined WIN32 || defined WINCE
    #include <windows.h>
    #undef small
    #undef min
    #undef max
    #undef abs
57 58
#else
    #include <pthread.h>
59 60
#endif

61
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
62
    #include "emmintrin.h"
63 64
#endif

65 66 67 68 69 70 71 72 73 74 75 76
namespace cv
{

///////////////////////////// Functions Declaration //////////////////////////////////////

/*
   Multiply-with-carry generator is used here:
   temp = ( A*X(n) + carry )
   X(n+1) = temp mod (2^32)
   carry = temp / (2^32)
*/

77
#define  RNG_NEXT(x)    ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
78 79 80 81 82 83

/***************************************************************************************\
*                           Pseudo-Random Number Generators (PRNGs)                     *
\***************************************************************************************/

template<typename T> static void
84
randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag )
85 86
{
    uint64 temp = *state;
87
    int i;
88

89
    if( !small_flag )
90
    {
91
        for( i = 0; i <= len - 4; i += 4 )
92
        {
93
            int t0, t1;
94

95 96 97 98 99 100
            temp = RNG_NEXT(temp);
            t0 = ((int)temp & p[i][0]) + p[i][1];
            temp = RNG_NEXT(temp);
            t1 = ((int)temp & p[i+1][0]) + p[i+1][1];
            arr[i] = saturate_cast<T>(t0);
            arr[i+1] = saturate_cast<T>(t1);
101

102 103 104 105 106 107
            temp = RNG_NEXT(temp);
            t0 = ((int)temp & p[i+2][0]) + p[i+2][1];
            temp = RNG_NEXT(temp);
            t1 = ((int)temp & p[i+3][0]) + p[i+3][1];
            arr[i+2] = saturate_cast<T>(t0);
            arr[i+3] = saturate_cast<T>(t1);
108
        }
109 110 111 112
    }
    else
    {
        for( i = 0; i <= len - 4; i += 4 )
113
        {
114
            int t0, t1, t;
115
            temp = RNG_NEXT(temp);
116 117 118
            t = (int)temp;
            t0 = (t & p[i][0]) + p[i][1];
            t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1];
119
            arr[i] = saturate_cast<T>(t0);
120 121 122 123 124 125
            arr[i+1] = saturate_cast<T>(t1);

            t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1];
            t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1];
            arr[i+2] = saturate_cast<T>(t0);
            arr[i+3] = saturate_cast<T>(t1);
126 127 128
        }
    }

129 130 131 132 133 134 135 136 137
    for( ; i < len; i++ )
    {
        int t0;
        temp = RNG_NEXT(temp);

        t0 = ((int)temp & p[i][0]) + p[i][1];
        arr[i] = saturate_cast<T>(t0);
    }

138 139 140 141 142 143 144 145 146 147 148 149
    *state = temp;
}

struct DivStruct
{
    unsigned d;
    unsigned M;
    int sh1, sh2;
    int delta;
};

template<typename T> static void
150
randi_( T* arr, int len, uint64* state, const DivStruct* p )
151 152
{
    uint64 temp = *state;
153 154 155 156
    int i = 0;
    unsigned t0, t1, v0, v1;

    for( i = 0; i <= len - 4; i += 4 )
157
    {
158 159 160 161 162 163 164 165 166 167 168 169
        temp = RNG_NEXT(temp);
        t0 = (unsigned)temp;
        temp = RNG_NEXT(temp);
        t1 = (unsigned)temp;
        v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
        v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
        v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
        v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
        v0 = t0 - v0*p[i].d + p[i].delta;
        v1 = t1 - v1*p[i+1].d + p[i+1].delta;
        arr[i] = saturate_cast<T>((int)v0);
        arr[i+1] = saturate_cast<T>((int)v1);
170

171 172 173 174 175 176 177 178 179 180 181 182
        temp = RNG_NEXT(temp);
        t0 = (unsigned)temp;
        temp = RNG_NEXT(temp);
        t1 = (unsigned)temp;
        v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
        v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
        v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
        v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
        v0 = t0 - v0*p[i+2].d + p[i+2].delta;
        v1 = t1 - v1*p[i+3].d + p[i+3].delta;
        arr[i+2] = saturate_cast<T>((int)v0);
        arr[i+3] = saturate_cast<T>((int)v1);
183 184
    }

185
    for( ; i < len; i++ )
186
    {
187 188 189 190 191 192
        temp = RNG_NEXT(temp);
        t0 = (unsigned)temp;
        v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
        v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
        v0 = t0 - v0*p[i].d + p[i].delta;
        arr[i] = saturate_cast<T>((int)v0);
193 194 195 196 197
    }

    *state = temp;
}

198

199 200 201 202 203 204 205 206 207 208 209 210 211 212
#define DEF_RANDI_FUNC(suffix, type) \
static void randBits_##suffix(type* arr, int len, uint64* state, \
                              const Vec2i* p, bool small_flag) \
{ randBits_(arr, len, state, p, small_flag); } \
\
static void randi_##suffix(type* arr, int len, uint64* state, \
                           const DivStruct* p, bool ) \
{ randi_(arr, len, state, p); }

DEF_RANDI_FUNC(8u, uchar)
DEF_RANDI_FUNC(8s, schar)
DEF_RANDI_FUNC(16u, ushort)
DEF_RANDI_FUNC(16s, short)
DEF_RANDI_FUNC(32s, int)
213

214
static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
215 216
{
    uint64 temp = *state;
217
    int i = 0;
218

219
    for( ; i <= len - 4; i += 4 )
220
    {
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
        float f[4];
        f[0] = (float)(int)(temp = RNG_NEXT(temp));
        f[1] = (float)(int)(temp = RNG_NEXT(temp));
        f[2] = (float)(int)(temp = RNG_NEXT(temp));
        f[3] = (float)(int)(temp = RNG_NEXT(temp));

        // handwritten SSE is required not for performance but for numerical stability!
        // both 32-bit gcc and MSVC compilers trend to generate double precision SSE
        // while 64-bit compilers generate single precision SIMD instructions
        // so manual vectorisation forces all compilers to the single precision
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
        __m128 q0 = _mm_loadu_ps((const float*)(p + i));
        __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));

        __m128 q01l = _mm_unpacklo_ps(q0, q1);
        __m128 q01h = _mm_unpackhi_ps(q0, q1);

        __m128 p0 = _mm_unpacklo_ps(q01l, q01h);
        __m128 p1 = _mm_unpackhi_ps(q01l, q01h);

        _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
#else
        arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
        arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
        arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
        arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
#endif
248
    }
249

250 251 252
    for( ; i < len; i++ )
    {
        temp = RNG_NEXT(temp);
253 254 255 256 257 258
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
        _mm_store_ss(arr + i, _mm_add_ss(
                _mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
                _mm_set_ss(p[i][1]))
                );
#else
259
        arr[i] = (int)temp*p[i][0] + p[i][1];
260
#endif
261 262 263 264 265 266 267
    }

    *state = temp;
}


static void
268
randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
269 270 271
{
    uint64 temp = *state;
    int64 v = 0;
272
    int i;
273

274
    for( i = 0; i <= len - 4; i += 4 )
275
    {
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
        double f0, f1;

        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f0 = v*p[i][0] + p[i][1];
        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f1 = v*p[i+1][0] + p[i+1][1];
        arr[i] = f0; arr[i+1] = f1;

        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f0 = v*p[i+2][0] + p[i+2][1];
        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        f1 = v*p[i+3][0] + p[i+3][1];
        arr[i+2] = f0; arr[i+3] = f1;
    }
294

295 296 297 298 299
    for( ; i < len; i++ )
    {
        temp = RNG_NEXT(temp);
        v = (temp >> 32)|(temp << 32);
        arr[i] = v*p[i][0] + p[i][1];
300 301 302 303 304
    }

    *state = temp;
}

305 306
typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);

307 308 309 310 311 312 313 314 315 316 317

static RandFunc randTab[][8] =
{
    {
        (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
        (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
    },
    {
        (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
        (RandFunc)randBits_32s, 0, 0, 0
    }
318 319
};

320 321 322 323 324 325
/*
   The code below implements the algorithm described in
   "The Ziggurat Method for Generating Random Variables"
   by Marsaglia and Tsang, Journal of Statistical Software.
*/
static void
326
randn_0_1_32f( float* arr, int len, uint64* state )
327 328 329 330 331 332 333 334
{
    const float r = 3.442620f; // The start of the right tail
    const float rng_flt = 2.3283064365386962890625e-10f; // 2^-32
    static unsigned kn[128];
    static float wn[128], fn[128];
    uint64 temp = *state;
    static bool initialized=false;
    int i;
335

336 337 338 339
    if( !initialized )
    {
        const double m1 = 2147483648.0;
        double dn = 3.442619855899, tn = dn, vn = 9.91256303526217e-3;
340

341 342 343 344
        // Set up the tables
        double q = vn/std::exp(-.5*dn*dn);
        kn[0] = (unsigned)((dn/q)*m1);
        kn[1] = 0;
345

346 347
        wn[0] = (float)(q/m1);
        wn[127] = (float)(dn/m1);
348

349 350
        fn[0] = 1.f;
        fn[127] = (float)std::exp(-.5*dn*dn);
351

352 353 354 355 356 357 358 359 360 361
        for(i=126;i>=1;i--)
        {
            dn = std::sqrt(-2.*std::log(vn/dn+std::exp(-.5*dn*dn)));
            kn[i+1] = (unsigned)((dn/tn)*m1);
            tn = dn;
            fn[i] = (float)std::exp(-.5*dn*dn);
            wn[i] = (float)(dn/m1);
        }
        initialized = true;
    }
362

363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
    for( i = 0; i < len; i++ )
    {
        float x, y;
        for(;;)
        {
            int hz = (int)temp;
            temp = RNG_NEXT(temp);
            int iz = hz & 127;
            x = hz*wn[iz];
            if( (unsigned)std::abs(hz) < kn[iz] )
                break;
            if( iz == 0) // iz==0, handles the base strip
            {
                do
                {
                    x = (unsigned)temp*rng_flt;
                    temp = RNG_NEXT(temp);
                    y = (unsigned)temp*rng_flt;
                    temp = RNG_NEXT(temp);
                    x = (float)(-std::log(x+FLT_MIN)*0.2904764);
                    y = (float)-std::log(y+FLT_MIN);
                }	// .2904764 is 1/r
                while( y + y < x*x );
                x = hz > 0 ? r + x : -r - x;
                break;
            }
            // iz > 0, handle the wedges of other strips
            y = (unsigned)temp*rng_flt;
            temp = RNG_NEXT(temp);
            if( fn[iz] + y*(fn[iz - 1] - fn[iz]) < std::exp(-.5*x*x) )
                break;
        }
        arr[i] = x;
    }
    *state = temp;
}

400

401 402 403
double RNG::gaussian(double sigma)
{
    float temp;
404
    randn_0_1_32f( &temp, 1, &state );
405 406 407
    return temp*sigma;
}

408

409
template<typename T, typename PT> static void
410
randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx )
411
{
412 413
    int i, j, k;
    if( !stdmtx )
414
    {
415
        if( cn == 1 )
416
        {
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
            PT b = mean[0], a = stddev[0];
            for( i = 0; i < len; i++ )
                dst[i] = saturate_cast<T>(src[i]*a + b);
        }
        else
        {
            for( i = 0; i < len; i++, src += cn, dst += cn )
                for( k = 0; k < cn; k++ )
                    dst[k] = saturate_cast<T>(src[k]*stddev[k] + mean[k]);
        }
    }
    else
    {
        for( i = 0; i < len; i++, src += cn, dst += cn )
        {
            for( j = 0; j < cn; j++ )
            {
                PT s = mean[j];
                for( k = 0; k < cn; k++ )
                    s += src[k]*stddev[j*cn + k];
                dst[j] = saturate_cast<T>(s);
            }
439 440 441
        }
    }
}
442

443 444 445 446 447 448 449
static void randnScale_8u( const float* src, uchar* dst, int len, int cn,
                            const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

static void randnScale_8s( const float* src, schar* dst, int len, int cn,
                            const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
450

451 452 453
static void randnScale_16u( const float* src, ushort* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
454

455 456 457
static void randnScale_16s( const float* src, short* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }
458

459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474
static void randnScale_32s( const float* src, int* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

static void randnScale_32f( const float* src, float* dst, int len, int cn,
                             const float* mean, const float* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

static void randnScale_64f( const float* src, double* dst, int len, int cn,
                             const double* mean, const double* stddev, bool stdmtx )
{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); }

typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn,
                               const uchar*, const uchar*, bool);

static RandnScaleFunc randnScaleTab[] =
475
{
476 477
    (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u,
    (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f,
478
    (RandnScaleFunc)randnScale_64f, 0
479
};
480

481 482
void RNG::fill( InputOutputArray _mat, int disttype,
                InputArray _param1arg, InputArray _param2arg, bool saturateRange )
483 484 485 486 487
{
    Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
    int depth = mat.depth(), cn = mat.channels();
    AutoBuffer<double> _parambuf;
    int j, k, fast_int_mode = 0, smallFlag = 1;
488
    RandFunc func = 0;
489
    RandnScaleFunc scaleFunc = 0;
490

491
    CV_Assert(_param1.channels() == 1 && (_param1.rows == 1 || _param1.cols == 1) &&
492
              (_param1.rows + _param1.cols - 1 == cn || _param1.rows + _param1.cols - 1 == 1 ||
493 494
               (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4)));
    CV_Assert( _param2.channels() == 1 &&
495
               (((_param2.rows == 1 || _param2.cols == 1) &&
496
                (_param2.rows + _param2.cols - 1 == cn || _param2.rows + _param2.cols - 1 == 1 ||
497 498
                (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) ||
                (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL)));
499

500 501 502 503 504 505 506
    Vec2i* ip = 0;
    Vec2d* dp = 0;
    Vec2f* fp = 0;
    DivStruct* ds = 0;
    uchar* mean = 0;
    uchar* stddev = 0;
    bool stdmtx = false;
507 508
    int n1 = (int)_param1.total();
    int n2 = (int)_param2.total();
509 510 511

    if( disttype == UNIFORM )
    {
512
        _parambuf.allocate(cn*8 + n1 + n2);
513
        double* parambuf = _parambuf;
514 515
        double* p1 = _param1.ptr<double>();
        double* p2 = _param2.ptr<double>();
516

517
        if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn )
518 519 520 521
        {
            Mat tmp(_param1.size(), CV_64F, parambuf);
            _param1.convertTo(tmp, CV_64F);
            p1 = parambuf;
522 523 524
            if( n1 < cn )
                for( j = n1; j < cn; j++ )
                    p1[j] = p1[j-n1];
525
        }
526

527
        if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn )
528 529 530 531
        {
            Mat tmp(_param2.size(), CV_64F, parambuf + cn);
            _param2.convertTo(tmp, CV_64F);
            p2 = parambuf + cn;
532 533 534
            if( n2 < cn )
                for( j = n2; j < cn; j++ )
                    p2[j] = p2[j-n2];
535
        }
536

537 538
        if( depth <= CV_32S )
        {
539 540
            ip = (Vec2i*)(parambuf + cn*2);
            for( j = 0, fast_int_mode = 1; j < cn; j++ )
541
            {
542 543
                double a = std::min(p1[j], p2[j]);
                double b = std::max(p1[j], p2[j]);
544 545
                if( saturateRange )
                {
546
                    a = std::max(a, depth == CV_8U || depth == CV_16U ? 0. :
547
                            depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN);
548
                    b = std::min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. :
549 550
                            depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX);
                }
551 552
                ip[j][1] = cvCeil(a);
                int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1;
553 554
                double diff = b - a;

555 556 557
                fast_int_mode &= diff <= 4294967296. && (idiff & (idiff+1)) == 0;
                if( fast_int_mode )
                    smallFlag &= idiff <= 255;
558 559 560 561 562 563 564
                else
                {
                    if( diff > INT_MAX )
                        ip[j][0] = INT_MAX;
                    if( a < INT_MIN/2 )
                        ip[j][1] = INT_MIN/2;
                }
565
            }
566

567
            if( !fast_int_mode )
568
            {
569 570 571 572 573 574 575 576 577
                ds = (DivStruct*)(ip + cn);
                for( j = 0; j < cn; j++ )
                {
                    ds[j].delta = ip[j][1];
                    unsigned d = ds[j].d = (unsigned)(ip[j][0]+1);
                    int l = 0;
                    while(((uint64)1 << l) < d)
                        l++;
                    ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1;
578 579
                    ds[j].sh1 = std::min(l, 1);
                    ds[j].sh2 = std::max(l - 1, 0);
580
                }
581
            }
582

583
            func = randTab[fast_int_mode][depth];
584 585 586 587 588 589
        }
        else
        {
            double scale = depth == CV_64F ?
                5.4210108624275221700372640043497e-20 : // 2**-64
                2.3283064365386962890625e-10;           // 2**-32
590
            double maxdiff = saturateRange ? (double)FLT_MAX : DBL_MAX;
591 592 593 594 595

            // for each channel i compute such dparam[0][i] & dparam[1][i],
            // so that a signed 32/64-bit integer X is transformed to
            // the range [param1.val[i], param2.val[i]) using
            // dparam[1][i]*X + dparam[0][i]
596
            if( depth == CV_32F )
597
            {
598 599 600
                fp = (Vec2f*)(parambuf + cn*2);
                for( j = 0; j < cn; j++ )
                {
601
                    fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale);
602 603 604 605 606 607 608 609
                    fp[j][1] = (float)((p2[j] + p1[j])*0.5);
                }
            }
            else
            {
                dp = (Vec2d*)(parambuf + cn*2);
                for( j = 0; j < cn; j++ )
                {
610
                    dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale;
611 612
                    dp[j][1] = ((p2[j] + p1[j])*0.5);
                }
613
            }
614 615

            func = randTab[0][depth];
616
        }
617
        CV_Assert( func != 0 );
618 619 620
    }
    else if( disttype == CV_RAND_NORMAL )
    {
621
        _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn));
622
        double* parambuf = _parambuf;
623

624
        int ptype = depth == CV_64F ? CV_64F : CV_32F;
625
        int esz = (int)CV_ELEM_SIZE(ptype);
626

627
        if( _param1.isContinuous() && _param1.type() == ptype )
628
            mean = _param1.ptr();
629
        else
630
        {
631 632 633
            Mat tmp(_param1.size(), ptype, parambuf);
            _param1.convertTo(tmp, ptype);
            mean = (uchar*)parambuf;
634
        }
635

636 637 638
        if( n1 < cn )
            for( j = n1*esz; j < cn*esz; j++ )
                mean[j] = mean[j - n1*esz];
639

640
        if( _param2.isContinuous() && _param2.type() == ptype )
641
            stddev = _param2.ptr();
642 643 644 645 646 647
        else
        {
            Mat tmp(_param2.size(), ptype, parambuf + cn);
            _param2.convertTo(tmp, ptype);
            stddev = (uchar*)(parambuf + cn);
        }
648

649 650 651
        if( n1 < cn )
            for( j = n1*esz; j < cn*esz; j++ )
                stddev[j] = stddev[j - n1*esz];
652

653 654 655
        stdmtx = _param2.rows == cn && _param2.cols == cn;
        scaleFunc = randnScaleTab[depth];
        CV_Assert( scaleFunc != 0 );
656 657 658 659
    }
    else
        CV_Error( CV_StsBadArg, "Unknown distribution type" );

660 661 662 663 664 665 666 667
    const Mat* arrays[] = {&mat, 0};
    uchar* ptr;
    NAryMatIterator it(arrays, &ptr);
    int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total);
    size_t esz = mat.elemSize();
    AutoBuffer<double> buf;
    uchar* param = 0;
    float* nbuf = 0;
668

669
    if( disttype == UNIFORM )
670
    {
671 672
        buf.allocate(blockSize*cn*4);
        param = (uchar*)(double*)buf;
673

674
        if( ip )
675
        {
676 677 678 679 680 681 682 683
            if( ds )
            {
                DivStruct* p = (DivStruct*)param;
                for( j = 0; j < blockSize*cn; j += cn )
                    for( k = 0; k < cn; k++ )
                        p[j + k] = ds[k];
            }
            else
684
            {
685 686 687 688
                Vec2i* p = (Vec2i*)param;
                for( j = 0; j < blockSize*cn; j += cn )
                    for( k = 0; k < cn; k++ )
                        p[j + k] = ip[k];
689
            }
690 691 692 693 694 695 696 697 698 699 700 701 702 703
        }
        else if( fp )
        {
            Vec2f* p = (Vec2f*)param;
            for( j = 0; j < blockSize*cn; j += cn )
                for( k = 0; k < cn; k++ )
                    p[j + k] = fp[k];
        }
        else
        {
            Vec2d* p = (Vec2d*)param;
            for( j = 0; j < blockSize*cn; j += cn )
                for( k = 0; k < cn; k++ )
                    p[j + k] = dp[k];
704 705
        }
    }
706 707 708 709 710
    else
    {
        buf.allocate((blockSize*cn+1)/2);
        nbuf = (float*)(double*)buf;
    }
711

712
    for( size_t i = 0; i < it.nplanes; i++, ++it )
713
    {
714 715 716
        for( j = 0; j < total; j += blockSize )
        {
            int len = std::min(total - j, blockSize);
717

718 719 720 721 722 723 724 725 726
            if( disttype == CV_RAND_UNI )
                func( ptr, len*cn, &state, param, smallFlag != 0 );
            else
            {
                randn_0_1_32f(nbuf, len*cn, &state);
                scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx);
            }
            ptr += len*esz;
        }
727
    }
728 729
}

730 731
}

732
cv::RNG& cv::theRNG()
733
{
734
    return coreTlsData.get()->rng;
735
}
736

737
void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
738 739 740 741
{
    theRNG().fill(dst, RNG::UNIFORM, low, high);
}

742
void cv::randn(InputOutputArray dst, InputArray mean, InputArray stddev)
743 744
{
    theRNG().fill(dst, RNG::NORMAL, mean, stddev);
745 746
}

747 748 749
namespace cv
{

750 751 752 753 754 755
template<typename T> static void
randShuffle_( Mat& _arr, RNG& rng, double iterFactor )
{
    int sz = _arr.rows*_arr.cols, iters = cvRound(iterFactor*sz);
    if( _arr.isContinuous() )
    {
756
        T* arr = _arr.ptr<T>();
757 758 759 760 761 762 763 764
        for( int i = 0; i < iters; i++ )
        {
            int j = (unsigned)rng % sz, k = (unsigned)rng % sz;
            std::swap( arr[j], arr[k] );
        }
    }
    else
    {
765
        uchar* data = _arr.ptr();
766 767 768 769 770 771 772 773 774 775 776 777 778 779
        size_t step = _arr.step;
        int cols = _arr.cols;
        for( int i = 0; i < iters; i++ )
        {
            int j1 = (unsigned)rng % sz, k1 = (unsigned)rng % sz;
            int j0 = j1/cols, k0 = k1/cols;
            j1 -= j0*cols; k1 -= k0*cols;
            std::swap( ((T*)(data + step*j0))[j1], ((T*)(data + step*k0))[k1] );
        }
    }
}

typedef void (*RandShuffleFunc)( Mat& dst, RNG& rng, double iterFactor );

780
}
781

782
void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
783 784 785 786 787 788 789 790 791 792 793
{
    RandShuffleFunc tab[] =
    {
        0,
        randShuffle_<uchar>, // 1
        randShuffle_<ushort>, // 2
        randShuffle_<Vec<uchar,3> >, // 3
        randShuffle_<int>, // 4
        0,
        randShuffle_<Vec<ushort,3> >, // 6
        0,
794
        randShuffle_<Vec<int,2> >, // 8
795 796 797
        0, 0, 0,
        randShuffle_<Vec<int,3> >, // 12
        0, 0, 0,
798
        randShuffle_<Vec<int,4> >, // 16
799
        0, 0, 0, 0, 0, 0, 0,
800
        randShuffle_<Vec<int,6> >, // 24
801
        0, 0, 0, 0, 0, 0, 0,
802
        randShuffle_<Vec<int,8> > // 32
803
    };
804

805
    Mat dst = _dst.getMat();
806 807 808 809 810 811 812 813 814 815 816 817 818 819
    RNG& rng = _rng ? *_rng : theRNG();
    CV_Assert( dst.elemSize() <= 32 );
    RandShuffleFunc func = tab[dst.elemSize()];
    CV_Assert( func != 0 );
    func( dst, rng, iterFactor );
}

CV_IMPL void
cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
{
    cv::Mat mat = cv::cvarrToMat(arr);
    // !!! this will only work for current 64-bit MWC RNG !!!
    cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
    rng.fill(mat, disttype == CV_RAND_NORMAL ?
820
        cv::RNG::NORMAL : cv::RNG::UNIFORM, cv::Scalar(param1), cv::Scalar(param2) );
821 822 823 824 825 826 827 828 829
}

CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
{
    cv::Mat dst = cv::cvarrToMat(arr);
    cv::RNG& rng = _rng ? (cv::RNG&)*_rng : cv::theRNG();
    cv::randShuffle( dst, iter_factor, &rng );
}

830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954
// Mersenne Twister random number generator.
// Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c

/*
   A C-program for MT19937, with initialization improved 2002/1/26.
   Coded by Takuji Nishimura and Makoto Matsumoto.

   Before using, initialize the state by using init_genrand(seed)
   or init_by_array(init_key, key_length).

   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

     1. Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.

     2. Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.

     3. The names of its contributors may not be used to endorse or promote
        products derived from this software without specific prior written
        permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


   Any feedback is very welcome.
   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
*/

cv::RNG_MT19937::RNG_MT19937(unsigned s) { seed(s); }

cv::RNG_MT19937::RNG_MT19937() { seed(5489U); }

void cv::RNG_MT19937::seed(unsigned s)
{
    state[0]= s;
    for (mti = 1; mti < N; mti++)
    {
        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
        state[mti] = (1812433253U * (state[mti - 1] ^ (state[mti - 1] >> 30)) + mti);
    }
}

unsigned cv::RNG_MT19937::next()
{
    /* mag01[x] = x * MATRIX_A  for x=0,1 */
    static unsigned mag01[2] = { 0x0U, /*MATRIX_A*/ 0x9908b0dfU};

    const unsigned UPPER_MASK = 0x80000000U;
    const unsigned LOWER_MASK = 0x7fffffffU;

    /* generate N words at one time */
    if (mti >= N)
    {
        int kk = 0;

        for (; kk < N - M; ++kk)
        {
            unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
            state[kk] = state[kk + M] ^ (y >> 1) ^ mag01[y & 0x1U];
        }

        for (; kk < N - 1; ++kk)
        {
            unsigned y = (state[kk] & UPPER_MASK) | (state[kk + 1] & LOWER_MASK);
            state[kk] = state[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1U];
        }

        unsigned y = (state[N - 1] & UPPER_MASK) | (state[0] & LOWER_MASK);
        state[N - 1] = state[M - 1] ^ (y >> 1) ^ mag01[y & 0x1U];

        mti = 0;
    }

    unsigned y = state[mti++];

    /* Tempering */
    y ^= (y >> 11);
    y ^= (y <<  7) & 0x9d2c5680U;
    y ^= (y << 15) & 0xefc60000U;
    y ^= (y >> 18);

    return y;
}

cv::RNG_MT19937::operator unsigned() { return next(); }

cv::RNG_MT19937::operator int() { return (int)next();}

cv::RNG_MT19937::operator float() { return next() * (1.f / 4294967296.f); }

cv::RNG_MT19937::operator double()
{
    unsigned a = next() >> 5;
    unsigned b = next() >> 6;
    return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
}

int cv::RNG_MT19937::uniform(int a, int b) { return (int)(next() % (b - a) + a); }

float cv::RNG_MT19937::uniform(float a, float b) { return ((float)*this)*(b - a) + a; }

double cv::RNG_MT19937::uniform(double a, double b) { return ((double)*this)*(b - a) + a; }

unsigned cv::RNG_MT19937::operator ()(unsigned b) { return next() % b; }

unsigned cv::RNG_MT19937::operator ()() { return next(); }

955
/* End of file. */