optimized lapack' SVD for noticeably better performance on small matrices

e48a456d · Vadim Pisarevsky · fea66d93 · e48a456d · e48a456d · e48a456d
Commit e48a456d authored Aug 30, 2010 by Vadim Pisarevsky
18 changed files
--- a/3rdparty/include/cblas.h
+++ b/3rdparty/include/cblas.h
@@ -37,11 +37,28 @@ static __inline double r_sign(real *a, real *b)
    return *b >= 0 ? x : -x;
 }

+extern const unsigned char lapack_toupper_tab[];
+#define lapack_toupper(c) ((char)lapack_toupper_tab[(unsigned char)(c)])
+
+extern const unsigned char lapack_lamch_tab[];
+extern const doublereal lapack_dlamch_tab[];
+extern const doublereal lapack_slamch_tab[];
+    
 static __inline logical lsame_(char *ca, char *cb)
 {
-    return toupper(ca[0]) == toupper(cb[0]);
+    return lapack_toupper(ca[0]) == lapack_toupper(cb[0]);
 }

+static __inline doublereal dlamch_(char* cmach)
+{
+    return lapack_dlamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]];
+}
+    
+static __inline doublereal slamch_(char* cmach)
+{
+    return lapack_slamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]];
+}    
+    
 static __inline integer i_nint(real *x)
 {
    return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x));

--- a/3rdparty/include/clapack.h
+++ b/3rdparty/include/clapack.h
@@ -3680,8 +3680,6 @@ doublereal dsecnd_();

 doublereal second_();

-doublereal slamch_(char *cmach);
-
 /* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical 
 	*ieee1);

@@ -3696,8 +3694,6 @@ doublereal slamc3_(real *a, real *b);
 		    logical *ieee, integer *emax, real *rmax);


-doublereal dlamch_(char *cmach);
-
 /* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical
 		    *ieee1);

@@ -3712,9 +3708,6 @@ doublereal dlamc3_(doublereal *a, doublereal *b);
 /* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin,
 		    logical *ieee, integer *emax, doublereal *rmax);

-integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, 
-	integer *n2, integer *n3, integer *n4);
-
 #ifdef __cplusplus
 }
 #endif

--- a/3rdparty/include/f2c.h
+++ b/3rdparty/include/f2c.h
@@ -7,6 +7,7 @@
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE

+#include <assert.h>
 #include <math.h>
 #include <ctype.h>
 #include <stdlib.h>
@@ -17,6 +18,10 @@
 #include <string.h>
 #include <stdio.h>

+#if __SSE2__ || defined _M_X64
+#include "emmintrin.h"
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif

--- a/3rdparty/lapack/dgemv.c
+++ b/3rdparty/lapack/dgemv.c
-/* dgemv.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
 #include "clapack.h"


-/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
-	alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
-	doublereal *beta, doublereal *y, integer *incy)
+/* Subroutine */ int dgemv_(char *_trans, integer *_m, integer *_n, doublereal *
+	_alpha, doublereal *a, integer *_lda, doublereal *x, integer *_incx, 
+	doublereal *_beta, doublereal *y, integer *_incy)
 {
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    doublereal temp;
-    integer lenx, leny;
-    extern logical lsame_(char *, char *);
-    extern /* Subroutine */ int xerbla_(char *, integer *);
-
 /*     .. Scalar Arguments .. */
 /*     .. */
 /*     .. Array Arguments .. */
@@ -136,175 +114,123 @@

 /*     Test the input parameters. */

-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
-	    ) {
-	info = 1;
-    } else if (*m < 0) {
-	info = 2;
-    } else if (*n < 0) {
-	info = 3;
-    } else if (*lda < max(1,*m)) {
-	info = 6;
-    } else if (*incx == 0) {
-	info = 8;
-    } else if (*incy == 0) {
-	info = 11;
-    }
-    if (info != 0) {
-	xerbla_("DGEMV ", &info);
-	return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-	return 0;
-    }
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N")) {
-	lenx = *n;
-	leny = *m;
-    } else {
-	lenx = *m;
-	leny = *n;
-    }
-    if (*incx > 0) {
-	kx = 1;
-    } else {
-	kx = 1 - (lenx - 1) * *incx;
+    char trans = lapack_toupper(_trans[0]);
+    integer i, j, m = *_m, n = *_n, lda = *_lda, incx = *_incx, incy = *_incy;
+    integer leny = trans == 'N' ? m : n, lenx = trans == 'N' ? n : m;
+    real alpha = *_alpha, beta = *_beta;
+    
+    integer info = 0;
+    if (trans != 'N' && trans != 'T' && trans != 'C')
+        info = 1;
+    else if (m < 0)
+        info = 2;
+    else if (n < 0)
+        info = 3;
+    else if (lda < max(1,m))
+        info = 6;
+    else if (incx == 0)
+        info = 8;
+    else if (incy == 0)
+        info = 11;
+    
+    if (info != 0)
+    {
+        xerbla_("SGEMV ", &info);
+        return 0;
    }
-    if (*incy > 0) {
-	ky = 1;
-    } else {
-	ky = 1 - (leny - 1) * *incy;
+    
+    if( incy < 0 )
+        y -= incy*(leny - 1);
+    if( incx < 0 )
+        x -= incx*(lenx - 1);
+    
+    /*     Start the operations. In this version the elements of A are */
+    /*     accessed sequentially with one pass through A. */
+    
+    if( beta != 1. )
+    {
+        if( incy == 1 )
+        {
+            if( beta == 0. )
+                for( i = 0; i < leny; i++ )
+                    y[i] = 0.;
+            else
+                for( i = 0; i < leny; i++ )
+                    y[i] *= beta;
+        }
+        else
+        {
+            if( beta == 0. )
+                for( i = 0; i < leny; i++ )
+                    y[i*incy] = 0.;
+            else
+                for( i = 0; i < leny; i++ )
+                    y[i*incy] *= beta;
+        }
    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-	if (*incy == 1) {
-	    if (*beta == 0.) {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = 0.;
-/* L10: */
-		}
-	    } else {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = *beta * y[i__];
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (*beta == 0.) {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = 0.;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = *beta * y[iy];
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
+    
+    if( alpha == 0. )
+        ;
+    else if( trans == 'N' )
+    {
+        if( incy == 1 )
+        {
+            for( i = 0; i < n; i++, a += lda )
+            {
+                doublereal s = x[i*incx];
+                if( s == 0. )
+                    continue;
+                s *= alpha;
+                for( j = 0; j <= m - 2; j += 2 )
+                {
+                    doublereal t0 = y[j] + s*a[j];
+                    doublereal t1 = y[j+1] + s*a[j+1];
+                    y[j] = t0; y[j+1] = t1;
+                }
+                
+                for( ; j < m; j++ )
+                    y[j] += s*a[j];
+            }
+        }
+        else
+        {
+            for( i = 0; i < n; i++, a += lda )
+            {
+                doublereal s = x[i*incx];
+                if( s == 0. )
+                    continue;
+                s *= alpha;
+                for( j = 0; j < m; j++ )
+                    y[j*incy] += s*a[j];
+            }
+        }
    }
-    if (*alpha == 0.) {
-	return 0;
+    else
+    {
+        if( incx == 1 )
+        {
+            for( i = 0; i < n; i++, a += lda )
+            {
+                doublereal s = 0;
+                for( j = 0; j <= m - 2; j += 2 )
+                    s += x[j]*a[j] + x[j+1]*a[j+1];
+                for( ; j < m; j++ )
+                    s += x[j]*a[j];
+                y[i*incy] += alpha*s;
+            }
+        }
+        else
+        {
+            for( i = 0; i < n; i++, a += lda )
+            {
+                doublereal s = 0;
+                for( j = 0; j < m; j++ )
+                    s += x[j*incx]*a[j];
+                y[i*incy] += alpha*s;
+            }
+        }
    }
-    if (lsame_(trans, "N")) {
-
-/*        Form  y := alpha*A*x + y. */
-
-	jx = kx;
-	if (*incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		if (x[jx] != 0.) {
-		    temp = *alpha * x[jx];
-		    i__2 = *m;
-		    for (i__ = 1; i__ <= i__2; ++i__) {
-			y[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-		    }
-		}
-		jx += *incx;
-/* L60: */
-	    }
-	} else {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		if (x[jx] != 0.) {
-		    temp = *alpha * x[jx];
-		    iy = ky;
-		    i__2 = *m;
-		    for (i__ = 1; i__ <= i__2; ++i__) {
-			y[iy] += temp * a[i__ + j * a_dim1];
-			iy += *incy;
-/* L70: */
-		    }
-		}
-		jx += *incx;
-/* L80: */
-	    }
-	}
-    } else {
-
-/*        Form  y := alpha*A'*x + y. */
-
-	jy = ky;
-	if (*incx == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp = 0.;
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-		}
-		y[jy] += *alpha * temp;
-		jy += *incy;
-/* L100: */
-	    }
-	} else {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp = 0.;
-		ix = kx;
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    temp += a[i__ + j * a_dim1] * x[ix];
-		    ix += *incx;
-/* L110: */
-		}
-		y[jy] += *alpha * temp;
-		jy += *incy;
-/* L120: */
-	    }
-	}
-    }
-
+    
    return 0;

 /*     End of DGEMV . */

--- a/3rdparty/lapack/dger.c
+++ b/3rdparty/lapack/dger.c
-/* dger.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
 #include "clapack.h"


-/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, 
-	doublereal *x, integer *incx, doublereal *y, integer *incy, 
-	doublereal *a, integer *lda)
+/* Subroutine */ int dger_(integer *_m, integer *_n, doublereal *_alpha, 
+	doublereal *x, integer *_incx, doublereal *y, integer *_incy, 
+	doublereal *a, integer *_lda)
 {
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    integer i__, j, ix, jy, kx, info;
-    doublereal temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *);

 /*     .. Scalar Arguments .. */
 /*     .. */
@@ -111,80 +92,70 @@

 /*     Test the input parameters. */

-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-
    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*incx == 0) {
-	info = 5;
-    } else if (*incy == 0) {
-	info = 7;
-    } else if (*lda < max(1,*m)) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("DGER  ", &info);
-	return 0;
+    integer i, j, m = *_m, n = *_n, incx = *_incx, incy = *_incy, lda = *_lda;
+    doublereal alpha = *_alpha;
+    integer info = 0;
+    
+    if (m < 0)
+        info = 1;
+    else if (n < 0)
+        info = 2;
+    else if (incx == 0)
+        info = 5;
+    else if (incy == 0)
+        info = 7;
+    else if (lda < max(1,m))
+        info = 9;
+    
+    if (info != 0)
+    {
+        xerbla_("DGER  ", &info);
+        return 0;
    }

-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.) {
-	return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-	jy = 1;
-    } else {
-	jy = 1 - (*n - 1) * *incy;
+    if (incx < 0)
+        x -= (m-1)*incx;
+    if (incy < 0)
+        y -= (n-1)*incy;
+
+    /*     Start the operations. In this version the elements of A are */
+    /*     accessed sequentially with one pass through A. */
+    
+    if( alpha == 0 )
+        ;
+    else if( incx == 1 )
+    {
+        for( j = 0; j < n; j++, a += lda )
+        {
+            doublereal s = y[j*incy];
+            if( s == 0 )
+                continue;
+            s *= alpha;
+            
+            for( i = 0; i <= m - 2; i += 2 )
+            {
+                doublereal t0 = a[i] + x[i]*s;
+                doublereal t1 = a[i+1] + x[i+1]*s;
+                a[i] = t0; a[i+1] = t1;
+            }
+            
+            for( ; i < m; i++ )
+                a[i] += x[i]*s;
+        }
    }
-    if (*incx == 1) {
-	i__1 = *n;
-	for (j = 1; j <= i__1; ++j) {
-	    if (y[jy] != 0.) {
-		temp = *alpha * y[jy];
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-		}
-	    }
-	    jy += *incy;
-/* L20: */
-	}
-    } else {
-	if (*incx > 0) {
-	    kx = 1;
-	} else {
-	    kx = 1 - (*m - 1) * *incx;
-	}
-	i__1 = *n;
-	for (j = 1; j <= i__1; ++j) {
-	    if (y[jy] != 0.) {
-		temp = *alpha * y[jy];
-		ix = kx;
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    a[i__ + j * a_dim1] += x[ix] * temp;
-		    ix += *incx;
-/* L30: */
-		}
-	    }
-	    jy += *incy;
-/* L40: */
-	}
+    else
+    {
+        for( j = 0; j < n; j++, a += lda )
+        {
+            doublereal s = y[j*incy];
+            if( s == 0 )
+                continue;
+            s *= alpha;
+            
+            for( i = 0; i < m; i++ )
+                a[i] += x[i*incx]*s;
+        }
    }

    return 0;

--- a/3rdparty/lapack/dlamch.c
+++ b/3rdparty/lapack/dlamch.c
-#include "clapack.h"
-#include <float.h>
-#include <stdio.h>
-
-/* *********************************************************************** */
-
-doublereal dlamc3_(doublereal *a, doublereal *b)
-{
-    /* System generated locals */
-    doublereal ret_val;
-    
-    
-    /*  -- LAPACK auxiliary routine (version 3.1) -- */
-    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-    /*     November 2006 */
-    
-    /*     .. Scalar Arguments .. */
-    /*     .. */
-    
-    /*  Purpose */
-    /*  ======= */
-    
-    /*  DLAMC3  is intended to force  A  and  B  to be stored prior to doing */
-    /*  the addition of  A  and  B ,  for use in situations where optimizers */
-    /*  might hold one of these in a register. */
-    
-    /*  Arguments */
-    /*  ========= */
-    
-    /*  A       (input) DOUBLE PRECISION */
-    /*  B       (input) DOUBLE PRECISION */
-    /*          The values A and B. */
-    
-    /* ===================================================================== */
-    
-    /*     .. Executable Statements .. */
-    
-    ret_val = *a + *b;
-    
-    return ret_val;
-    
-    /*     End of DLAMC3 */
-    
-} /* dlamc3_ */
-
-
-#if 1
-
-/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
-   taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
-
-#ifndef DBL_DIGITS
-#define DBL_DIGITS 53
-#endif
-
-doublereal
-dlamch_(char *cmach) {
-    char ch = cmach[0];
-    double eps = DBL_EPSILON, sfmin, small;
-    
-    if ('B' == ch || 'b' == ch) {
-        return FLT_RADIX;
-    } else if ('E' == ch || 'e' == ch) {
-        return eps;
-    } else if ('L' == ch || 'l' == ch) {
-        return DBL_MAX_EXP;
-    } else if ('M' == ch || 'm' == ch) {
-        return DBL_MIN_EXP;
-    } else if ('N' == ch || 'n' == ch) {
-        return DBL_DIGITS;
-    } else if ('O' == ch || 'o' == ch) {
-        return DBL_MAX;
-    } else if ('P' == ch || 'p' == ch) {
-        return eps * FLT_RADIX;
-    } else if ('R' == ch || 'r' == ch) {
-        return FLT_ROUNDS < 2;
-    } else if ('S' == ch || 's' == ch) {
-        /* Use SMALL plus a bit, to avoid the possibility of rounding causing overflow
-         when computing  1/sfmin. */
-        sfmin = DBL_MIN;
-        small = 2. / DBL_MAX;
-        if (small <= sfmin) small = sfmin * (1 + eps);
-        return small;
-    } else if ('U' == ch || 'u' == ch) {
-        return DBL_MIN;
-    }
-    
-    return 0;
-}
-
-#else
-
-/* Table of constant values */
-
-static integer c__1 = 1;
-static doublereal c_b32 = 0.;
-
-doublereal dlamch_(char *cmach)
-{
-    /* Initialized data */
-
-    static logical first = TRUE_;
-
-    /* System generated locals */
-    integer i__1;
-    doublereal ret_val;
-
-    /* Builtin functions */
-    double pow_di(doublereal *, integer *);
-
-    /* Local variables */
-    static doublereal t;
-    integer it;
-    static doublereal rnd, eps, base;
-    integer beta;
-    static doublereal emin, prec, emax;
-    integer imin, imax;
-    logical lrnd;
-    static doublereal rmin, rmax;
-    doublereal rmach;
-    extern logical lsame_(char *, char *);
-    doublereal small;
-    static doublereal sfmin;
-    extern /* Subroutine */ int dlamc2_(integer *, integer *, logical *, 
-	    doublereal *, integer *, doublereal *, integer *, doublereal *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DLAMCH determines double precision machine parameters. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  CMACH   (input) CHARACTER*1 */
-/*          Specifies the value to be returned by DLAMCH: */
-/*          = 'E' or 'e',   DLAMCH := eps */
-/*          = 'S' or 's ,   DLAMCH := sfmin */
-/*          = 'B' or 'b',   DLAMCH := base */
-/*          = 'P' or 'p',   DLAMCH := eps*base */
-/*          = 'N' or 'n',   DLAMCH := t */
-/*          = 'R' or 'r',   DLAMCH := rnd */
-/*          = 'M' or 'm',   DLAMCH := emin */
-/*          = 'U' or 'u',   DLAMCH := rmin */
-/*          = 'L' or 'l',   DLAMCH := emax */
-/*          = 'O' or 'o',   DLAMCH := rmax */
-
-/*          where */
-
-/*          eps   = relative machine precision */
-/*          sfmin = safe minimum, such that 1/sfmin does not overflow */
-/*          base  = base of the machine */
-/*          prec  = eps*base */
-/*          t     = number of (base) digits in the mantissa */
-/*          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise */
-/*          emin  = minimum exponent before (gradual) underflow */
-/*          rmin  = underflow threshold - base**(emin-1) */
-/*          emax  = largest exponent before overflow */
-/*          rmax  = overflow threshold  - (base**emax)*(1-eps) */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Save statement .. */
-/*     .. */
-/*     .. Data statements .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    if (first) {
-	dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
-	base = (doublereal) beta;
-	t = (doublereal) it;
-	if (lrnd) {
-	    rnd = 1.;
-	    i__1 = 1 - it;
-	    eps = pow_di(&base, &i__1) / 2;
-	} else {
-	    rnd = 0.;
-	    i__1 = 1 - it;
-	    eps = pow_di(&base, &i__1);
-	}
-	prec = eps * base;
-	emin = (doublereal) imin;
-	emax = (doublereal) imax;
-	sfmin = rmin;
-	small = 1. / rmax;
-	if (small >= sfmin) {
-
-/*           Use SMALL plus a bit, to avoid the possibility of rounding */
-/*           causing overflow when computing  1/sfmin. */
-
-	    sfmin = small * (eps + 1.);
-	}
-    }
-
-    if (lsame_(cmach, "E")) {
-	rmach = eps;
-    } else if (lsame_(cmach, "S")) {
-	rmach = sfmin;
-    } else if (lsame_(cmach, "B")) {
-	rmach = base;
-    } else if (lsame_(cmach, "P")) {
-	rmach = prec;
-    } else if (lsame_(cmach, "N")) {
-	rmach = t;
-    } else if (lsame_(cmach, "R")) {
-	rmach = rnd;
-    } else if (lsame_(cmach, "M")) {
-	rmach = emin;
-    } else if (lsame_(cmach, "U")) {
-	rmach = rmin;
-    } else if (lsame_(cmach, "L")) {
-	rmach = emax;
-    } else if (lsame_(cmach, "O")) {
-	rmach = rmax;
-    }
-
-    ret_val = rmach;
-    first = FALSE_;
-    return ret_val;
-
-/*     End of DLAMCH */
-
-} /* dlamch_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical 
-	*ieee1)
-{
-    /* Initialized data */
-
-    static logical first = TRUE_;
-
-    /* System generated locals */
-    doublereal d__1, d__2;
-
-    /* Local variables */
-    doublereal a, b, c__, f, t1, t2;
-    static integer lt;
-    doublereal one, qtr;
-    static logical lrnd;
-    static integer lbeta;
-    doublereal savec;
-    extern doublereal dlamc3_(doublereal *, doublereal *);
-    static logical lieee1;
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DLAMC1 determines the machine parameters given by BETA, T, RND, and */
-/*  IEEE1. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  BETA    (output) INTEGER */
-/*          The base of the machine. */
-
-/*  T       (output) INTEGER */
-/*          The number of ( BETA ) digits in the mantissa. */
-
-/*  RND     (output) LOGICAL */
-/*          Specifies whether proper rounding  ( RND = .TRUE. )  or */
-/*          chopping  ( RND = .FALSE. )  occurs in addition. This may not */
-/*          be a reliable guide to the way in which the machine performs */
-/*          its arithmetic. */
-
-/*  IEEE1   (output) LOGICAL */
-/*          Specifies whether rounding appears to be done in the IEEE */
-/*          'round to nearest' style. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  The routine is based on the routine  ENVRON  by Malcolm and */
-/*  incorporates suggestions by Gentleman and Marovich. See */
-
-/*     Malcolm M. A. (1972) Algorithms to reveal properties of */
-/*        floating-point arithmetic. Comms. of the ACM, 15, 949-951. */
-
-/*     Gentleman W. M. and Marovich S. B. (1974) More on algorithms */
-/*        that reveal properties of floating point arithmetic units. */
-/*        Comms. of the ACM, 17, 276-277. */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Save statement .. */
-/*     .. */
-/*     .. Data statements .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    if (first) {
-	one = 1.;
-
-/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BETA, */
-/*        IEEE1, T and RND. */
-
-/*        Throughout this routine  we use the function  DLAMC3  to ensure */
-/*        that relevant values are  stored and not held in registers,  or */
-/*        are not affected by optimizers. */
-
-/*        Compute  a = 2.0**m  with the  smallest positive integer m such */
-/*        that */
-
-/*           fl( a + 1.0 ) = a. */
-
-	a = 1.;
-	c__ = 1.;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L10:
-	if (c__ == one) {
-	    a *= 2;
-	    c__ = dlamc3_(&a, &one);
-	    d__1 = -a;
-	    c__ = dlamc3_(&c__, &d__1);
-	    goto L10;
-	}
-/* +       END WHILE */
-
-/*        Now compute  b = 2.0**m  with the smallest positive integer m */
-/*        such that */
-
-/*           fl( a + b ) .gt. a. */
-
-	b = 1.;
-	c__ = dlamc3_(&a, &b);
-
-/* +       WHILE( C.EQ.A )LOOP */
-L20:
-	if (c__ == a) {
-	    b *= 2;
-	    c__ = dlamc3_(&a, &b);
-	    goto L20;
-	}
-/* +       END WHILE */
-
-/*        Now compute the base.  a and c  are neighbouring floating point */
-/*        numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so */
-/*        their difference is beta. Adding 0.25 to c is to ensure that it */
-/*        is truncated to beta and not ( beta - 1 ). */
-
-	qtr = one / 4;
-	savec = c__;
-	d__1 = -a;
-	c__ = dlamc3_(&c__, &d__1);
-	lbeta = (integer) (c__ + qtr);
-
-/*        Now determine whether rounding or chopping occurs,  by adding a */
-/*        bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a. */
-
-	b = (doublereal) lbeta;
-	d__1 = b / 2;
-	d__2 = -b / 100;
-	f = dlamc3_(&d__1, &d__2);
-	c__ = dlamc3_(&f, &a);
-	if (c__ == a) {
-	    lrnd = TRUE_;
-	} else {
-	    lrnd = FALSE_;
-	}
-	d__1 = b / 2;
-	d__2 = b / 100;
-	f = dlamc3_(&d__1, &d__2);
-	c__ = dlamc3_(&f, &a);
-	if (lrnd && c__ == a) {
-	    lrnd = FALSE_;
-	}
-
-/*        Try and decide whether rounding is done in the  IEEE  'round to */
-/*        nearest' style. B/2 is half a unit in the last place of the two */
-/*        numbers A and SAVEC. Furthermore, A is even, i.e. has last  bit */
-/*        zero, and SAVEC is odd. Thus adding B/2 to A should not  change */
-/*        A, but adding B/2 to SAVEC should change SAVEC. */
-
-	d__1 = b / 2;
-	t1 = dlamc3_(&d__1, &a);
-	d__1 = b / 2;
-	t2 = dlamc3_(&d__1, &savec);
-	lieee1 = t1 == a && t2 > savec && lrnd;
-
-/*        Now find  the  mantissa, t.  It should  be the  integer part of */
-/*        log to the base beta of a,  however it is safer to determine  t */
-/*        by powering.  So we find t as the smallest positive integer for */
-/*        which */
-
-/*           fl( beta**t + 1.0 ) = 1.0. */
-
-	lt = 0;
-	a = 1.;
-	c__ = 1.;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L30:
-	if (c__ == one) {
-	    ++lt;
-	    a *= lbeta;
-	    c__ = dlamc3_(&a, &one);
-	    d__1 = -a;
-	    c__ = dlamc3_(&c__, &d__1);
-	    goto L30;
-	}
-/* +       END WHILE */
-
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *ieee1 = lieee1;
-    first = FALSE_;
-    return 0;
-
-/*     End of DLAMC1 */
-
-} /* dlamc1_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd, 
-	doublereal *eps, integer *emin, doublereal *rmin, integer *emax, 
-	doublereal *rmax)
-{
-    /* Initialized data */
-
-    static logical first = TRUE_;
-    static logical iwarn = FALSE_;
-
-    /* Format strings */
-    static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre"
-	    "ct:-\002,\002  EMIN = \002,i8,/\002 If, after inspection, the va"
-	    "lue EMIN looks\002,\002 acceptable please comment out \002,/\002"
-	    " the IF block as marked within the code of routine\002,\002 DLAM"
-	    "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)";
-
-    /* System generated locals */
-    integer i__1;
-    doublereal d__1, d__2, d__3, d__4, d__5;
-
-    /* Builtin functions */
-    double pow_di(doublereal *, integer *);
-    //integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
-
-    /* Local variables */
-    doublereal a, b, c__;
-    integer i__;
-    static integer lt;
-    doublereal one, two;
-    logical ieee;
-    doublereal half;
-    logical lrnd;
-    static doublereal leps;
-    doublereal zero;
-    static integer lbeta;
-    doublereal rbase;
-    static integer lemin, lemax;
-    integer gnmin;
-    doublereal small;
-    integer gpmin;
-    doublereal third;
-    static doublereal lrmin, lrmax;
-    doublereal sixth;
-    extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *, 
-	    logical *);
-    extern doublereal dlamc3_(doublereal *, doublereal *);
-    logical lieee1;
-    extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *), 
-	    dlamc5_(integer *, integer *, integer *, logical *, integer *, 
-	    doublereal *);
-    integer ngnmin, ngpmin;
-
-    /* Fortran I/O blocks */
-    static cilist io___58 = { 0, 6, 0, fmt_9999, 0 };
-
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DLAMC2 determines the machine parameters specified in its argument */
-/*  list. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  BETA    (output) INTEGER */
-/*          The base of the machine. */
-
-/*  T       (output) INTEGER */
-/*          The number of ( BETA ) digits in the mantissa. */
-
-/*  RND     (output) LOGICAL */
-/*          Specifies whether proper rounding  ( RND = .TRUE. )  or */
-/*          chopping  ( RND = .FALSE. )  occurs in addition. This may not */
-/*          be a reliable guide to the way in which the machine performs */
-/*          its arithmetic. */
-
-/*  EPS     (output) DOUBLE PRECISION */
-/*          The smallest positive number such that */
-
-/*             fl( 1.0 - EPS ) .LT. 1.0, */
-
-/*          where fl denotes the computed value. */
-
-/*  EMIN    (output) INTEGER */
-/*          The minimum exponent before (gradual) underflow occurs. */
-
-/*  RMIN    (output) DOUBLE PRECISION */
-/*          The smallest normalized number for the machine, given by */
-/*          BASE**( EMIN - 1 ), where  BASE  is the floating point value */
-/*          of BETA. */
-
-/*  EMAX    (output) INTEGER */
-/*          The maximum exponent before overflow occurs. */
-
-/*  RMAX    (output) DOUBLE PRECISION */
-/*          The largest positive number for the machine, given by */
-/*          BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point */
-/*          value of BETA. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  The computation of  EPS  is based on a routine PARANOIA by */
-/*  W. Kahan of the University of California at Berkeley. */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Save statement .. */
-/*     .. */
-/*     .. Data statements .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    if (first) {
-	zero = 0.;
-	one = 1.;
-	two = 2.;
-
-/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values of */
-/*        BETA, T, RND, EPS, EMIN and RMIN. */
-
-/*        Throughout this routine  we use the function  DLAMC3  to ensure */
-/*        that relevant values are stored  and not held in registers,  or */
-/*        are not affected by optimizers. */
-
-/*        DLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. */
-
-	dlamc1_(&lbeta, &lt, &lrnd, &lieee1);
-
-/*        Start to find EPS. */
-
-	b = (doublereal) lbeta;
-	i__1 = -lt;
-	a = pow_di(&b, &i__1);
-	leps = a;
-
-/*        Try some tricks to see whether or not this is the correct  EPS. */
-
-	b = two / 3;
-	half = one / 2;
-	d__1 = -half;
-	sixth = dlamc3_(&b, &d__1);
-	third = dlamc3_(&sixth, &sixth);
-	d__1 = -half;
-	b = dlamc3_(&third, &d__1);
-	b = dlamc3_(&b, &sixth);
-	b = abs(b);
-	if (b < leps) {
-	    b = leps;
-	}
-
-	leps = 1.;
-
-/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
-L10:
-	if (leps > b && b > zero) {
-	    leps = b;
-	    d__1 = half * leps;
-/* Computing 5th power */
-	    d__3 = two, d__4 = d__3, d__3 *= d__3;
-/* Computing 2nd power */
-	    d__5 = leps;
-	    d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
-	    c__ = dlamc3_(&d__1, &d__2);
-	    d__1 = -c__;
-	    c__ = dlamc3_(&half, &d__1);
-	    b = dlamc3_(&half, &c__);
-	    d__1 = -b;
-	    c__ = dlamc3_(&half, &d__1);
-	    b = dlamc3_(&half, &c__);
-	    goto L10;
-	}
-/* +       END WHILE */
-
-	if (a < leps) {
-	    leps = a;
-	}
-
-/*        Computation of EPS complete. */
-
-/*        Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)). */
-/*        Keep dividing  A by BETA until (gradual) underflow occurs. This */
-/*        is detected when we cannot recover the previous A. */
-
-	rbase = one / lbeta;
-	small = one;
-	for (i__ = 1; i__ <= 3; ++i__) {
-	    d__1 = small * rbase;
-	    small = dlamc3_(&d__1, &zero);
-/* L20: */
-	}
-	a = dlamc3_(&one, &small);
-	dlamc4_(&ngpmin, &one, &lbeta);
-	d__1 = -one;
-	dlamc4_(&ngnmin, &d__1, &lbeta);
-	dlamc4_(&gpmin, &a, &lbeta);
-	d__1 = -a;
-	dlamc4_(&gnmin, &d__1, &lbeta);
-	ieee = FALSE_;
-
-	if (ngpmin == ngnmin && gpmin == gnmin) {
-	    if (ngpmin == gpmin) {
-		lemin = ngpmin;
-/*            ( Non twos-complement machines, no gradual underflow; */
-/*              e.g.,  VAX ) */
-	    } else if (gpmin - ngpmin == 3) {
-		lemin = ngpmin - 1 + lt;
-		ieee = TRUE_;
-/*            ( Non twos-complement machines, with gradual underflow; */
-/*              e.g., IEEE standard followers ) */
-	    } else {
-		lemin = min(ngpmin,gpmin);
-/*            ( A guess; no known machine ) */
-		iwarn = TRUE_;
-	    }
-
-	} else if (ngpmin == gpmin && ngnmin == gnmin) {
-	    if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
-		lemin = max(ngpmin,ngnmin);
-/*            ( Twos-complement machines, no gradual underflow; */
-/*              e.g., CYBER 205 ) */
-	    } else {
-		lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-		iwarn = TRUE_;
-	    }
-
-	} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
-		 {
-	    if (gpmin - min(ngpmin,ngnmin) == 3) {
-		lemin = max(ngpmin,ngnmin) - 1 + lt;
-/*            ( Twos-complement machines with gradual underflow; */
-/*              no known machine ) */
-	    } else {
-		lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-		iwarn = TRUE_;
-	    }
-
-	} else {
-/* Computing MIN */
-	    i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
-	    lemin = min(i__1,gnmin);
-/*         ( A guess; no known machine ) */
-	    iwarn = TRUE_;
-	}
-	first = FALSE_;
-/* ** */
-/* Comment out this if block if EMIN is ok */
-	if (iwarn) {
-	    first = TRUE_;
-	    printf("\n\n WARNING. The value EMIN may be incorrect:- ");
-	    printf("EMIN = %8i\n",lemin);
-	    printf("If, after inspection, the value EMIN looks acceptable");
-            printf("please comment out \n the IF block as marked within the"); 
-            printf("code of routine DLAMC2, \n otherwise supply EMIN"); 
-            printf("explicitly.\n");
-         /*
-	    s_wsfe(&io___58);
-	    do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer));
-	    e_wsfe();
-         */
-	}
-/* ** */
-
-/*        Assume IEEE arithmetic if we found denormalised  numbers above, */
-/*        or if arithmetic seems to round in the  IEEE style,  determined */
-/*        in routine DLAMC1. A true IEEE machine should have both  things */
-/*        true; however, faulty machines may have one or the other. */
-
-	ieee = ieee || lieee1;
-
-/*        Compute  RMIN by successive division by  BETA. We could compute */
-/*        RMIN as BASE**( EMIN - 1 ),  but some machines underflow during */
-/*        this computation. */
-
-	lrmin = 1.;
-	i__1 = 1 - lemin;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    d__1 = lrmin * rbase;
-	    lrmin = dlamc3_(&d__1, &zero);
-/* L30: */
-	}
-
-/*        Finally, call DLAMC5 to compute EMAX and RMAX. */
-
-	dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *eps = leps;
-    *emin = lemin;
-    *rmin = lrmin;
-    *emax = lemax;
-    *rmax = lrmax;
-
-    return 0;
-
-
-/*     End of DLAMC2 */
-
-} /* dlamc2_ */
-
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base)
-{
-    /* System generated locals */
-    integer i__1;
-    doublereal d__1;
-
-    /* Local variables */
-    doublereal a;
-    integer i__;
-    doublereal b1, b2, c1, c2, d1, d2, one, zero, rbase;
-    extern doublereal dlamc3_(doublereal *, doublereal *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DLAMC4 is a service routine for DLAMC2. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  EMIN    (output) INTEGER */
-/*          The minimum exponent before (gradual) underflow, computed by */
-/*          setting A = START and dividing by BASE until the previous A */
-/*          can not be recovered. */
-
-/*  START   (input) DOUBLE PRECISION */
-/*          The starting point for determining EMIN. */
-
-/*  BASE    (input) INTEGER */
-/*          The base of the machine. */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    a = *start;
-    one = 1.;
-    rbase = one / *base;
-    zero = 0.;
-    *emin = 1;
-    d__1 = a * rbase;
-    b1 = dlamc3_(&d__1, &zero);
-    c1 = a;
-    c2 = a;
-    d1 = a;
-    d2 = a;
-/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */
-/*    $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
-L10:
-    if (c1 == a && c2 == a && d1 == a && d2 == a) {
-	--(*emin);
-	a = b1;
-	d__1 = a / *base;
-	b1 = dlamc3_(&d__1, &zero);
-	d__1 = b1 * *base;
-	c1 = dlamc3_(&d__1, &zero);
-	d1 = zero;
-	i__1 = *base;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    d1 += b1;
-/* L20: */
-	}
-	d__1 = a * rbase;
-	b2 = dlamc3_(&d__1, &zero);
-	d__1 = b2 / rbase;
-	c2 = dlamc3_(&d__1, &zero);
-	d2 = zero;
-	i__1 = *base;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    d2 += b2;
-/* L30: */
-	}
-	goto L10;
-    }
-/* +    END WHILE */
-
-    return 0;
-
-/*     End of DLAMC4 */
-
-} /* dlamc4_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, 
-	logical *ieee, integer *emax, doublereal *rmax)
-{
-    /* System generated locals */
-    integer i__1;
-    doublereal d__1;
-
-    /* Local variables */
-    integer i__;
-    doublereal y, z__;
-    integer try__, lexp;
-    doublereal oldy;
-    integer uexp, nbits;
-    extern doublereal dlamc3_(doublereal *, doublereal *);
-    doublereal recbas;
-    integer exbits, expsum;
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DLAMC5 attempts to compute RMAX, the largest machine floating-point */
-/*  number, without overflow.  It assumes that EMAX + abs(EMIN) sum */
-/*  approximately to a power of 2.  It will fail on machines where this */
-/*  assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */
-/*  EMAX = 28718).  It will also fail if the value supplied for EMIN is */
-/*  too large (i.e. too close to zero), probably with overflow. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  BETA    (input) INTEGER */
-/*          The base of floating-point arithmetic. */
-
-/*  P       (input) INTEGER */
-/*          The number of base BETA digits in the mantissa of a */
-/*          floating-point value. */
-
-/*  EMIN    (input) INTEGER */
-/*          The minimum exponent before (gradual) underflow. */
-
-/*  IEEE    (input) LOGICAL */
-/*          A logical flag specifying whether or not the arithmetic */
-/*          system is thought to comply with the IEEE standard. */
-
-/*  EMAX    (output) INTEGER */
-/*          The largest exponent before overflow */
-
-/*  RMAX    (output) DOUBLE PRECISION */
-/*          The largest machine floating-point number. */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     First compute LEXP and UEXP, two powers of 2 that bound */
-/*     abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */
-/*     approximately to the bound that is closest to abs(EMIN). */
-/*     (EMAX is the exponent of the required number RMAX). */
-
-    lexp = 1;
-    exbits = 1;
-L10:
-    try__ = lexp << 1;
-    if (try__ <= -(*emin)) {
-	lexp = try__;
-	++exbits;
-	goto L10;
-    }
-    if (lexp == -(*emin)) {
-	uexp = lexp;
-    } else {
-	uexp = try__;
-	++exbits;
-    }
-
-/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater */
-/*     than or equal to EMIN. EXBITS is the number of bits needed to */
-/*     store the exponent. */
-
-    if (uexp + *emin > -lexp - *emin) {
-	expsum = lexp << 1;
-    } else {
-	expsum = uexp << 1;
-    }
-
-/*     EXPSUM is the exponent range, approximately equal to */
-/*     EMAX - EMIN + 1 . */
-
-    *emax = expsum + *emin - 1;
-    nbits = exbits + 1 + *p;
-
-/*     NBITS is the total number of bits needed to store a */
-/*     floating-point number. */
-
-    if (nbits % 2 == 1 && *beta == 2) {
-
-/*        Either there are an odd number of bits used to store a */
-/*        floating-point number, which is unlikely, or some bits are */
-/*        not used in the representation of numbers, which is possible, */
-/*        (e.g. Cray machines) or the mantissa has an implicit bit, */
-/*        (e.g. IEEE machines, Dec Vax machines), which is perhaps the */
-/*        most likely. We have to assume the last alternative. */
-/*        If this is true, then we need to reduce EMAX by one because */
-/*        there must be some way of representing zero in an implicit-bit */
-/*        system. On machines like Cray, we are reducing EMAX by one */
-/*        unnecessarily. */
-
-	--(*emax);
-    }
-
-    if (*ieee) {
-
-/*        Assume we are on an IEEE machine which reserves one exponent */
-/*        for infinity and NaN. */
-
-	--(*emax);
-    }
-
-/*     Now create RMAX, the largest machine number, which should */
-/*     be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */
-
-/*     First compute 1.0 - BETA**(-P), being careful that the */
-/*     result is less than 1.0 . */
-
-    recbas = 1. / *beta;
-    z__ = *beta - 1.;
-    y = 0.;
-    i__1 = *p;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	z__ *= recbas;
-	if (y < 1.) {
-	    oldy = y;
-	}
-	y = dlamc3_(&y, &z__);
-/* L20: */
-    }
-    if (y >= 1.) {
-	y = oldy;
-    }
-
-/*     Now multiply by BETA**EMAX to get RMAX. */
-
-    i__1 = *emax;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	d__1 = y * *beta;
-	y = dlamc3_(&d__1, &c_b32);
-/* L30: */
-    }
-
-    *rmax = y;
-    return 0;
-
-/*     End of DLAMC5 */
-
-} /* dlamc5_ */
-
-#endif
--- a/3rdparty/lapack/dlamch_custom.c
+++ b/3rdparty/lapack/dlamch_custom.c
+#include "clapack.h"
+#include <float.h>
+#include <stdio.h>
+
+/* *********************************************************************** */
+
+doublereal dlamc3_(doublereal *a, doublereal *b)
+{
+    /* System generated locals */
+    doublereal ret_val;
+    
+    
+    /*  -- LAPACK auxiliary routine (version 3.1) -- */
+    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+    /*     November 2006 */
+    
+    /*     .. Scalar Arguments .. */
+    /*     .. */
+    
+    /*  Purpose */
+    /*  ======= */
+    
+    /*  DLAMC3  is intended to force  A  and  B  to be stored prior to doing */
+    /*  the addition of  A  and  B ,  for use in situations where optimizers */
+    /*  might hold one of these in a register. */
+    
+    /*  Arguments */
+    /*  ========= */
+    
+    /*  A       (input) DOUBLE PRECISION */
+    /*  B       (input) DOUBLE PRECISION */
+    /*          The values A and B. */
+    
+    /* ===================================================================== */
+    
+    /*     .. Executable Statements .. */
+    
+    ret_val = *a + *b;
+    
+    return ret_val;
+    
+    /*     End of DLAMC3 */
+    
+} /* dlamc3_ */
+
+
+/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
+   taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
+
+#ifndef DBL_DIGITS
+#define DBL_DIGITS 53
+#endif
+
+const doublereal lapack_dlamch_tab[] =
+{
+    0, FLT_RADIX, DBL_EPSILON, DBL_MAX_EXP, DBL_MIN_EXP, DBL_DIGITS, DBL_MAX,
+    DBL_EPSILON*FLT_RADIX, 1, DBL_MIN*(1 + DBL_EPSILON), DBL_MIN
+};
--- a/3rdparty/lapack/dlartg.c
+++ b/3rdparty/lapack/dlartg.c
-/* dlartg.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
 #include "clapack.h"


@@ -20,17 +8,14 @@
    integer i__1;
    doublereal d__1, d__2;

-    /* Builtin functions */
-    double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal);
-
    /* Local variables */
    integer i__;
    doublereal f1, g1, eps, scale;
    integer count;
-    doublereal safmn2, safmx2;
-    extern doublereal dlamch_(char *);
-    doublereal safmin;
-
+    
+    static doublereal safmn2, safmx2;
+    static doublereal safmin;
+    static volatile logical FIRST = TRUE_;

 /*  -- LAPACK auxiliary routine (version 3.2) -- */
 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
@@ -97,15 +82,16 @@
 /*     .. */
 /*     .. Executable Statements .. */

-/*     IF( FIRST ) THEN */
-    safmin = dlamch_("S");
-    eps = dlamch_("E");
-    d__1 = dlamch_("B");
-    i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.);
-    safmn2 = pow_di(&d__1, &i__1);
-    safmx2 = 1. / safmn2;
-/*        FIRST = .FALSE. */
-/*     END IF */
+    if( FIRST )
+    {
+        safmin = dlamch_("S");
+        eps = dlamch_("E");
+        d__1 = dlamch_("B");
+        i__1 = (integer) (log(safmin / eps) / log(dlamch_("B")) / 2.);
+        safmn2 = pow_di(&d__1, &i__1);
+        safmx2 = 1. / safmn2;
+        FIRST = FALSE_;
+    }
    if (*g == 0.) {
 	*cs = 1.;
 	*sn = 0.;

--- a/3rdparty/lapack/dlasr.c
+++ b/3rdparty/lapack/dlasr.c
--- a/3rdparty/lapack/ilaenv.c
+++ b/3rdparty/lapack/ilaenv.c
-/* ilaenv.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "clapack.h"
-
-#include "string.h"
-
-/* Table of constant values */
-
-static integer c__1 = 1;
-static real c_b163 = 0.f;
-static real c_b164 = 1.f;
-static integer c__0 = 0;
-
-integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, 
-	integer *n2, integer *n3, integer *n4)
-{
-    /* System generated locals */
-    integer ret_val;
-
-    /* Builtin functions */
-    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
-    integer s_cmp(char *, char *, ftnlen, ftnlen);
-
-    /* Local variables */
-    integer i__;
-    char c1[1], c2[2], c3[3], c4[2];
-    integer ic, nb, iz, nx;
-    logical cname;
-    integer nbmin;
-    logical sname;
-    extern integer ieeeck_(integer *, real *, real *);
-    char subnam[6];
-    extern integer iparmq_(integer *, char *, char *, integer *, integer *, 
-	    integer *, integer *);
-
-    ftnlen name_len, opts_len;
-
-    name_len = (ftnlen)strlen (name__);
-    opts_len = (ftnlen)strlen (opts);
-
-/*  -- LAPACK auxiliary routine (version 3.2) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     January 2007 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ILAENV is called from the LAPACK routines to choose problem-dependent */
-/*  parameters for the local environment.  See ISPEC for a description of */
-/*  the parameters. */
-
-/*  ILAENV returns an INTEGER */
-/*  if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
-/*  if ILAENV < 0:  if ILAENV = -k, the k-th argument had an illegal value. */
-
-/*  This version provides a set of parameters which should give good, */
-/*  but not optimal, performance on many of the currently available */
-/*  computers.  Users are encouraged to modify this subroutine to set */
-/*  the tuning parameters for their particular machine using the option */
-/*  and problem size information in the arguments. */
-
-/*  This routine will not function correctly if it is converted to all */
-/*  lower case.  Converting it to all upper case is allowed. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  ISPEC   (input) INTEGER */
-/*          Specifies the parameter to be returned as the value of */
-/*          ILAENV. */
-/*          = 1: the optimal blocksize; if this value is 1, an unblocked */
-/*               algorithm will give the best performance. */
-/*          = 2: the minimum block size for which the block routine */
-/*               should be used; if the usable block size is less than */
-/*               this value, an unblocked routine should be used. */
-/*          = 3: the crossover point (in a block routine, for N less */
-/*               than this value, an unblocked routine should be used) */
-/*          = 4: the number of shifts, used in the nonsymmetric */
-/*               eigenvalue routines (DEPRECATED) */
-/*          = 5: the minimum column dimension for blocking to be used; */
-/*               rectangular blocks must have dimension at least k by m, */
-/*               where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
-/*          = 6: the crossover point for the SVD (when reducing an m by n */
-/*               matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
-/*               this value, a QR factorization is used first to reduce */
-/*               the matrix to a triangular form.) */
-/*          = 7: the number of processors */
-/*          = 8: the crossover point for the multishift QR method */
-/*               for nonsymmetric eigenvalue problems (DEPRECATED) */
-/*          = 9: maximum size of the subproblems at the bottom of the */
-/*               computation tree in the divide-and-conquer algorithm */
-/*               (used by xGELSD and xGESDD) */
-/*          =10: ieee NaN arithmetic can be trusted not to trap */
-/*          =11: infinity arithmetic can be trusted not to trap */
-/*          12 <= ISPEC <= 16: */
-/*               xHSEQR or one of its subroutines, */
-/*               see IPARMQ for detailed explanation */
-
-/*  NAME    (input) CHARACTER*(*) */
-/*          The name of the calling subroutine, in either upper case or */
-/*          lower case. */
-
-/*  OPTS    (input) CHARACTER*(*) */
-/*          The character options to the subroutine NAME, concatenated */
-/*          into a single character string.  For example, UPLO = 'U', */
-/*          TRANS = 'T', and DIAG = 'N' for a triangular routine would */
-/*          be specified as OPTS = 'UTN'. */
-
-/*  N1      (input) INTEGER */
-/*  N2      (input) INTEGER */
-/*  N3      (input) INTEGER */
-/*  N4      (input) INTEGER */
-/*          Problem dimensions for the subroutine NAME; these may not all */
-/*          be required. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  The following conventions have been used when calling ILAENV from the */
-/*  LAPACK routines: */
-/*  1)  OPTS is a concatenation of all of the character options to */
-/*      subroutine NAME, in the same order that they appear in the */
-/*      argument list for NAME, even if they are not used in determining */
-/*      the value of the parameter specified by ISPEC. */
-/*  2)  The problem dimensions N1, N2, N3, N4 are specified in the order */
-/*      that they appear in the argument list for NAME.  N1 is used */
-/*      first, N2 second, and so on, and unused problem dimensions are */
-/*      passed a value of -1. */
-/*  3)  The parameter value returned by ILAENV is checked for validity in */
-/*      the calling subroutine.  For example, ILAENV is used to retrieve */
-/*      the optimal blocksize for STRTRI as follows: */
-
-/*      NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
-/*      IF( NB.LE.1 ) NB = MAX( 1, N ) */
-
-/*  ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    switch (*ispec) {
-	case 1:  goto L10;
-	case 2:  goto L10;
-	case 3:  goto L10;
-	case 4:  goto L80;
-	case 5:  goto L90;
-	case 6:  goto L100;
-	case 7:  goto L110;
-	case 8:  goto L120;
-	case 9:  goto L130;
-	case 10:  goto L140;
-	case 11:  goto L150;
-	case 12:  goto L160;
-	case 13:  goto L160;
-	case 14:  goto L160;
-	case 15:  goto L160;
-	case 16:  goto L160;
-    }
-
-/*     Invalid value for ISPEC */
-
-    ret_val = -1;
-    return ret_val;
-
-L10:
-
-/*     Convert NAME to upper case if the first character is lower case. */
-
-    ret_val = 1;
-    s_copy(subnam, name__, (ftnlen)1, name_len);
-    ic = *(unsigned char *)subnam;
-    iz = 'Z';
-    if (iz == 90 || iz == 122) {
-
-/*        ASCII character set */
-
-	if (ic >= 97 && ic <= 122) {
-	    *(unsigned char *)subnam = (char) (ic - 32);
-	    for (i__ = 2; i__ <= 6; ++i__) {
-		ic = *(unsigned char *)&subnam[i__ - 1];
-		if (ic >= 97 && ic <= 122) {
-		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
-		}
-/* L20: */
-	    }
-	}
-
-    } else if (iz == 233 || iz == 169) {
-
-/*        EBCDIC character set */
-
-	if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 162 && 
-		ic <= 169) {
-	    *(unsigned char *)subnam = (char) (ic + 64);
-	    for (i__ = 2; i__ <= 6; ++i__) {
-		ic = *(unsigned char *)&subnam[i__ - 1];
-		if (ic >= 129 && ic <= 137 || ic >= 145 && ic <= 153 || ic >= 
-			162 && ic <= 169) {
-		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64);
-		}
-/* L30: */
-	    }
-	}
-
-    } else if (iz == 218 || iz == 250) {
-
-/*        Prime machines:  ASCII+128 */
-
-	if (ic >= 225 && ic <= 250) {
-	    *(unsigned char *)subnam = (char) (ic - 32);
-	    for (i__ = 2; i__ <= 6; ++i__) {
-		ic = *(unsigned char *)&subnam[i__ - 1];
-		if (ic >= 225 && ic <= 250) {
-		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
-		}
-/* L40: */
-	    }
-	}
-    }
-
-    *(unsigned char *)c1 = *(unsigned char *)subnam;
-    sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D';
-    cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z';
-    if (! (cname || sname)) {
-	return ret_val;
-    }
-    s_copy(c2, subnam + 1, (ftnlen)1, (ftnlen)2);
-    s_copy(c3, subnam + 3, (ftnlen)1, (ftnlen)3);
-    s_copy(c4, c3 + 1, (ftnlen)1, (ftnlen)2);
-
-    switch (*ispec) {
-	case 1:  goto L50;
-	case 2:  goto L60;
-	case 3:  goto L70;
-    }
-
-L50:
-
-/*     ISPEC = 1:  block size */
-
-/*     In these examples, separate code is provided for setting NB for */
-/*     real and complex.  We assume that NB will take the same value in */
-/*     single or double precision. */
-
-    nb = 1;
-
-    if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 64;
-	    } else {
-		nb = 64;
-	    }
-	} else if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, 
-		"RQF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)
-		1, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) 
-		== 0) {
-	    if (sname) {
-		nb = 32;
-	    } else {
-		nb = 32;
-	    }
-	} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 32;
-	    } else {
-		nb = 32;
-	    }
-	} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 32;
-	    } else {
-		nb = 32;
-	    }
-	} else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 64;
-	    } else {
-		nb = 64;
-	    }
-	}
-    } else if (s_cmp(c2, "PO", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 64;
-	    } else {
-		nb = 64;
-	    }
-	}
-    } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 64;
-	    } else {
-		nb = 64;
-	    }
-	} else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    nb = 32;
-	} else if (sname && s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) {
-	    nb = 64;
-	}
-    } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    nb = 64;
-	} else if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    nb = 32;
-	} else if (s_cmp(c3, "GST", (ftnlen)1, (ftnlen)3) == 0) {
-	    nb = 64;
-	}
-    } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
-	if (*(unsigned char *)c3 == 'G') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nb = 32;
-	    }
-	} else if (*(unsigned char *)c3 == 'M') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nb = 32;
-	    }
-	}
-    } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
-	if (*(unsigned char *)c3 == 'G') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nb = 32;
-	    }
-	} else if (*(unsigned char *)c3 == 'M') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nb = 32;
-	    }
-	}
-    } else if (s_cmp(c2, "GB", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		if (*n4 <= 64) {
-		    nb = 1;
-		} else {
-		    nb = 32;
-		}
-	    } else {
-		if (*n4 <= 64) {
-		    nb = 1;
-		} else {
-		    nb = 32;
-		}
-	    }
-	}
-    } else if (s_cmp(c2, "PB", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		if (*n2 <= 64) {
-		    nb = 1;
-		} else {
-		    nb = 32;
-		}
-	    } else {
-		if (*n2 <= 64) {
-		    nb = 1;
-		} else {
-		    nb = 32;
-		}
-	    }
-	}
-    } else if (s_cmp(c2, "TR", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 64;
-	    } else {
-		nb = 64;
-	    }
-	}
-    } else if (s_cmp(c2, "LA", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "UUM", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nb = 64;
-	    } else {
-		nb = 64;
-	    }
-	}
-    } else if (sname && s_cmp(c2, "ST", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "EBZ", (ftnlen)1, (ftnlen)3) == 0) {
-	    nb = 1;
-	}
-    }
-    ret_val = nb;
-    return ret_val;
-
-L60:
-
-/*     ISPEC = 2:  minimum block size */
-
-    nbmin = 2;
-    if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
-		ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, (
-		ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0)
-		 {
-	    if (sname) {
-		nbmin = 2;
-	    } else {
-		nbmin = 2;
-	    }
-	} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nbmin = 2;
-	    } else {
-		nbmin = 2;
-	    }
-	} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nbmin = 2;
-	    } else {
-		nbmin = 2;
-	    }
-	} else if (s_cmp(c3, "TRI", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nbmin = 2;
-	    } else {
-		nbmin = 2;
-	    }
-	}
-    } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRF", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nbmin = 8;
-	    } else {
-		nbmin = 8;
-	    }
-	} else if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    nbmin = 2;
-	}
-    } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    nbmin = 2;
-	}
-    } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
-	if (*(unsigned char *)c3 == 'G') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nbmin = 2;
-	    }
-	} else if (*(unsigned char *)c3 == 'M') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nbmin = 2;
-	    }
-	}
-    } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
-	if (*(unsigned char *)c3 == 'G') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nbmin = 2;
-	    }
-	} else if (*(unsigned char *)c3 == 'M') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nbmin = 2;
-	    }
-	}
-    }
-    ret_val = nbmin;
-    return ret_val;
-
-L70:
-
-/*     ISPEC = 3:  crossover point */
-
-    nx = 0;
-    if (s_cmp(c2, "GE", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "QRF", (ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "RQF", (
-		ftnlen)1, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)1, (
-		ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)1, (ftnlen)3) == 0)
-		 {
-	    if (sname) {
-		nx = 128;
-	    } else {
-		nx = 128;
-	    }
-	} else if (s_cmp(c3, "HRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nx = 128;
-	    } else {
-		nx = 128;
-	    }
-	} else if (s_cmp(c3, "BRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    if (sname) {
-		nx = 128;
-	    } else {
-		nx = 128;
-	    }
-	}
-    } else if (s_cmp(c2, "SY", (ftnlen)1, (ftnlen)2) == 0) {
-	if (sname && s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    nx = 32;
-	}
-    } else if (cname && s_cmp(c2, "HE", (ftnlen)1, (ftnlen)2) == 0) {
-	if (s_cmp(c3, "TRD", (ftnlen)1, (ftnlen)3) == 0) {
-	    nx = 32;
-	}
-    } else if (sname && s_cmp(c2, "OR", (ftnlen)1, (ftnlen)2) == 0) {
-	if (*(unsigned char *)c3 == 'G') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nx = 128;
-	    }
-	}
-    } else if (cname && s_cmp(c2, "UN", (ftnlen)1, (ftnlen)2) == 0) {
-	if (*(unsigned char *)c3 == 'G') {
-	    if (s_cmp(c4, "QR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "RQ", 
-		    (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)1, (
-		    ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)1, (ftnlen)2) ==
-		     0 || s_cmp(c4, "HR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(
-		    c4, "TR", (ftnlen)1, (ftnlen)2) == 0 || s_cmp(c4, "BR", (
-		    ftnlen)1, (ftnlen)2) == 0) {
-		nx = 128;
-	    }
-	}
-    }
-    ret_val = nx;
-    return ret_val;
-
-L80:
-
-/*     ISPEC = 4:  number of shifts (used by xHSEQR) */
-
-    ret_val = 6;
-    return ret_val;
-
-L90:
-
-/*     ISPEC = 5:  minimum column dimension (not used) */
-
-    ret_val = 2;
-    return ret_val;
-
-L100:
-
-/*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD) */
-
-    ret_val = (integer) ((real) min(*n1,*n2) * 1.6f);
-    return ret_val;
-
-L110:
-
-/*     ISPEC = 7:  number of processors (not used) */
-
-    ret_val = 1;
-    return ret_val;
-
-L120:
-
-/*     ISPEC = 8:  crossover point for multishift (used by xHSEQR) */
-
-    ret_val = 50;
-    return ret_val;
-
-L130:
-
-/*     ISPEC = 9:  maximum size of the subproblems at the bottom of the */
-/*                 computation tree in the divide-and-conquer algorithm */
-/*                 (used by xGELSD and xGESDD) */
-
-    ret_val = 25;
-    return ret_val;
-
-L140:
-
-/*     ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
-
-/*     ILAENV = 0 */
-    ret_val = 1;
-    if (ret_val == 1) {
-	ret_val = ieeeck_(&c__1, &c_b163, &c_b164);
-    }
-    return ret_val;
-
-L150:
-
-/*     ISPEC = 11: infinity arithmetic can be trusted not to trap */
-
-/*     ILAENV = 0 */
-    ret_val = 1;
-    if (ret_val == 1) {
-	ret_val = ieeeck_(&c__0, &c_b163, &c_b164);
-    }
-    return ret_val;
-
-L160:
-
-/*     12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
-
-    ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4)
-	    ;
-    return ret_val;
-
-/*     End of ILAENV */
-
-} /* ilaenv_ */
--- a/3rdparty/lapack/ilaenv_custom.c
+++ b/3rdparty/lapack/ilaenv_custom.c
+/* ilaenv.f -- translated by f2c (version 20061008).
+   You must link the resulting object file with libf2c:
+	on Microsoft Windows system, link with libf2c.lib;
+	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+	or, if you install libf2c.a in a standard place, with -lf2c -lm
+	-- in that order, at the end of the command line, as in
+		cc *.o -lf2c -lm
+	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+		http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "clapack.h"
+
+#include "string.h"
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static real c_b163 = 0.f;
+static real c_b164 = 1.f;
+static integer c__0 = 0;
+
+integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, 
+	integer *n2, integer *n3, integer *n4)
+{
+/*  -- LAPACK auxiliary routine (version 3.2) -- */
+/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/*     January 2007 */
+
+/*     .. Scalar Arguments .. */
+/*     .. */
+
+/*  Purpose */
+/*  ======= */
+
+/*  ILAENV is called from the LAPACK routines to choose problem-dependent */
+/*  parameters for the local environment.  See ISPEC for a description of */
+/*  the parameters. */
+
+/*  ILAENV returns an INTEGER */
+/*  if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC */
+/*  if ILAENV < 0:  if ILAENV = -k, the k-th argument had an illegal value. */
+
+/*  This version provides a set of parameters which should give good, */
+/*  but not optimal, performance on many of the currently available */
+/*  computers.  Users are encouraged to modify this subroutine to set */
+/*  the tuning parameters for their particular machine using the option */
+/*  and problem size information in the arguments. */
+
+/*  This routine will not function correctly if it is converted to all */
+/*  lower case.  Converting it to all upper case is allowed. */
+
+/*  Arguments */
+/*  ========= */
+
+/*  ISPEC   (input) INTEGER */
+/*          Specifies the parameter to be returned as the value of */
+/*          ILAENV. */
+/*          = 1: the optimal blocksize; if this value is 1, an unblocked */
+/*               algorithm will give the best performance. */
+/*          = 2: the minimum block size for which the block routine */
+/*               should be used; if the usable block size is less than */
+/*               this value, an unblocked routine should be used. */
+/*          = 3: the crossover point (in a block routine, for N less */
+/*               than this value, an unblocked routine should be used) */
+/*          = 4: the number of shifts, used in the nonsymmetric */
+/*               eigenvalue routines (DEPRECATED) */
+/*          = 5: the minimum column dimension for blocking to be used; */
+/*               rectangular blocks must have dimension at least k by m, */
+/*               where k is given by ILAENV(2,...) and m by ILAENV(5,...) */
+/*          = 6: the crossover point for the SVD (when reducing an m by n */
+/*               matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds */
+/*               this value, a QR factorization is used first to reduce */
+/*               the matrix to a triangular form.) */
+/*          = 7: the number of processors */
+/*          = 8: the crossover point for the multishift QR method */
+/*               for nonsymmetric eigenvalue problems (DEPRECATED) */
+/*          = 9: maximum size of the subproblems at the bottom of the */
+/*               computation tree in the divide-and-conquer algorithm */
+/*               (used by xGELSD and xGESDD) */
+/*          =10: ieee NaN arithmetic can be trusted not to trap */
+/*          =11: infinity arithmetic can be trusted not to trap */
+/*          12 <= ISPEC <= 16: */
+/*               xHSEQR or one of its subroutines, */
+/*               see IPARMQ for detailed explanation */
+
+/*  NAME    (input) CHARACTER*(*) */
+/*          The name of the calling subroutine, in either upper case or */
+/*          lower case. */
+
+/*  OPTS    (input) CHARACTER*(*) */
+/*          The character options to the subroutine NAME, concatenated */
+/*          into a single character string.  For example, UPLO = 'U', */
+/*          TRANS = 'T', and DIAG = 'N' for a triangular routine would */
+/*          be specified as OPTS = 'UTN'. */
+
+/*  N1      (input) INTEGER */
+/*  N2      (input) INTEGER */
+/*  N3      (input) INTEGER */
+/*  N4      (input) INTEGER */
+/*          Problem dimensions for the subroutine NAME; these may not all */
+/*          be required. */
+
+/*  Further Details */
+/*  =============== */
+
+/*  The following conventions have been used when calling ILAENV from the */
+/*  LAPACK routines: */
+/*  1)  OPTS is a concatenation of all of the character options to */
+/*      subroutine NAME, in the same order that they appear in the */
+/*      argument list for NAME, even if they are not used in determining */
+/*      the value of the parameter specified by ISPEC. */
+/*  2)  The problem dimensions N1, N2, N3, N4 are specified in the order */
+/*      that they appear in the argument list for NAME.  N1 is used */
+/*      first, N2 second, and so on, and unused problem dimensions are */
+/*      passed a value of -1. */
+/*  3)  The parameter value returned by ILAENV is checked for validity in */
+/*      the calling subroutine.  For example, ILAENV is used to retrieve */
+/*      the optimal blocksize for STRTRI as follows: */
+
+/*      NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) */
+/*      IF( NB.LE.1 ) NB = MAX( 1, N ) */
+
+/*  ===================================================================== */
+
+/*     .. Local Scalars .. */
+/*     .. */
+/*     .. Intrinsic Functions .. */
+/*     .. */
+/*     .. External Functions .. */
+/*     .. */
+/*     .. Executable Statements .. */
+
+    switch (*ispec) {
+	case 1:
+	   /*     ISPEC = 1:  block size */
+
+       /*     In these examples, separate code is provided for setting NB for */
+       /*     real and complex.  We assume that NB will take the same value in */
+       /*     single or double precision. */
+        return 1; 
+	case 2:
+	    /*     ISPEC = 2:  minimum block size */
+        return 2;
+	case 3:
+	    /*     ISPEC = 3:  crossover point */
+        return 3;
+	case 4:
+	    /*     ISPEC = 4:  number of shifts (used by xHSEQR) */
+        return 6;        
+	case 5:
+	    /*     ISPEC = 5:  minimum column dimension (not used) */
+        return 2;
+	case 6:
+	    /*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD) */
+        return  (integer) ((real) min(*n1,*n2) * 1.6f);
+	case 7:
+	    /*     ISPEC = 7:  number of processors (not used) */
+        return 1;
+	case 8:
+	    /*     ISPEC = 8:  crossover point for multishift (used by xHSEQR) */
+        return 50;
+	case 9:
+	    /*     ISPEC = 9:  maximum size of the subproblems at the bottom of the */
+        /*                 computation tree in the divide-and-conquer algorithm */
+        /*                 (used by xGELSD and xGESDD) */
+	    return 25;
+	case 10:
+	    /*     ISPEC = 10: ieee NaN arithmetic can be trusted not to trap */
+	    return ieeeck_(&c__1, &c_b163, &c_b164);
+	case 11:
+	    /*     ISPEC = 11: infinity arithmetic can be trusted not to trap */
+	    return ieeeck_(&c__0, &c_b163, &c_b164);
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+	    /*     12 <= ISPEC <= 16: xHSEQR or one of its subroutines. */
+        return iparmq_(ispec, name__, opts, n1, n2, n3, n4);
+	default:
+        break;
+    }
+
+    /* Invalid value for ISPEC */
+    return -1;
+
+/*     End of ILAENV */
+
+} /* ilaenv_ */
--- a/3rdparty/lapack/sgemv.c
+++ b/3rdparty/lapack/sgemv.c
-/* sgemv.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
-#include "clapack.h"
-
-
-/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, 
-	real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
-	integer *incy)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    real temp;
-    integer lenx, leny;
-    extern logical lsame_(char *, char *);
-    extern /* Subroutine */ int xerbla_(char *, integer *);
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SGEMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-
-/*  Arguments */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
-	    ) {
-	info = 1;
-    } else if (*m < 0) {
-	info = 2;
-    } else if (*n < 0) {
-	info = 3;
-    } else if (*lda < max(1,*m)) {
-	info = 6;
-    } else if (*incx == 0) {
-	info = 8;
-    } else if (*incy == 0) {
-	info = 11;
-    }
-    if (info != 0) {
-	xerbla_("SGEMV ", &info);
-	return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-	return 0;
-    }
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N")) {
-	lenx = *n;
-	leny = *m;
-    } else {
-	lenx = *m;
-	leny = *n;
-    }
-    if (*incx > 0) {
-	kx = 1;
-    } else {
-	kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-	ky = 1;
-    } else {
-	ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-	if (*incy == 1) {
-	    if (*beta == 0.f) {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = 0.f;
-/* L10: */
-		}
-	    } else {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[i__] = *beta * y[i__];
-/* L20: */
-		}
-	    }
-	} else {
-	    iy = ky;
-	    if (*beta == 0.f) {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = 0.f;
-		    iy += *incy;
-/* L30: */
-		}
-	    } else {
-		i__1 = leny;
-		for (i__ = 1; i__ <= i__1; ++i__) {
-		    y[iy] = *beta * y[iy];
-		    iy += *incy;
-/* L40: */
-		}
-	    }
-	}
-    }
-    if (*alpha == 0.f) {
-	return 0;
-    }
-    if (lsame_(trans, "N")) {
-
-/*        Form  y := alpha*A*x + y. */
-
-	jx = kx;
-	if (*incy == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		if (x[jx] != 0.f) {
-		    temp = *alpha * x[jx];
-		    i__2 = *m;
-		    for (i__ = 1; i__ <= i__2; ++i__) {
-			y[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-		    }
-		}
-		jx += *incx;
-/* L60: */
-	    }
-	} else {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		if (x[jx] != 0.f) {
-		    temp = *alpha * x[jx];
-		    iy = ky;
-		    i__2 = *m;
-		    for (i__ = 1; i__ <= i__2; ++i__) {
-			y[iy] += temp * a[i__ + j * a_dim1];
-			iy += *incy;
-/* L70: */
-		    }
-		}
-		jx += *incx;
-/* L80: */
-	    }
-	}
-    } else {
-
-/*        Form  y := alpha*A'*x + y. */
-
-	jy = ky;
-	if (*incx == 1) {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp = 0.f;
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-		}
-		y[jy] += *alpha * temp;
-		jy += *incy;
-/* L100: */
-	    }
-	} else {
-	    i__1 = *n;
-	    for (j = 1; j <= i__1; ++j) {
-		temp = 0.f;
-		ix = kx;
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    temp += a[i__ + j * a_dim1] * x[ix];
-		    ix += *incx;
-/* L110: */
-		}
-		y[jy] += *alpha * temp;
-		jy += *incy;
-/* L120: */
-	    }
-	}
-    }
-
-    return 0;
-
-/*     End of SGEMV . */
-
-} /* sgemv_ */
--- a/3rdparty/lapack/sgemv_custom.c
+++ b/3rdparty/lapack/sgemv_custom.c
+#include "clapack.h"
+#include <assert.h>
+
+/* Subroutine */ int sgemv_(char *_trans, integer *_m, integer *_n, real *_alpha, 
+                            real *a, integer *_lda, real *x, integer *_incx, real *_beta, real *y, 
+                            integer *_incy)
+{
+    
+    /*     .. Scalar Arguments .. */
+    /*     .. */
+    /*     .. Array Arguments .. */
+    /*     .. */
+    
+    /*  Purpose */
+    /*  ======= */
+    
+    /*  SGEMV  performs one of the matrix-vector operations */
+    
+    /*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
+    
+    /*  where alpha and beta are scalars, x and y are vectors and A is an */
+    /*  m by n matrix. */
+    
+    /*  Arguments */
+    /*  ========== */
+    
+    /*  TRANS  - CHARACTER*1. */
+    /*           On entry, TRANS specifies the operation to be performed as */
+    /*           follows: */
+    
+    /*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
+    
+    /*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
+    
+    /*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
+    
+    /*           Unchanged on exit. */
+    
+    /*  M      - INTEGER. */
+    /*           On entry, M specifies the number of rows of the matrix A. */
+    /*           M must be at least zero. */
+    /*           Unchanged on exit. */
+    
+    /*  N      - INTEGER. */
+    /*           On entry, N specifies the number of columns of the matrix A. */
+    /*           N must be at least zero. */
+    /*           Unchanged on exit. */
+    
+    /*  ALPHA  - REAL            . */
+    /*           On entry, ALPHA specifies the scalar alpha. */
+    /*           Unchanged on exit. */
+    
+    /*  A      - REAL             array of DIMENSION ( LDA, n ). */
+    /*           Before entry, the leading m by n part of the array A must */
+    /*           contain the matrix of coefficients. */
+    /*           Unchanged on exit. */
+    
+    /*  LDA    - INTEGER. */
+    /*           On entry, LDA specifies the first dimension of A as declared */
+    /*           in the calling (sub) program. LDA must be at least */
+    /*           max( 1, m ). */
+    /*           Unchanged on exit. */
+    
+    /*  X      - REAL             array of DIMENSION at least */
+    /*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
+    /*           and at least */
+    /*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
+    /*           Before entry, the incremented array X must contain the */
+    /*           vector x. */
+    /*           Unchanged on exit. */
+    
+    /*  INCX   - INTEGER. */
+    /*           On entry, INCX specifies the increment for the elements of */
+    /*           X. INCX must not be zero. */
+    /*           Unchanged on exit. */
+    
+    /*  BETA   - REAL            . */
+    /*           On entry, BETA specifies the scalar beta. When BETA is */
+    /*           supplied as zero then Y need not be set on input. */
+    /*           Unchanged on exit. */
+    
+    /*  Y      - REAL             array of DIMENSION at least */
+    /*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
+    /*           and at least */
+    /*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
+    /*           Before entry with BETA non-zero, the incremented array Y */
+    /*           must contain the vector y. On exit, Y is overwritten by the */
+    /*           updated vector y. */
+    
+    /*  INCY   - INTEGER. */
+    /*           On entry, INCY specifies the increment for the elements of */
+    /*           Y. INCY must not be zero. */
+    /*           Unchanged on exit. */
+    
+    
+    /*  Level 2 Blas routine. */
+    
+    /*  -- Written on 22-October-1986. */
+    /*     Jack Dongarra, Argonne National Lab. */
+    /*     Jeremy Du Croz, Nag Central Office. */
+    /*     Sven Hammarling, Nag Central Office. */
+    /*     Richard Hanson, Sandia National Labs. */
+    
+    /*     Test the input parameters. */
+    
+    /* Function Body */
+    char trans = lapack_toupper(_trans[0]);
+    integer i, j, m = *_m, n = *_n, lda = *_lda, incx = *_incx, incy = *_incy;
+    integer leny = trans == 'N' ? m : n, lenx = trans == 'N' ? n : m;
+    real alpha = *_alpha, beta = *_beta;
+    
+    integer info = 0;
+    if (trans != 'N' && trans != 'T' && trans != 'C')
+        info = 1;
+    else if (m < 0)
+        info = 2;
+    else if (n < 0)
+        info = 3;
+    else if (lda < max(1,m))
+        info = 6;
+    else if (incx == 0)
+        info = 8;
+    else if (incy == 0)
+        info = 11;
+    
+    if (info != 0)
+    {
+        xerbla_("SGEMV ", &info);
+        return 0;
+    }
+    
+    if( incy < 0 )
+        y -= incy*(leny - 1);
+    if( incx < 0 )
+        x -= incx*(lenx - 1);
+    
+    /*     Start the operations. In this version the elements of A are */
+    /*     accessed sequentially with one pass through A. */
+    
+    if( beta != 1.f )
+    {
+        if( incy == 1 )
+        {
+            if( beta == 0.f )
+                for( i = 0; i < leny; i++ )
+                    y[i] = 0.f;
+            else
+                for( i = 0; i < leny; i++ )
+                    y[i] *= beta;
+        }
+        else
+        {
+            if( beta == 0.f )
+                for( i = 0; i < leny; i++ )
+                    y[i*incy] = 0.f;
+            else
+                for( i = 0; i < leny; i++ )
+                    y[i*incy] *= beta;
+        }
+    }
+    
+    if( alpha == 0.f )
+        ;
+    else if( trans == 'N' )
+    {
+        for( i = 0; i < n; i++, a += lda )
+        {
+            real s = x[i*incx];
+            if( s == 0.f )
+                continue;
+            s *= alpha;
+            
+            for( j = 0; j <= m - 4; j += 4 )
+            {
+                real t0 = y[j] + s*a[j];
+                real t1 = y[j+1] + s*a[j+1];
+                y[j] = t0; y[j+1] = t1;
+                t0 = y[j+2] + s*a[j+2];
+                t1 = y[j+3] + s*a[j+3];
+                y[j+2] = t0; y[j+3] = t1;
+            }
+
+            for( ; j < m; j++ )
+                y[j] += s*a[j];
+        }
+    }
+    else
+    {
+        for( i = 0; i < n; i++, a += lda )
+        {
+            real s = 0;
+            for( j = 0; j <= m - 4; j += 4 )
+                s += x[j]*a[j] + x[j+1]*a[j+1] + x[j+2]*a[j+2] + x[j+3]*a[j+3];
+            for( ; j < m; j++ )
+                s += x[j]*a[j];
+            y[i*incy] += alpha*s;
+        }
+    }
+    
+    return 0;
+    
+    /*     End of SGEMV . */
+    
+} /* sgemv_ */
--- a/3rdparty/lapack/sger.c
+++ b/3rdparty/lapack/sger.c
-/* sger.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
 #include "clapack.h"

-
-/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, 
-	integer *incx, real *y, integer *incy, real *a, integer *lda)
+/* Subroutine */ int sger_(integer *_m, integer *_n, real *_alpha, 
+	real *x, integer *_incx, real *y, integer *_incy, 
+	real *a, integer *_lda)
 {
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    integer i__, j, ix, jy, kx, info;
-    real temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *);

 /*     .. Scalar Arguments .. */
 /*     .. */
@@ -52,11 +33,11 @@
 /*           N must be at least zero. */
 /*           Unchanged on exit. */

-/*  ALPHA  - REAL            . */
+/*  ALPHA  - SINGLE PRECISION. */
 /*           On entry, ALPHA specifies the scalar alpha. */
 /*           Unchanged on exit. */

-/*  X      - REAL             array of dimension at least */
+/*  X      - SINGLE PRECISION array of dimension at least */
 /*           ( 1 + ( m - 1 )*abs( INCX ) ). */
 /*           Before entry, the incremented array X must contain the m */
 /*           element vector x. */
@@ -67,7 +48,7 @@
 /*           X. INCX must not be zero. */
 /*           Unchanged on exit. */

-/*  Y      - REAL             array of dimension at least */
+/*  Y      - SINGLE PRECISION array of dimension at least */
 /*           ( 1 + ( n - 1 )*abs( INCY ) ). */
 /*           Before entry, the incremented array Y must contain the n */
 /*           element vector y. */
@@ -78,7 +59,7 @@
 /*           Y. INCY must not be zero. */
 /*           Unchanged on exit. */

-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
+/*  A      - SINGLE PRECISION array of DIMENSION ( LDA, n ). */
 /*           Before entry, the leading m by n part of the array A must */
 /*           contain the matrix of coefficients. On exit, A is */
 /*           overwritten by the updated matrix. */
@@ -110,80 +91,70 @@

 /*     Test the input parameters. */

-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1;
-    a -= a_offset;
-
    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-	info = 1;
-    } else if (*n < 0) {
-	info = 2;
-    } else if (*incx == 0) {
-	info = 5;
-    } else if (*incy == 0) {
-	info = 7;
-    } else if (*lda < max(1,*m)) {
-	info = 9;
-    }
-    if (info != 0) {
-	xerbla_("SGER  ", &info);
-	return 0;
+    integer i, j, m = *_m, n = *_n, incx = *_incx, incy = *_incy, lda = *_lda;
+    real alpha = *_alpha;
+    integer info = 0;
+    
+    if (m < 0)
+        info = 1;
+    else if (n < 0)
+        info = 2;
+    else if (incx == 0)
+        info = 5;
+    else if (incy == 0)
+        info = 7;
+    else if (lda < max(1,m))
+        info = 9;
+    
+    if (info != 0)
+    {
+        xerbla_("SGER  ", &info);
+        return 0;
    }

-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.f) {
-	return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-	jy = 1;
-    } else {
-	jy = 1 - (*n - 1) * *incy;
+    if (incx < 0)
+        x -= (m-1)*incx;
+    if (incy < 0)
+        y -= (n-1)*incy;
+
+    /*     Start the operations. In this version the elements of A are */
+    /*     accessed sequentially with one pass through A. */
+    
+    if( alpha == 0 )
+        ;
+    else if( incx == 1 )
+    {
+        for( j = 0; j < n; j++, a += lda )
+        {
+            real s = y[j*incy];
+            if( s == 0 )
+                continue;
+            s *= alpha;
+            
+            for( i = 0; i <= m - 2; i += 2 )
+            {
+                real t0 = a[i] + x[i]*s;
+                real t1 = a[i+1] + x[i+1]*s;
+                a[i] = t0; a[i+1] = t1;
+            }
+            
+            for( ; i < m; i++ )
+                a[i] += x[i]*s;
+        }
    }
-    if (*incx == 1) {
-	i__1 = *n;
-	for (j = 1; j <= i__1; ++j) {
-	    if (y[jy] != 0.f) {
-		temp = *alpha * y[jy];
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-		}
-	    }
-	    jy += *incy;
-/* L20: */
-	}
-    } else {
-	if (*incx > 0) {
-	    kx = 1;
-	} else {
-	    kx = 1 - (*m - 1) * *incx;
-	}
-	i__1 = *n;
-	for (j = 1; j <= i__1; ++j) {
-	    if (y[jy] != 0.f) {
-		temp = *alpha * y[jy];
-		ix = kx;
-		i__2 = *m;
-		for (i__ = 1; i__ <= i__2; ++i__) {
-		    a[i__ + j * a_dim1] += x[ix] * temp;
-		    ix += *incx;
-/* L30: */
-		}
-	    }
-	    jy += *incy;
-/* L40: */
-	}
+    else
+    {
+        for( j = 0; j < n; j++, a += lda )
+        {
+            real s = y[j*incy];
+            if( s == 0 )
+                continue;
+            s *= alpha;
+            
+            for( i = 0; i < m; i++ )
+                a[i] += x[i*incx]*s;
+        }
    }

    return 0;

--- a/3rdparty/lapack/slamch.c
+++ b/3rdparty/lapack/slamch.c
-#include "clapack.h"
-#include <float.h>
-#include <stdio.h>
-
-/* *********************************************************************** */
-
-doublereal slamc3_(real *a, real *b)
-{
-    /* System generated locals */
-    real ret_val;
-    
-    
-    /*  -- LAPACK auxiliary routine (version 3.1) -- */
-    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-    /*     November 2006 */
-    
-    /*     .. Scalar Arguments .. */
-    /*     .. */
-    
-    /*  Purpose */
-    /*  ======= */
-    
-    /*  SLAMC3  is intended to force  A  and  B  to be stored prior to doing */
-    /*  the addition of  A  and  B ,  for use in situations where optimizers */
-    /*  might hold one of these in a register. */
-    
-    /*  Arguments */
-    /*  ========= */
-    
-    /*  A       (input) REAL */
-    /*  B       (input) REAL */
-    /*          The values A and B. */
-    
-    /* ===================================================================== */
-    
-    /*     .. Executable Statements .. */
-    
-    ret_val = *a + *b;
-    
-    return ret_val;
-    
-    /*     End of SLAMC3 */
-    
-} /* slamc3_ */
-
-
-#if 1
-
-/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
- taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
-
-#ifndef FLT_DIGITS
-#define FLT_DIGITS 24
-#endif
-
-doublereal
-slamch_(char *cmach) {
-    char ch = cmach[0];
-    float eps=FLT_EPSILON, sfmin, small;
-
-    if ('B' == ch || 'b' == ch) {
-        return FLT_RADIX;
-    } else if ('E' == ch || 'e' == ch) {
-        return eps;
-    } else if ('L' == ch || 'l' == ch) {
-        return FLT_MAX_EXP;
-    } else if ('M' == ch || 'm' == ch) {
-        return FLT_MIN_EXP;
-    } else if ('N' == ch || 'n' == ch) {
-        return FLT_DIGITS;
-    } else if ('O' == ch || 'o' == ch) {
-        return FLT_MAX;
-    } else if ('P' == ch || 'p' == ch) {
-        return eps * FLT_RADIX;
-    } else if ('R' == ch || 'r' == ch) {
-        return FLT_ROUNDS < 2;
-    } else if ('S' == ch || 's' == ch) {
-        /* Use SMALL plus a bit, to avoid the possibility of rounding causing overflow
-         when computing  1/sfmin. */
-        sfmin = FLT_MIN;
-        small = (float)(2. / FLT_MAX);
-        if (small <= sfmin) small = sfmin * (1 + eps);
-        return small;
-    } else if ('U' == ch || 'u' == ch) {
-        return FLT_MIN;
-    }
-
-    return 0;
-}
-
-#else
-
-/* Table of constant values */
-
-static integer c__1 = 1;
-static real c_b32 = 0.f;
-
-doublereal slamch_(char *cmach)
-{
-    /* Initialized data */
-
-    static logical first = TRUE_;
-
-    /* System generated locals */
-    integer i__1;
-    real ret_val;
-
-    /* Builtin functions */
-    double pow_ri(real *, integer *);
-
-    /* Local variables */
-    static real t;
-    integer it;
-    static real rnd, eps, base;
-    integer beta;
-    static real emin, prec, emax;
-    integer imin, imax;
-    logical lrnd;
-    static real rmin, rmax;
-    real rmach;
-    extern logical lsame_(char *, char *);
-    real small;
-    static real sfmin;
-    extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real 
-	    *, integer *, real *, integer *, real *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SLAMCH determines single precision machine parameters. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  CMACH   (input) CHARACTER*1 */
-/*          Specifies the value to be returned by SLAMCH: */
-/*          = 'E' or 'e',   SLAMCH := eps */
-/*          = 'S' or 's ,   SLAMCH := sfmin */
-/*          = 'B' or 'b',   SLAMCH := base */
-/*          = 'P' or 'p',   SLAMCH := eps*base */
-/*          = 'N' or 'n',   SLAMCH := t */
-/*          = 'R' or 'r',   SLAMCH := rnd */
-/*          = 'M' or 'm',   SLAMCH := emin */
-/*          = 'U' or 'u',   SLAMCH := rmin */
-/*          = 'L' or 'l',   SLAMCH := emax */
-/*          = 'O' or 'o',   SLAMCH := rmax */
-
-/*          where */
-
-/*          eps   = relative machine precision */
-/*          sfmin = safe minimum, such that 1/sfmin does not overflow */
-/*          base  = base of the machine */
-/*          prec  = eps*base */
-/*          t     = number of (base) digits in the mantissa */
-/*          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise */
-/*          emin  = minimum exponent before (gradual) underflow */
-/*          rmin  = underflow threshold - base**(emin-1) */
-/*          emax  = largest exponent before overflow */
-/*          rmax  = overflow threshold  - (base**emax)*(1-eps) */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Save statement .. */
-/*     .. */
-/*     .. Data statements .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    if (first) {
-	slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
-	base = (real) beta;
-	t = (real) it;
-	if (lrnd) {
-	    rnd = 1.f;
-	    i__1 = 1 - it;
-	    eps = pow_ri(&base, &i__1) / 2;
-	} else {
-	    rnd = 0.f;
-	    i__1 = 1 - it;
-	    eps = pow_ri(&base, &i__1);
-	}
-	prec = eps * base;
-	emin = (real) imin;
-	emax = (real) imax;
-	sfmin = rmin;
-	small = 1.f / rmax;
-	if (small >= sfmin) {
-
-/*           Use SMALL plus a bit, to avoid the possibility of rounding */
-/*           causing overflow when computing  1/sfmin. */
-
-	    sfmin = small * (eps + 1.f);
-	}
-    }
-
-    if (lsame_(cmach, "E")) {
-	rmach = eps;
-    } else if (lsame_(cmach, "S")) {
-	rmach = sfmin;
-    } else if (lsame_(cmach, "B")) {
-	rmach = base;
-    } else if (lsame_(cmach, "P")) {
-	rmach = prec;
-    } else if (lsame_(cmach, "N")) {
-	rmach = t;
-    } else if (lsame_(cmach, "R")) {
-	rmach = rnd;
-    } else if (lsame_(cmach, "M")) {
-	rmach = emin;
-    } else if (lsame_(cmach, "U")) {
-	rmach = rmin;
-    } else if (lsame_(cmach, "L")) {
-	rmach = emax;
-    } else if (lsame_(cmach, "O")) {
-	rmach = rmax;
-    }
-
-    ret_val = rmach;
-    first = FALSE_;
-    return ret_val;
-
-/*     End of SLAMCH */
-
-} /* slamch_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical 
-	*ieee1)
-{
-    /* Initialized data */
-
-    static logical first = TRUE_;
-
-    /* System generated locals */
-    real r__1, r__2;
-
-    /* Local variables */
-    real a, b, c__, f, t1, t2;
-    static integer lt;
-    real one, qtr;
-    static logical lrnd;
-    static integer lbeta;
-    real savec;
-    static logical lieee1;
-    extern doublereal slamc3_(real *, real *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SLAMC1 determines the machine parameters given by BETA, T, RND, and */
-/*  IEEE1. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  BETA    (output) INTEGER */
-/*          The base of the machine. */
-
-/*  T       (output) INTEGER */
-/*          The number of ( BETA ) digits in the mantissa. */
-
-/*  RND     (output) LOGICAL */
-/*          Specifies whether proper rounding  ( RND = .TRUE. )  or */
-/*          chopping  ( RND = .FALSE. )  occurs in addition. This may not */
-/*          be a reliable guide to the way in which the machine performs */
-/*          its arithmetic. */
-
-/*  IEEE1   (output) LOGICAL */
-/*          Specifies whether rounding appears to be done in the IEEE */
-/*          'round to nearest' style. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  The routine is based on the routine  ENVRON  by Malcolm and */
-/*  incorporates suggestions by Gentleman and Marovich. See */
-
-/*     Malcolm M. A. (1972) Algorithms to reveal properties of */
-/*        floating-point arithmetic. Comms. of the ACM, 15, 949-951. */
-
-/*     Gentleman W. M. and Marovich S. B. (1974) More on algorithms */
-/*        that reveal properties of floating point arithmetic units. */
-/*        Comms. of the ACM, 17, 276-277. */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Save statement .. */
-/*     .. */
-/*     .. Data statements .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    if (first) {
-	one = 1.f;
-
-/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BETA, */
-/*        IEEE1, T and RND. */
-
-/*        Throughout this routine  we use the function  SLAMC3  to ensure */
-/*        that relevant values are  stored and not held in registers,  or */
-/*        are not affected by optimizers. */
-
-/*        Compute  a = 2.0**m  with the  smallest positive integer m such */
-/*        that */
-
-/*           fl( a + 1.0 ) = a. */
-
-	a = 1.f;
-	c__ = 1.f;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L10:
-	if (c__ == one) {
-	    a *= 2;
-	    c__ = slamc3_(&a, &one);
-	    r__1 = -a;
-	    c__ = slamc3_(&c__, &r__1);
-	    goto L10;
-	}
-/* +       END WHILE */
-
-/*        Now compute  b = 2.0**m  with the smallest positive integer m */
-/*        such that */
-
-/*           fl( a + b ) .gt. a. */
-
-	b = 1.f;
-	c__ = slamc3_(&a, &b);
-
-/* +       WHILE( C.EQ.A )LOOP */
-L20:
-	if (c__ == a) {
-	    b *= 2;
-	    c__ = slamc3_(&a, &b);
-	    goto L20;
-	}
-/* +       END WHILE */
-
-/*        Now compute the base.  a and c  are neighbouring floating point */
-/*        numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so */
-/*        their difference is beta. Adding 0.25 to c is to ensure that it */
-/*        is truncated to beta and not ( beta - 1 ). */
-
-	qtr = one / 4;
-	savec = c__;
-	r__1 = -a;
-	c__ = slamc3_(&c__, &r__1);
-	lbeta = c__ + qtr;
-
-/*        Now determine whether rounding or chopping occurs,  by adding a */
-/*        bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a. */
-
-	b = (real) lbeta;
-	r__1 = b / 2;
-	r__2 = -b / 100;
-	f = slamc3_(&r__1, &r__2);
-	c__ = slamc3_(&f, &a);
-	if (c__ == a) {
-	    lrnd = TRUE_;
-	} else {
-	    lrnd = FALSE_;
-	}
-	r__1 = b / 2;
-	r__2 = b / 100;
-	f = slamc3_(&r__1, &r__2);
-	c__ = slamc3_(&f, &a);
-	if (lrnd && c__ == a) {
-	    lrnd = FALSE_;
-	}
-
-/*        Try and decide whether rounding is done in the  IEEE  'round to */
-/*        nearest' style. B/2 is half a unit in the last place of the two */
-/*        numbers A and SAVEC. Furthermore, A is even, i.e. has last  bit */
-/*        zero, and SAVEC is odd. Thus adding B/2 to A should not  change */
-/*        A, but adding B/2 to SAVEC should change SAVEC. */
-
-	r__1 = b / 2;
-	t1 = slamc3_(&r__1, &a);
-	r__1 = b / 2;
-	t2 = slamc3_(&r__1, &savec);
-	lieee1 = t1 == a && t2 > savec && lrnd;
-
-/*        Now find  the  mantissa, t.  It should  be the  integer part of */
-/*        log to the base beta of a,  however it is safer to determine  t */
-/*        by powering.  So we find t as the smallest positive integer for */
-/*        which */
-
-/*           fl( beta**t + 1.0 ) = 1.0. */
-
-	lt = 0;
-	a = 1.f;
-	c__ = 1.f;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L30:
-	if (c__ == one) {
-	    ++lt;
-	    a *= lbeta;
-	    c__ = slamc3_(&a, &one);
-	    r__1 = -a;
-	    c__ = slamc3_(&c__, &r__1);
-	    goto L30;
-	}
-/* +       END WHILE */
-
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *ieee1 = lieee1;
-    first = FALSE_;
-    return 0;
-
-/*     End of SLAMC1 */
-
-} /* slamc1_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real *
-	eps, integer *emin, real *rmin, integer *emax, real *rmax)
-{
-    /* Initialized data */
-
-    static logical first = TRUE_;
-    static logical iwarn = FALSE_;
-
-    /* Format strings */
-    static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre"
-	    "ct:-\002,\002  EMIN = \002,i8,/\002 If, after inspection, the va"
-	    "lue EMIN looks\002,\002 acceptable please comment out \002,/\002"
-	    " the IF block as marked within the code of routine\002,\002 SLAM"
-	    "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)";
-
-    /* System generated locals */
-    integer i__1;
-    real r__1, r__2, r__3, r__4, r__5;
-
-    /* Builtin functions */
-    double pow_ri(real *, integer *);
-    //integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
-
-    /* Local variables */
-    real a, b, c__;
-    integer i__;
-    static integer lt;
-    real one, two;
-    logical ieee;
-    real half;
-    logical lrnd;
-    static real leps;
-    real zero;
-    static integer lbeta;
-    real rbase;
-    static integer lemin, lemax;
-    integer gnmin;
-    real small;
-    integer gpmin;
-    real third;
-    static real lrmin, lrmax;
-    real sixth;
-    logical lieee1;
-    extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, 
-	    logical *);
-    extern doublereal slamc3_(real *, real *);
-    extern /* Subroutine */ int slamc4_(integer *, real *, integer *), 
-	    slamc5_(integer *, integer *, integer *, logical *, integer *, 
-	    real *);
-    integer ngnmin, ngpmin;
-
-    /* Fortran I/O blocks */
-    static cilist io___58 = { 0, 6, 0, fmt_9999, 0 };
-
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SLAMC2 determines the machine parameters specified in its argument */
-/*  list. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  BETA    (output) INTEGER */
-/*          The base of the machine. */
-
-/*  T       (output) INTEGER */
-/*          The number of ( BETA ) digits in the mantissa. */
-
-/*  RND     (output) LOGICAL */
-/*          Specifies whether proper rounding  ( RND = .TRUE. )  or */
-/*          chopping  ( RND = .FALSE. )  occurs in addition. This may not */
-/*          be a reliable guide to the way in which the machine performs */
-/*          its arithmetic. */
-
-/*  EPS     (output) REAL */
-/*          The smallest positive number such that */
-
-/*             fl( 1.0 - EPS ) .LT. 1.0, */
-
-/*          where fl denotes the computed value. */
-
-/*  EMIN    (output) INTEGER */
-/*          The minimum exponent before (gradual) underflow occurs. */
-
-/*  RMIN    (output) REAL */
-/*          The smallest normalized number for the machine, given by */
-/*          BASE**( EMIN - 1 ), where  BASE  is the floating point value */
-/*          of BETA. */
-
-/*  EMAX    (output) INTEGER */
-/*          The maximum exponent before overflow occurs. */
-
-/*  RMAX    (output) REAL */
-/*          The largest positive number for the machine, given by */
-/*          BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point */
-/*          value of BETA. */
-
-/*  Further Details */
-/*  =============== */
-
-/*  The computation of  EPS  is based on a routine PARANOIA by */
-/*  W. Kahan of the University of California at Berkeley. */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Save statement .. */
-/*     .. */
-/*     .. Data statements .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    if (first) {
-	zero = 0.f;
-	one = 1.f;
-	two = 2.f;
-
-/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values of */
-/*        BETA, T, RND, EPS, EMIN and RMIN. */
-
-/*        Throughout this routine  we use the function  SLAMC3  to ensure */
-/*        that relevant values are stored  and not held in registers,  or */
-/*        are not affected by optimizers. */
-
-/*        SLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. */
-
-	slamc1_(&lbeta, &lt, &lrnd, &lieee1);
-
-/*        Start to find EPS. */
-
-	b = (real) lbeta;
-	i__1 = -lt;
-	a = pow_ri(&b, &i__1);
-	leps = a;
-
-/*        Try some tricks to see whether or not this is the correct  EPS. */
-
-	b = two / 3;
-	half = one / 2;
-	r__1 = -half;
-	sixth = slamc3_(&b, &r__1);
-	third = slamc3_(&sixth, &sixth);
-	r__1 = -half;
-	b = slamc3_(&third, &r__1);
-	b = slamc3_(&b, &sixth);
-	b = dabs(b);
-	if (b < leps) {
-	    b = leps;
-	}
-
-	leps = 1.f;
-
-/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
-L10:
-	if (leps > b && b > zero) {
-	    leps = b;
-	    r__1 = half * leps;
-/* Computing 5th power */
-	    r__3 = two, r__4 = r__3, r__3 *= r__3;
-/* Computing 2nd power */
-	    r__5 = leps;
-	    r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
-	    c__ = slamc3_(&r__1, &r__2);
-	    r__1 = -c__;
-	    c__ = slamc3_(&half, &r__1);
-	    b = slamc3_(&half, &c__);
-	    r__1 = -b;
-	    c__ = slamc3_(&half, &r__1);
-	    b = slamc3_(&half, &c__);
-	    goto L10;
-	}
-/* +       END WHILE */
-
-	if (a < leps) {
-	    leps = a;
-	}
-
-/*        Computation of EPS complete. */
-
-/*        Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)). */
-/*        Keep dividing  A by BETA until (gradual) underflow occurs. This */
-/*        is detected when we cannot recover the previous A. */
-
-	rbase = one / lbeta;
-	small = one;
-	for (i__ = 1; i__ <= 3; ++i__) {
-	    r__1 = small * rbase;
-	    small = slamc3_(&r__1, &zero);
-/* L20: */
-	}
-	a = slamc3_(&one, &small);
-	slamc4_(&ngpmin, &one, &lbeta);
-	r__1 = -one;
-	slamc4_(&ngnmin, &r__1, &lbeta);
-	slamc4_(&gpmin, &a, &lbeta);
-	r__1 = -a;
-	slamc4_(&gnmin, &r__1, &lbeta);
-	ieee = FALSE_;
-
-	if (ngpmin == ngnmin && gpmin == gnmin) {
-	    if (ngpmin == gpmin) {
-		lemin = ngpmin;
-/*            ( Non twos-complement machines, no gradual underflow; */
-/*              e.g.,  VAX ) */
-	    } else if (gpmin - ngpmin == 3) {
-		lemin = ngpmin - 1 + lt;
-		ieee = TRUE_;
-/*            ( Non twos-complement machines, with gradual underflow; */
-/*              e.g., IEEE standard followers ) */
-	    } else {
-		lemin = min(ngpmin,gpmin);
-/*            ( A guess; no known machine ) */
-		iwarn = TRUE_;
-	    }
-
-	} else if (ngpmin == gpmin && ngnmin == gnmin) {
-	    if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
-		lemin = max(ngpmin,ngnmin);
-/*            ( Twos-complement machines, no gradual underflow; */
-/*              e.g., CYBER 205 ) */
-	    } else {
-		lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-		iwarn = TRUE_;
-	    }
-
-	} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
-		 {
-	    if (gpmin - min(ngpmin,ngnmin) == 3) {
-		lemin = max(ngpmin,ngnmin) - 1 + lt;
-/*            ( Twos-complement machines with gradual underflow; */
-/*              no known machine ) */
-	    } else {
-		lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-		iwarn = TRUE_;
-	    }
-
-	} else {
-/* Computing MIN */
-	    i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
-	    lemin = min(i__1,gnmin);
-/*         ( A guess; no known machine ) */
-	    iwarn = TRUE_;
-	}
-	first = FALSE_;
-/* ** */
-/* Comment out this if block if EMIN is ok */
-	if (iwarn) {
-	    first = TRUE_;
-	    printf("\n\n WARNING. The value EMIN may be incorrect:- ");
-	    printf("EMIN = %8i\n",lemin);
-	    printf("If, after inspection, the value EMIN looks acceptable");
-            printf("please comment out \n the IF block as marked within the"); 
-            printf("code of routine SLAMC2, \n otherwise supply EMIN"); 
-            printf("explicitly.\n");
-         /*
-	    s_wsfe(&io___58);
-	    do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer));
-	    e_wsfe();
-         */
-	}
-/* ** */
-
-/*        Assume IEEE arithmetic if we found denormalised  numbers above, */
-/*        or if arithmetic seems to round in the  IEEE style,  determined */
-/*        in routine SLAMC1. A true IEEE machine should have both  things */
-/*        true; however, faulty machines may have one or the other. */
-
-	ieee = ieee || lieee1;
-
-/*        Compute  RMIN by successive division by  BETA. We could compute */
-/*        RMIN as BASE**( EMIN - 1 ),  but some machines underflow during */
-/*        this computation. */
-
-	lrmin = 1.f;
-	i__1 = 1 - lemin;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    r__1 = lrmin * rbase;
-	    lrmin = slamc3_(&r__1, &zero);
-/* L30: */
-	}
-
-/*        Finally, call SLAMC5 to compute EMAX and RMAX. */
-
-	slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *eps = leps;
-    *emin = lemin;
-    *rmin = lrmin;
-    *emax = lemax;
-    *rmax = lrmax;
-
-    return 0;
-
-
-/*     End of SLAMC2 */
-
-} /* slamc2_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base)
-{
-    /* System generated locals */
-    integer i__1;
-    real r__1;
-
-    /* Local variables */
-    real a;
-    integer i__;
-    real b1, b2, c1, c2, d1, d2, one, zero, rbase;
-    extern doublereal slamc3_(real *, real *);
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SLAMC4 is a service routine for SLAMC2. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  EMIN    (output) INTEGER */
-/*          The minimum exponent before (gradual) underflow, computed by */
-/*          setting A = START and dividing by BASE until the previous A */
-/*          can not be recovered. */
-
-/*  START   (input) REAL */
-/*          The starting point for determining EMIN. */
-
-/*  BASE    (input) INTEGER */
-/*          The base of the machine. */
-
-/* ===================================================================== */
-
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-    a = *start;
-    one = 1.f;
-    rbase = one / *base;
-    zero = 0.f;
-    *emin = 1;
-    r__1 = a * rbase;
-    b1 = slamc3_(&r__1, &zero);
-    c1 = a;
-    c2 = a;
-    d1 = a;
-    d2 = a;
-/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */
-/*    $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
-L10:
-    if (c1 == a && c2 == a && d1 == a && d2 == a) {
-	--(*emin);
-	a = b1;
-	r__1 = a / *base;
-	b1 = slamc3_(&r__1, &zero);
-	r__1 = b1 * *base;
-	c1 = slamc3_(&r__1, &zero);
-	d1 = zero;
-	i__1 = *base;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    d1 += b1;
-/* L20: */
-	}
-	r__1 = a * rbase;
-	b2 = slamc3_(&r__1, &zero);
-	r__1 = b2 / rbase;
-	c2 = slamc3_(&r__1, &zero);
-	d2 = zero;
-	i__1 = *base;
-	for (i__ = 1; i__ <= i__1; ++i__) {
-	    d2 += b2;
-/* L30: */
-	}
-	goto L10;
-    }
-/* +    END WHILE */
-
-    return 0;
-
-/*     End of SLAMC4 */
-
-} /* slamc4_ */
-
-
-/* *********************************************************************** */
-
-/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, 
-	logical *ieee, integer *emax, real *rmax)
-{
-    /* System generated locals */
-    integer i__1;
-    real r__1;
-
-    /* Local variables */
-    integer i__;
-    real y, z__;
-    integer try__, lexp;
-    real oldy;
-    integer uexp, nbits;
-    extern doublereal slamc3_(real *, real *);
-    real recbas;
-    integer exbits, expsum;
-
-
-/*  -- LAPACK auxiliary routine (version 3.1) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
-/*     November 2006 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SLAMC5 attempts to compute RMAX, the largest machine floating-point */
-/*  number, without overflow.  It assumes that EMAX + abs(EMIN) sum */
-/*  approximately to a power of 2.  It will fail on machines where this */
-/*  assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */
-/*  EMAX = 28718).  It will also fail if the value supplied for EMIN is */
-/*  too large (i.e. too close to zero), probably with overflow. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  BETA    (input) INTEGER */
-/*          The base of floating-point arithmetic. */
-
-/*  P       (input) INTEGER */
-/*          The number of base BETA digits in the mantissa of a */
-/*          floating-point value. */
-
-/*  EMIN    (input) INTEGER */
-/*          The minimum exponent before (gradual) underflow. */
-
-/*  IEEE    (input) LOGICAL */
-/*          A logical flag specifying whether or not the arithmetic */
-/*          system is thought to comply with the IEEE standard. */
-
-/*  EMAX    (output) INTEGER */
-/*          The largest exponent before overflow */
-
-/*  RMAX    (output) REAL */
-/*          The largest machine floating-point number. */
-
-/* ===================================================================== */
-
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     First compute LEXP and UEXP, two powers of 2 that bound */
-/*     abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */
-/*     approximately to the bound that is closest to abs(EMIN). */
-/*     (EMAX is the exponent of the required number RMAX). */
-
-    lexp = 1;
-    exbits = 1;
-L10:
-    try__ = lexp << 1;
-    if (try__ <= -(*emin)) {
-	lexp = try__;
-	++exbits;
-	goto L10;
-    }
-    if (lexp == -(*emin)) {
-	uexp = lexp;
-    } else {
-	uexp = try__;
-	++exbits;
-    }
-
-/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater */
-/*     than or equal to EMIN. EXBITS is the number of bits needed to */
-/*     store the exponent. */
-
-    if (uexp + *emin > -lexp - *emin) {
-	expsum = lexp << 1;
-    } else {
-	expsum = uexp << 1;
-    }
-
-/*     EXPSUM is the exponent range, approximately equal to */
-/*     EMAX - EMIN + 1 . */
-
-    *emax = expsum + *emin - 1;
-    nbits = exbits + 1 + *p;
-
-/*     NBITS is the total number of bits needed to store a */
-/*     floating-point number. */
-
-    if (nbits % 2 == 1 && *beta == 2) {
-
-/*        Either there are an odd number of bits used to store a */
-/*        floating-point number, which is unlikely, or some bits are */
-/*        not used in the representation of numbers, which is possible, */
-/*        (e.g. Cray machines) or the mantissa has an implicit bit, */
-/*        (e.g. IEEE machines, Dec Vax machines), which is perhaps the */
-/*        most likely. We have to assume the last alternative. */
-/*        If this is true, then we need to reduce EMAX by one because */
-/*        there must be some way of representing zero in an implicit-bit */
-/*        system. On machines like Cray, we are reducing EMAX by one */
-/*        unnecessarily. */
-
-	--(*emax);
-    }
-
-    if (*ieee) {
-
-/*        Assume we are on an IEEE machine which reserves one exponent */
-/*        for infinity and NaN. */
-
-	--(*emax);
-    }
-
-/*     Now create RMAX, the largest machine number, which should */
-/*     be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */
-
-/*     First compute 1.0 - BETA**(-P), being careful that the */
-/*     result is less than 1.0 . */
-
-    recbas = 1.f / *beta;
-    z__ = *beta - 1.f;
-    y = 0.f;
-    i__1 = *p;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	z__ *= recbas;
-	if (y < 1.f) {
-	    oldy = y;
-	}
-	y = slamc3_(&y, &z__);
-/* L20: */
-    }
-    if (y >= 1.f) {
-	y = oldy;
-    }
-
-/*     Now multiply by BETA**EMAX to get RMAX. */
-
-    i__1 = *emax;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-	r__1 = y * *beta;
-	y = slamc3_(&r__1, &c_b32);
-/* L30: */
-    }
-
-    *rmax = y;
-    return 0;
-
-/*     End of SLAMC5 */
-
-} /* slamc5_ */
-
-#endif
--- a/3rdparty/lapack/slamch_custom.c
+++ b/3rdparty/lapack/slamch_custom.c
+#include "clapack.h"
+#include <float.h>
+#include <stdio.h>
+
+/* *********************************************************************** */
+
+doublereal slamc3_(real *a, real *b)
+{
+    /* System generated locals */
+    real ret_val;
+    
+    
+    /*  -- LAPACK auxiliary routine (version 3.1) -- */
+    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+    /*     November 2006 */
+    
+    /*     .. Scalar Arguments .. */
+    /*     .. */
+    
+    /*  Purpose */
+    /*  ======= */
+    
+    /*  SLAMC3  is intended to force  A  and  B  to be stored prior to doing */
+    /*  the addition of  A  and  B ,  for use in situations where optimizers */
+    /*  might hold one of these in a register. */
+    
+    /*  Arguments */
+    /*  ========= */
+    
+    /*  A       (input) REAL */
+    /*  B       (input) REAL */
+    /*          The values A and B. */
+    
+    /* ===================================================================== */
+    
+    /*     .. Executable Statements .. */
+    
+    ret_val = *a + *b;
+    
+    return ret_val;
+    
+    /*     End of SLAMC3 */
+    
+} /* slamc3_ */
+
+
+const unsigned char lapack_toupper_tab[] =
+{
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+    24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+    46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
+    68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+    90, 91, 92, 93, 94, 95, 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+    132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
+    168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
+    186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203,
+    204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,
+    222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
+};
+
+/* simpler version of dlamch for the case of IEEE754-compliant FPU module by Piotr Luszczek S.
+ taken from http://www.mail-archive.com/numpy-discussion@lists.sourceforge.net/msg02448.html */
+
+#ifndef FLT_DIGITS
+#define FLT_DIGITS 24
+#endif
+
+const unsigned char lapack_lamch_tab[] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 0, 8, 9, 0, 10, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6, 7, 0, 8, 9,
+    0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const doublereal lapack_slamch_tab[] =
+{
+    0, FLT_RADIX, FLT_EPSILON, FLT_MAX_EXP, FLT_MIN_EXP, FLT_DIGITS, FLT_MAX,
+    FLT_EPSILON*FLT_RADIX, 1, FLT_MIN*(1 + FLT_EPSILON), FLT_MIN
+}; 
--- a/3rdparty/lapack/slartg.c
+++ b/3rdparty/lapack/slartg.c
-/* slartg.f -- translated by f2c (version 20061008).
-   You must link the resulting object file with libf2c:
-	on Microsoft Windows system, link with libf2c.lib;
-	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
-	or, if you install libf2c.a in a standard place, with -lf2c -lm
-	-- in that order, at the end of the command line, as in
-		cc *.o -lf2c -lm
-	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
-
-		http://www.netlib.org/f2c/libf2c.zip
-*/
-
 #include "clapack.h"


@@ -19,17 +7,13 @@
    integer i__1;
    real r__1, r__2;

-    /* Builtin functions */
-    double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal);
-
    /* Local variables */
    integer i__;
    real f1, g1, eps, scale;
    integer count;
-    real safmn2, safmx2;
-    extern doublereal slamch_(char *);
-    real safmin;
-
+    static real safmn2, safmx2;
+    static real safmin;
+    static logical FIRST = TRUE_;

 /*  -- LAPACK auxiliary routine (version 3.2) -- */
 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
@@ -96,15 +80,16 @@
 /*     .. */
 /*     .. Executable Statements .. */

-/*     IF( FIRST ) THEN */
-    safmin = slamch_("S");
-    eps = slamch_("E");
-    r__1 = slamch_("B");
-    i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f);
-    safmn2 = pow_ri(&r__1, &i__1);
-    safmx2 = 1.f / safmn2;
-/*        FIRST = .FALSE. */
-/*     END IF */
+    if(FIRST)
+    {
+        safmin = slamch_("S");
+        eps = slamch_("E");
+        r__1 = slamch_("B");
+        i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f);
+        safmn2 = pow_ri(&r__1, &i__1);
+        safmx2 = 1.f / safmn2;
+        FIRST = FALSE_;
+    }
    if (*g == 0.f) {
 	*cs = 1.f;
 	*sn = 0.f;

--- a/3rdparty/lapack/slasr.c
+++ b/3rdparty/lapack/slasr.c